In [None]:
# Install the necessary packages
!pip install -r ../requirements.txt

In [None]:
# Date range and sampling interval handed to the data loader.
start, end = "2024-01-01", "2024-03-01"
interval = "1h"

In [None]:
from src.data_services.data_pipeline import load_data

# 1. Load data
# Symbols requested from the loader; all share the same date range and interval.
tickers = [
    "ADAUSDT",
    "AVAXUSDT",
    "BTCUSDT",
    "DOGEUSDT",
    "DOTUSDT",
    "ETHUSDT",
    "LINKUSDT",
    "LTCUSDT",
    "SOLUSDT",
    "XRPUSDT",
]
df = load_data(tickers=tickers, start=start, end=end, interval=interval)

# Peek at the first rows to confirm what the loader returned.
print(df.head())

In [None]:
# 2. Calculate Pearson's correlation matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation between every pair of columns of the raw frame.
corr_matrix = df.corr(method='pearson')

# Annotated heatmap with a diverging palette centred on zero.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    center=0,
    linewidths=0.5,
    square=True,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title("Macierz korelacji Pearsona (prices)")
fig.tight_layout()
plt.show()

In [None]:
# Log-returns correlation
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Log of each row divided by the previous row; the first row has no
# predecessor and is removed by dropna() (as is any other row with a NaN).
log_returns = np.log(df.div(df.shift(1))).dropna()

log_returns_corr_matrix = log_returns.corr(method='pearson')

# Same heatmap styling as the price-level matrix, for side-by-side reading.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    log_returns_corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    center=0,
    linewidths=0.5,
    square=True,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title("Pearson's correlation matrix (log-returns)")
fig.tight_layout()
plt.show()

In [None]:
import pandas as pd

# Long-format table of both correlation measures: one row per unordered
# pair of columns, labelled "<first>-<second>" in column order.
tickers = df.columns

corr_results = []
for i, a in enumerate(tickers):
    # Pair each column only with the columns after it, so every unordered
    # pair appears exactly once and no column is paired with itself.
    for b in tickers[i + 1:]:
        corr_results.append({
            'pair': f'{a}-{b}',
            'corr_price': df[a].corr(df[b], method='pearson'),
            'corr_log_returns': log_returns[a].corr(log_returns[b]),
        })

# Highest price-level correlation first.
corr_df = (
    pd.DataFrame(corr_results)
    .sort_values('corr_price', ascending=False)
    .reset_index(drop=True)
)

print(corr_df)

In [None]:
from itertools import combinations

# Derive the tickers from the loaded price frame instead of repeating the
# hard-coded list. The pairwise correlation table labels its rows from
# df.columns, and the cointegration results are later merged with it on the
# "<A>-<B>" label, so both must enumerate the tickers in the same order.
# A second literal list could also name a ticker that is absent from df.
tickers = list(df.columns)

# Every unordered pair of tickers, in column order.
pairs = list(combinations(tickers, 2))
print(len(pairs))

In [None]:
# 3. Perform Engle-Granger cointegration test

import pandas as pd
from statsmodels.tsa.stattools import coint

# One test per pair. Notes for interpreting the output:
#   * coint(a, b) treats `a` as the first element of the system, so only one
#     ordering of each pair is evaluated here.
#   * The p-values are reported as returned; no adjustment is made for the
#     number of pairs tested.
results = []
for x, y in pairs:
    # coint() has no option for skipping missing values, so keep only the
    # rows where both series are present before testing.
    pair_prices = df[[x, y]].dropna()
    score, p_value, _ = coint(pair_prices[x], pair_prices[y])
    results.append({'pair': f'{x}-{y}', 'score': score, 'p_value': p_value})

# Smallest p-value first.
results_df = pd.DataFrame(results).sort_values('p_value', ascending=True).reset_index(drop=True)
print(results_df)

In [None]:
# 4. Merge correlation and cointegration dataframes

# Both tables are keyed on a "<A>-<B>" label. `validate` makes pandas raise
# if a label is duplicated on either side, instead of multiplying rows.
merged_df = results_df.merge(corr_df, on='pair', how='inner', validate='one_to_one')

# An inner join silently discards cointegration rows whose label has no
# counterpart in the correlation table (e.g. "A-B" vs "B-A" when the two
# tables were built from differently ordered ticker lists). Fail loudly
# rather than display a truncated table.
lost_pairs = sorted(set(results_df['pair']) - set(merged_df['pair']))
if lost_pairs:
    raise ValueError(
        f"{len(lost_pairs)} cointegration pair(s) have no matching correlation row: {lost_pairs}"
    )

print(merged_df.sort_values('p_value', ascending=True))