In [None]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

from binance_data_loader import BinanceDataLoader

In [None]:
# Folder holding the daily ticker files. Set the BINANCE_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original local path is used, so existing setups keep working unchanged.
DATA_DIRECTORY = os.environ.get(
    "BINANCE_DATA_DIR",
    r"C:\Users\USER\Documents\Binance_related\dailytickerdata2020",
)

# NOTE(review): min_records / min_volume look like per-ticker inclusion
# filters and end_date=None like "no upper bound" -- confirm against
# BinanceDataLoader, which is defined outside this notebook.
data_loader = BinanceDataLoader(
    data_directory=DATA_DIRECTORY,
    min_records=30,
    min_volume=1e5,
    start_date="2022-01-01",
    end_date=None
)

In [None]:
# Load the price matrix from the loader and report its size and the span of
# its index before previewing the first rows.
df = data_loader.get_price_matrix()
price_index = df.index
print(f"Price data shape: {df.shape}")
print(f"Date range: {price_index.min()} to {price_index.max()}")
df.head()

In [None]:
# Row-over-row percentage change of every price column. Undefined returns
# (the first row, and any other missing value) are replaced with 0 so the
# resulting frame contains no NaNs.
raw_rets = df.pct_change()
rets = raw_rets.fillna(0)
print(f"Returns data shape: {rets.shape}")
rets.head()

In [None]:
# Volatility-Return Relationship Analysis
# Restrict the study to the ten tickers with the highest mean volume so the
# plots stay readable.

# Mean of each ticker's 'volume' column, keyed by ticker.
volume_data = {
    ticker: data_loader._crypto_universe[ticker]['data']['volume'].mean()
    for ticker in data_loader.get_universe()
}

# (ticker, mean volume) pairs, largest volume first, truncated to ten.
top_volume_assets = sorted(
    volume_data.items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:10]
selected_assets = [ticker for ticker, _mean_volume in top_volume_assets]
print(f"Analyzing top 10 volume assets: {selected_assets}")

# Price and return frames narrowed to the selected tickers, in ranking order.
df_selected = df[selected_assets]
rets_selected = rets[selected_assets]

In [None]:
# Plot intercepts: Lag Return Vol vs Next Day Return
#
# For every selected asset and every rolling-window length, regress the next
# day's return on the trailing return volatility and plot the fitted
# intercept against the window length.
LAG_WINDOWS = range(2, 365)  # volatility look-back lengths, in rows
MIN_OBSERVATIONS = 30        # fits with fewer paired rows are reported as NaN

fig, ax = plt.subplots(figsize=(12, 8))

print("Calculating volatility-return relationships...")
for column in tqdm(df_selected.columns):
    # Row 0 of the return frame is the fillna(0) placeholder left by
    # pct_change(), not a real return, so keep it out of the rolling windows.
    daily_rets = rets_selected[column].iloc[1:]

    # Return earned from row t to row t+1, stamped at row t:
    # p[t+1] / p[t] - 1. (pct_change(-1) would instead give
    # p[t] / p[t+1] - 1, which has the opposite sign.)
    next_day_rets = df_selected[column].pct_change().shift(-1)

    intercepts = []
    for lag in LAG_WINDOWS:
        # Trailing volatility over `lag` rows; both slices cover the same
        # rows (lag+1 .. second-to-last) of the original frame.
        vol = daily_rets.rolling(window=lag).std().iloc[lag:-1]
        fwd = next_day_rets.iloc[lag + 1:-1]

        # Pair the two series on their shared index and drop rows where
        # either side is missing or infinite (e.g. a zero price), which
        # LinearRegression would otherwise reject.
        sample = (
            pd.concat({"vol": vol, "ret": fwd}, axis=1, join="inner")
            .replace([np.inf, -np.inf], np.nan)
            .dropna()
        )

        if len(sample) < MIN_OBSERVATIONS:  # Need minimum observations
            intercepts.append(np.nan)
            continue

        reg = LinearRegression().fit(
            sample[["vol"]].to_numpy(), sample["ret"].to_numpy()
        )
        intercepts.append(reg.intercept_)

    # One curve per asset
    ax.plot(LAG_WINDOWS, intercepts, label=column, alpha=0.7)

ax.axhline(0, color='r', linestyle='--', alpha=0.8)
ax.set_title("Volatility-Return Relationship: Intercepts Across Different Lag Windows")
ax.set_xlabel("Volatility Calculation Window (days)")
ax.set_ylabel("Regression Intercept")
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Plot slopes: Volatility coefficient in predicting next-day returns
#
# For every selected asset and every rolling-window length, regress the next
# day's return on the trailing return volatility and plot the fitted slope
# against the window length.
LAG_WINDOWS = range(2, 365)  # volatility look-back lengths, in rows
MIN_OBSERVATIONS = 30        # fits with fewer paired rows are reported as NaN

fig, ax = plt.subplots(figsize=(12, 8))

print("Plotting volatility coefficients...")
for column in tqdm(df_selected.columns):
    # Row 0 of the return frame is the fillna(0) placeholder left by
    # pct_change(), not a real return, so keep it out of the rolling windows.
    daily_rets = rets_selected[column].iloc[1:]

    # Return earned from row t to row t+1, stamped at row t:
    # p[t+1] / p[t] - 1. (pct_change(-1) would instead give
    # p[t] / p[t+1] - 1, which has the opposite sign.)
    next_day_rets = df_selected[column].pct_change().shift(-1)

    slopes = []
    for lag in LAG_WINDOWS:
        # Trailing volatility over `lag` rows; both slices cover the same
        # rows (lag+1 .. second-to-last) of the original frame.
        vol = daily_rets.rolling(window=lag).std().iloc[lag:-1]
        fwd = next_day_rets.iloc[lag + 1:-1]

        # Pair the two series on their shared index and drop rows where
        # either side is missing or infinite (e.g. a zero price), which
        # LinearRegression would otherwise reject.
        sample = (
            pd.concat({"vol": vol, "ret": fwd}, axis=1, join="inner")
            .replace([np.inf, -np.inf], np.nan)
            .dropna()
        )

        if len(sample) < MIN_OBSERVATIONS:  # Need minimum observations
            slopes.append(np.nan)
            continue

        reg = LinearRegression().fit(
            sample[["vol"]].to_numpy(), sample["ret"].to_numpy()
        )
        slopes.append(reg.coef_[0])

    # One curve per asset
    ax.plot(LAG_WINDOWS, slopes, label=column, alpha=0.7)

ax.axhline(0, color='r', linestyle='--', alpha=0.8)
ax.set_title("Volatility-Return Relationship: Slopes Across Different Lag Windows")
ax.set_xlabel("Volatility Calculation Window (days)")
ax.set_ylabel("Volatility Coefficient (Slope)")
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Summary statistics for specific lag windows
#
# One row per window length; for each of the first five selected assets the
# row holds the intercept, slope and R-squared of the regression of next-day
# return on trailing volatility.
key_lags = [7, 14, 30, 60, 90, 180]  # Weekly, bi-weekly, monthly, etc.
MIN_OBSERVATIONS = 30                # fits with fewer paired rows are reported as NaN
summary_assets = df_selected.columns[:5]  # Top 5 assets only

summary_results = []

for lag in key_lags:
    lag_results = {'lag': lag}

    for column in summary_assets:
        # Trailing volatility over `lag` rows; row 0 of the return frame is
        # the fillna(0) placeholder from pct_change(), so it is skipped.
        vol = rets_selected[column].iloc[1:].rolling(window=lag).std().iloc[lag:-1]

        # Return earned from row t to row t+1, stamped at row t:
        # p[t+1] / p[t] - 1. (pct_change(-1) would instead give
        # p[t] / p[t+1] - 1, which has the opposite sign.)
        next_day_rets = df_selected[column].pct_change().shift(-1).iloc[lag + 1:-1]

        # Pair on the shared index; drop missing or infinite rows so the
        # regression only sees usable observations.
        sample = (
            pd.concat({"vol": vol, "ret": next_day_rets}, axis=1, join="inner")
            .replace([np.inf, -np.inf], np.nan)
            .dropna()
        )

        if len(sample) < MIN_OBSERVATIONS:
            # Record the gap explicitly instead of silently omitting the
            # asset, so the table always has the same columns.
            intercept = slope = r_squared = np.nan
        else:
            features = sample[["vol"]].to_numpy()
            target = sample["ret"].to_numpy()
            reg = LinearRegression().fit(features, target)
            intercept = reg.intercept_
            slope = reg.coef_[0]
            # Calculate R-squared
            r_squared = reg.score(features, target)

        lag_results[f'{column}_intercept'] = intercept
        lag_results[f'{column}_slope'] = slope
        lag_results[f'{column}_r2'] = r_squared

    summary_results.append(lag_results)

summary_df = pd.DataFrame(summary_results)
print("\nSummary of Volatility-Return Relationships at Key Lag Windows:")
print(summary_df.round(4))