In [None]:
# Multi-Timeframe Crypto Futures Analysis
# This notebook compares the behavior of funding rates and futures premium across different timeframes

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
from scipy import stats
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Figure styling shared by every plot in the notebook
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 8), 'font.size': 12})

# Read the daily and 8-hour BTCUSDT market files
df_daily = pd.read_csv('../binance_data_pipeline/data/markets/BTCUSDT_1d.csv')
df_8h = pd.read_csv('../binance_data_pipeline/data/markets/BTCUSDT_8h.csv')

# Parse the Timestamp column into datetime values
df_daily = df_daily.assign(Timestamp=pd.to_datetime(df_daily['Timestamp']))
df_8h = df_8h.assign(Timestamp=pd.to_datetime(df_8h['Timestamp']))

# Keep only rows on or after the start of the analysis period
df_daily = df_daily.loc[lambda frame: frame["Timestamp"] >= "2023-06-25"]
df_8h = df_8h.loc[lambda frame: frame["Timestamp"] >= "2023-06-25"]

# Function to prepare data with consistent calculations
def prepare_data(df, timeframe_name):
    """Derive basis, APR, funding-spread, return and volatility columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Market data with the columns ``spot_close``, ``perp_close``,
        ``prompt_close``, ``next_close``, ``prompt_days_till_expiry``,
        ``next_days_till_expiry`` and ``funding_annualized``. The frame is
        copied and never modified.
    timeframe_name : str
        ``'daily'`` selects 30-row / 20-row rolling windows; any other value
        selects 90-row / 60-row windows. The value is also stored in the
        ``timeframe`` column of both returned frames.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_prep``: every input row plus the prompt-contract metrics.
        ``df_next``: the rows of ``df_prep`` whose ``next_close`` is not null,
        plus the next-contract metrics. The next-contract columns and
        ``timeframe`` are present even when no row qualifies.

    Notes
    -----
    Rolling windows, ``pct_change`` and the z-score all work in row order, so
    the input is expected to be sorted by time.
    """
    df_prep = df.copy()

    # Base rate subtracted from both the funding rate and the futures APR:
    # 0.0001 x 3 x 365, expressed in percent (= 10.95).
    # NOTE(review): presumably 0.01% per 8-hour funding interval with three
    # intervals per day (not a "daily" rate) - confirm against the data source.
    binance_rate = 0.0001 * 3 * 365 * 100

    # Shorter column names for the days-to-expiry fields
    df_prep = df_prep.rename(columns={
        "prompt_days_till_expiry": "prompt_dte",
        "next_days_till_expiry": "next_dte",
    })

    # A zero days-to-expiry would annualise to +/-inf, and a single inf is
    # enough to corrupt the rolling mean/std used for the z-score below.
    # Treat those rows as "APR undefined" (NaN) instead.
    prompt_dte = df_prep["prompt_dte"].mask(df_prep["prompt_dte"] == 0)

    # Basis and annualised premium of the prompt contract
    df_prep["prompt_basis"] = df_prep["prompt_close"] - df_prep["spot_close"]
    prompt_basis_ratio = df_prep["prompt_basis"] / df_prep["spot_close"]
    df_prep["prompt_basis_pct"] = prompt_basis_ratio * 100
    df_prep["prompt_apr"] = prompt_basis_ratio * (365 / prompt_dte) * 100
    df_prep["prompt_apr_adjusted"] = df_prep["prompt_apr"] - binance_rate

    # Scale the funding rate by 100 so it is in the same percent units as the APR
    df_prep["funding_annualized"] = df_prep["funding_annualized"] * 100
    df_prep["funding_annualized_adjusted"] = df_prep["funding_annualized"] - binance_rate

    # Spread between funding and futures premium
    df_prep["funding_prompt_spread"] = (
        df_prep["funding_annualized_adjusted"] - df_prep["prompt_apr_adjusted"]
    )

    # Rolling z-score of the spread: 30 rows for daily data, 90 rows otherwise
    # (90 eight-hour bars cover the same 30 days)
    window = 30 if timeframe_name == 'daily' else 90
    spread_roll = df_prep["funding_prompt_spread"].rolling(window=window)
    df_prep["funding_prompt_spread_zscore"] = (
        (df_prep["funding_prompt_spread"] - spread_roll.mean()) / spread_roll.std()
    )

    # Percentage returns and their rolling standard deviation:
    # 20 rows for daily data, 60 rows otherwise (the same 20 days)
    volatility_window = 20 if timeframe_name == 'daily' else 60
    for instrument in ('spot', 'perp', 'prompt'):
        df_prep[f'{instrument}_returns'] = df_prep[f'{instrument}_close'].pct_change() * 100
    for instrument in ('spot', 'perp', 'prompt'):
        df_prep[f'{instrument}_volatility'] = (
            df_prep[f'{instrument}_returns'].rolling(window=volatility_window).std()
        )

    # Next-contract metrics. These are computed unconditionally so that the
    # returned frame has the same columns whether or not any row has a next
    # contract; on an empty frame every operation below is a no-op.
    df_next = df_prep.dropna(subset=['next_close']).copy()
    next_dte = df_next["next_dte"].mask(df_next["next_dte"] == 0)

    df_next["next_basis"] = df_next["next_close"] - df_next["spot_close"]
    next_basis_ratio = df_next["next_basis"] / df_next["spot_close"]
    df_next["next_basis_pct"] = next_basis_ratio * 100
    df_next["next_apr"] = next_basis_ratio * (365 / next_dte) * 100
    df_next["next_apr_adjusted"] = df_next["next_apr"] - binance_rate

    # Annualised forward rate implied between the prompt and next expiries;
    # a zero gap between the two expiries is treated as undefined.
    days_between = df_next["next_dte"] - df_next["prompt_dte"]
    days_between = days_between.mask(days_between == 0)
    df_next["forward_rate"] = (
        (df_next["next_close"] / df_next["prompt_close"]) - 1
    ) * (365 / days_between) * 100

    # Term-structure slope: next APR minus prompt APR
    df_next["term_structure_slope"] = df_next["next_apr_adjusted"] - df_next["prompt_apr_adjusted"]

    # Tag both frames with the timeframe they were built from
    df_next["timeframe"] = timeframe_name
    df_prep["timeframe"] = timeframe_name

    return df_prep, df_next

# Build the derived metric columns for each timeframe
df_daily_prep, df_daily_next = prepare_data(df_daily, 'daily')
df_8h_prep, df_8h_next = prepare_data(df_8h, '8h')

# 1. Row counts per timeframe
print(f"Daily data points: {len(df_daily_prep)}")
print(f"8-hour data points: {len(df_8h_prep)}")

# Preview the key columns of each prepared frame
preview_columns = ['Timestamp', 'prompt_apr_adjusted', 'funding_annualized_adjusted', 'funding_prompt_spread']

print("\nSample of daily data:")
print(df_daily_prep[preview_columns].head())

print("\nSample of 8-hour data:")
print(df_8h_prep[preview_columns].head())

# 2. Collapse the 8h bars to one row per calendar date for direct comparison.
# GroupBy.last() returns the last non-null value of each column within the date.
df_8h_daily = df_8h_prep.assign(Date=df_8h_prep['Timestamp'].dt.date)
df_8h_daily_last = df_8h_daily.groupby('Date').last().reset_index()
# Re-stamp each collapsed row with its date (midnight) so it can share an axis with the daily data
df_8h_daily_last['Timestamp'] = pd.to_datetime(df_8h_daily_last['Date'])


In [None]:
# 3.1 Prompt APR comparison: daily bars vs. end-of-day value of the 8h bars
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_daily_prep['Timestamp'], df_daily_prep['prompt_apr_adjusted'],
        color='blue', label='Daily Timeframe')
ax.plot(df_8h_daily_last['Timestamp'], df_8h_daily_last['prompt_apr_adjusted'],
        color='red', linestyle='--', alpha=0.8, label='8h Timeframe (End of Day)')
ax.set_title('Comparison of Prompt APR Across Timeframes', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Adjusted Prompt APR (%)')
ax.legend()
ax.grid(True, alpha=0.3)
# One major tick per month, labels tilted so they do not overlap
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 3.2 Funding rate comparison: daily bars vs. end-of-day value of the 8h bars
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_daily_prep['Timestamp'], df_daily_prep['funding_annualized_adjusted'],
        color='blue', label='Daily Timeframe')
ax.plot(df_8h_daily_last['Timestamp'], df_8h_daily_last['funding_annualized_adjusted'],
        color='red', linestyle='--', alpha=0.8, label='8h Timeframe (End of Day)')
ax.set_title('Comparison of Annualized Funding Rate Across Timeframes', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Adjusted Funding Rate (%)')
ax.legend()
ax.grid(True, alpha=0.3)
# One major tick per month, labels tilted so they do not overlap
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 3.3 Funding-Prompt spread comparison: daily bars vs. end-of-day value of the 8h bars
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_daily_prep['Timestamp'], df_daily_prep['funding_prompt_spread'],
        color='blue', label='Daily Timeframe')
ax.plot(df_8h_daily_last['Timestamp'], df_8h_daily_last['funding_prompt_spread'],
        color='red', linestyle='--', alpha=0.8, label='8h Timeframe (End of Day)')
# Zero line: above it funding exceeds the futures premium, below it the reverse
ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
ax.set_title('Comparison of Funding-Prompt Spread Across Timeframes', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Spread (%)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 3.4 Z-score comparison: daily bars vs. end-of-day value of the 8h bars
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(df_daily_prep['Timestamp'], df_daily_prep['funding_prompt_spread_zscore'],
        color='blue', label='Daily Timeframe')
ax.plot(df_8h_daily_last['Timestamp'], df_8h_daily_last['funding_prompt_spread_zscore'],
        color='red', linestyle='--', alpha=0.8, label='8h Timeframe (End of Day)')
# Reference bands at +/-2 standard deviations, plus the zero line
for band_level in (2, -2):
    ax.axhline(y=band_level, color='r', linestyle='--', alpha=0.5)
ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
ax.set_title('Comparison of Funding-Prompt Spread Z-Score Across Timeframes', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Z-Score')
ax.legend()
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 4. Intraday analysis of 8-hour data

# 4.1 Add hour of day
df_8h_prep['hour'] = df_8h_prep['Timestamp'].dt.hour

# 4.2 Analyze patterns by hour of day.
# 'count' (non-null observations per hour bucket) is collected so that the
# standard error below uses the real sample size of each bucket.
hourly_stats = df_8h_prep.groupby('hour').agg({
    'prompt_apr_adjusted': ['mean', 'std', 'count'],
    'funding_annualized_adjusted': ['mean', 'std', 'count'],
    'funding_prompt_spread': ['mean', 'std', 'count'],
    'spot_returns': ['mean', 'std', 'count'],
}).reset_index()

# 4.3 Visualize intraday patterns
# (column, title, y-label, bar colour, draw a zero reference line?)
intraday_panels = [
    ('prompt_apr_adjusted', 'Average Prompt APR by Hour of Day', 'Prompt APR (%)', 'C0', False),
    ('funding_annualized_adjusted', 'Average Funding Rate by Hour of Day', 'Funding Rate (%)', 'orange', False),
    ('funding_prompt_spread', 'Average Funding-Prompt Spread by Hour of Day', 'Spread (%)', 'green', True),
    ('spot_returns', 'Average Spot Returns by Hour of Day', 'Returns (%)', 'purple', True),
]

fig, axs = plt.subplots(2, 2, figsize=(16, 12))

for ax, (column, title, ylabel, bar_color, zero_line) in zip(axs.flat, intraday_panels):
    column_stats = hourly_stats[column]
    # Standard error of the mean = std / sqrt(n), with n taken per hour bucket.
    # Dividing by sqrt(len(df) / 24) assumed 24 buckets; 8-hour bars fall into
    # far fewer, so that understated n and inflated every error bar.
    standard_error = column_stats['std'] / np.sqrt(column_stats['count'])

    ax.bar(hourly_stats['hour'], column_stats['mean'], yerr=standard_error,
           alpha=0.7, capsize=5, color=bar_color)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Hour (UTC)')
    ax.set_ylabel(ylabel)
    ax.set_xticks(range(0, 24, 4))
    ax.grid(True, alpha=0.3)
    if zero_line:
        ax.axhline(y=0, color='r', linestyle='--')

plt.tight_layout()
plt.show()

In [None]:
# 5. Correlation analysis across timeframes

# 5.1 Calculate rolling correlations
window_daily = 30  # 30 days
window_8h = 90     # 90 8-hour periods ≈ 30 days

# No .dropna() here: column assignment re-aligns on the index, so dropping the
# warm-up NaNs first would have no effect on the stored column.
df_daily_prep['rolling_corr'] = (
    df_daily_prep['funding_annualized_adjusted']
    .rolling(window=window_daily)
    .corr(df_daily_prep['prompt_apr_adjusted'])
)

df_8h_prep['rolling_corr'] = (
    df_8h_prep['funding_annualized_adjusted']
    .rolling(window=window_8h)
    .corr(df_8h_prep['prompt_apr_adjusted'])
)

# End-of-day value of the 8h rolling correlation, one point per calendar date.
# It has to be derived here: df_8h_daily_last was built before 'rolling_corr'
# existed, so indexing that frame with this column raises KeyError.
rolling_corr_8h_eod = (
    df_8h_prep
    .groupby(df_8h_prep['Timestamp'].dt.normalize())['rolling_corr']
    .last()
)

# 5.2 Visualize rolling correlations
plt.figure(figsize=(14, 8))
plt.plot(df_daily_prep['Timestamp'], df_daily_prep['rolling_corr'], label='Daily Timeframe', color='blue')
plt.plot(rolling_corr_8h_eod.index, rolling_corr_8h_eod.values,
         label='8h Timeframe', color='red', linestyle='--', alpha=0.8)
plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
plt.title('Rolling 30-Day Correlation Between Funding Rate and Prompt APR', fontsize=16)
plt.xlabel('Date')
plt.ylabel('Correlation Coefficient')
plt.legend()
plt.grid(True, alpha=0.3)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.ylim(-1, 1)
plt.tight_layout()
plt.show()

In [None]:
# 5.3 Scatter plot comparison: one regression panel per timeframe
fig, (ax_daily, ax_8h) = plt.subplots(2, 1, figsize=(12, 10))

scatter_panels = (
    (ax_daily, df_daily_prep, 'Daily Timeframe: Funding Rate vs Prompt APR'),
    (ax_8h, df_8h_prep, '8-Hour Timeframe: Funding Rate vs Prompt APR'),
)
for ax, frame, panel_title in scatter_panels:
    sns.regplot(
        x=frame['prompt_apr_adjusted'],
        y=frame['funding_annualized_adjusted'],
        scatter_kws={'alpha': 0.5},
        line_kws={'color': 'red'},
        ax=ax,
    )
    ax.set_title(panel_title, fontsize=14)
    ax.set_xlabel('Prompt APR (%)')
    ax.set_ylabel('Funding Rate (%)')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 5.4 Full-sample Pearson correlation for each timeframe
corr_daily = df_daily_prep['prompt_apr_adjusted'].corr(df_daily_prep['funding_annualized_adjusted'])
corr_8h = df_8h_prep['prompt_apr_adjusted'].corr(df_8h_prep['funding_annualized_adjusted'])

print(f"Overall correlation - Daily timeframe: {corr_daily:.4f}")
print(f"Overall correlation - 8-hour timeframe: {corr_8h:.4f}")

In [None]:
# 6. Lead-lag analysis

# 6.1 Create a merged dataset for analysis
# First, ensure we have matching calendar dates for comparison
df_daily_prep['Date'] = df_daily_prep['Timestamp'].dt.date
df_8h_prep['Date'] = df_8h_prep['Timestamp'].dt.date

# Get end-of-day values from 8h data (last non-null value per column and date)
df_8h_eod = df_8h_prep.groupby('Date').last().reset_index()

# Merge with daily data on date; shared columns get _daily / _8h suffixes
comparison_columns = ['Date', 'prompt_apr_adjusted', 'funding_annualized_adjusted', 'funding_prompt_spread']
merged_df = pd.merge(
    df_daily_prep[comparison_columns],
    df_8h_eod[comparison_columns],
    on='Date',
    suffixes=('_daily', '_8h')
)

# 6.2 Granger causality tests
max_lag = 7  # Test up to 7 days

# grangercausalitytests() tests whether the series in the SECOND column
# Granger-causes the series in the FIRST column, so each frame is passed as
# [effect, cause]. Passing [cause, effect] answers the opposite question to
# the one printed above the results.
granger_questions = [
    ("\nDoes 8h Prompt APR Granger-cause Daily Prompt APR?",
     'prompt_apr_adjusted_daily', 'prompt_apr_adjusted_8h'),
    ("\nDoes Daily Prompt APR Granger-cause 8h Prompt APR?",
     'prompt_apr_adjusted_8h', 'prompt_apr_adjusted_daily'),
    ("\nDoes 8h Funding Rate Granger-cause Daily Funding Rate?",
     'funding_annualized_adjusted_daily', 'funding_annualized_adjusted_8h'),
]

print("\nGranger Causality Tests:")
for question, effect_col, cause_col in granger_questions:
    print(question)
    granger_result = grangercausalitytests(
        merged_df[[effect_col, cause_col]].dropna(),
        maxlag=max_lag
    )
    for lag in range(1, max_lag + 1):
        # 'ssr_ftest' holds (F statistic, p-value, ...) for this lag
        f_stat = granger_result[lag][0]['ssr_ftest']
        print(f"Lag {lag}: F-statistic: {f_stat[0]:.4f}, p-value: {f_stat[1]:.4f}")

In [None]:
# 7. Cross-timeframe trading signal development

# 7.1 Create signals based on funding-prompt spread z-scores
def generate_signals(df, zscore_col, threshold=2.0):
    """Turn a z-score column into long / short / combined signal flags.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Timestamp'`` and the column named by ``zscore_col``.
    zscore_col : str
        Name of the z-score column to threshold.
    threshold : float, default 2.0
        A row is long when the z-score is strictly below ``-threshold`` and
        short when it is strictly above ``+threshold``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df`` with columns ``Timestamp``, ``long_signal`` (0/1),
        ``short_signal`` (0/1) and ``signal`` (1 long, -1 short, 0 flat).
        Rows whose z-score is NaN get 0 in every flag because the comparisons
        evaluate to False.
    """
    zscore = df[zscore_col]

    # Spread far below its rolling mean -> long; far above -> short
    is_long = zscore.lt(-threshold).astype(int)
    is_short = zscore.gt(threshold).astype(int)

    return pd.DataFrame({
        'Timestamp': df['Timestamp'],
        'long_signal': is_long,
        'short_signal': is_short,
        'signal': is_long - is_short,
    })

# Threshold the spread z-score at +/-1.5 on each timeframe
signals_daily = generate_signals(df_daily_prep, 'funding_prompt_spread_zscore', threshold=1.5)
signals_8h = generate_signals(df_8h_prep, 'funding_prompt_spread_zscore', threshold=1.5)

# 7.2 Visualize signals
# The 8h panel shows only every 3rd marker to avoid overcrowding
sample_rate = 3

# (subplot row, source frame, its signals, marker stride, panel title)
signal_panels = (
    (1, df_daily_prep, signals_daily, 1,
     'Daily Timeframe: Funding-Prompt Spread Z-Score with Signals'),
    (2, df_8h_prep, signals_8h, sample_rate,
     '8-Hour Timeframe: Funding-Prompt Spread Z-Score with Signals'),
)
# (signal column, marker colour, marker shape, legend label)
marker_styles = (
    ('long_signal', 'green', '^', 'Long Signal'),
    ('short_signal', 'red', 'v', 'Short Signal'),
)

plt.figure(figsize=(14, 10))
for subplot_row, frame, frame_signals, stride, panel_title in signal_panels:
    ax = plt.subplot(2, 1, subplot_row)
    zscores = frame['funding_prompt_spread_zscore']
    ax.plot(frame['Timestamp'], zscores, color='blue')

    for flag_column, marker_color, marker_shape, marker_label in marker_styles:
        flagged = frame_signals[flag_column] == 1
        ax.scatter(
            frame_signals.loc[flagged, 'Timestamp'].iloc[::stride],
            zscores.loc[flagged].iloc[::stride],
            color=marker_color, marker=marker_shape, s=100, label=marker_label
        )

    # Entry thresholds and the zero line
    ax.axhline(y=1.5, color='r', linestyle='--', alpha=0.5)
    ax.axhline(y=-1.5, color='g', linestyle='--', alpha=0.5)
    ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    ax.set_title(panel_title, fontsize=14)
    ax.set_xlabel('Date')
    ax.set_ylabel('Z-Score')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# 7.3 Create a cross-timeframe signal
# Combine signals from both timeframes to create a stronger signal.
# For each 8h bar, attach the z-score of the latest daily bar whose Timestamp
# is <= the 8h Timestamp. merge_asof does this in one pass instead of
# filtering the daily frame once per 8h row.
# NOTE(review): if the daily Timestamp marks the bar OPEN, its z-score (built
# from closes) is not known until the bar ends and this lookup peeks ahead
# by up to a day - confirm the timestamp convention of the CSVs.
daily_lookup = (
    df_daily_prep[['Timestamp', 'funding_prompt_spread_zscore']]
    .rename(columns={'funding_prompt_spread_zscore': 'daily_zscore'})
    .sort_values('Timestamp')
)
bars_8h = df_8h_prep[['Timestamp']].sort_values('Timestamp')
matched = pd.merge_asof(bars_8h, daily_lookup, on='Timestamp', direction='backward')
# merge_asof returns a fresh RangeIndex in the left frame's row order; put the
# original labels back so the assignment below aligns row-for-row.
matched.index = bars_8h.index
df_8h_prep['daily_zscore'] = matched['daily_zscore']

# Create combined signal that requires both timeframes to agree
df_8h_prep['combined_long_signal'] = (
    (df_8h_prep['funding_prompt_spread_zscore'] < -1.5) &
    (df_8h_prep['daily_zscore'] < -1.0)
).astype(int)

df_8h_prep['combined_short_signal'] = (
    (df_8h_prep['funding_prompt_spread_zscore'] > 1.5) &
    (df_8h_prep['daily_zscore'] > 1.0)
).astype(int)

df_8h_prep['combined_signal'] = df_8h_prep['combined_long_signal'] - df_8h_prep['combined_short_signal']

# 7.4 Calculate hypothetical returns
# For this simplified backtest, assume:
# - We enter at the perp close of the bar on which the signal appears
# - We exit at the perp close exactly `holding_period` bars later
#   (exit on signal reversal is NOT implemented)
# - Only one position is open at a time; signals seen while a trade is open are skipped
# - Transaction costs and funding payments are ignored for now


def simulate_fixed_hold(signal, price, holding_period):
    """Simulate non-overlapping fixed-horizon trades.

    Parameters
    ----------
    signal : sequence of int
        +1 (long), -1 (short) or 0 (no signal) per bar.
    price : sequence of float
        Close price per bar, same length as ``signal``.
    holding_period : int
        Number of bars between entry and exit; must be >= 1.

    Returns
    -------
    list of (entry_pos, exit_pos, side, pnl_pct)
        One tuple per trade. ``pnl_pct`` is ``side * (exit / entry - 1) * 100``,
        i.e. the return relative to the ENTRY price for longs and shorts alike.

    Raises
    ------
    ValueError
        If ``holding_period`` is smaller than 1.
    """
    if holding_period < 1:
        raise ValueError("holding_period must be >= 1")

    trades = []
    bar = 0
    last_entry_bar = len(signal) - holding_period  # later entries could not exit in-sample
    while bar < last_entry_bar:
        side = signal[bar]
        if side == 0:
            bar += 1
            continue
        exit_bar = bar + holding_period
        pnl_pct = side * (price[exit_bar] / price[bar] - 1) * 100
        trades.append((bar, exit_bar, side, pnl_pct))
        # Flat again at the exit bar; a signal on that bar may open the next trade
        bar = exit_bar
    return trades


# Implement a simple holding period strategy
holding_periods = [1, 3, 6, 12]  # Number of 8h periods to hold
results = []

signal_values = df_8h_prep['combined_signal'].to_numpy()
price_values = df_8h_prep['perp_close'].to_numpy(dtype=float)

for holding_period in holding_periods:
    trades = simulate_fixed_hold(signal_values, price_values, holding_period)

    # Per-bar bookkeeping columns for inspection / plotting
    n_bars = len(df_8h_prep)
    position = np.zeros(n_bars, dtype=int)
    entry_price = np.full(n_bars, np.nan)
    exit_price = np.full(n_bars, np.nan)
    pnl_by_bar = np.zeros(n_bars)
    for entry_bar, exit_bar, side, pnl_pct in trades:
        position[entry_bar:exit_bar] = side          # held from entry up to (not including) exit
        entry_price[entry_bar] = price_values[entry_bar]
        exit_price[exit_bar] = price_values[exit_bar]
        pnl_by_bar[exit_bar] = pnl_pct               # PnL is booked on the exit bar

    backtest_df = df_8h_prep.assign(
        position=position,
        entry_price=entry_price,
        exit_price=exit_price,
        pnl_pct=pnl_by_bar,
    )

    # Calculate performance metrics from the trade list itself
    trade_pnl = pd.Series([trade[3] for trade in trades], dtype=float)
    wins = trade_pnl[trade_pnl > 0]
    losses = trade_pnl[trade_pnl < 0]

    total_trades = len(trade_pnl)
    win_rate = len(wins) / total_trades if total_trades > 0 else 0
    avg_win = wins.mean() if len(wins) > 0 else 0
    avg_loss = losses.mean() if len(losses) > 0 else 0

    # Profit factor = gross profit / gross loss (breakeven trades count as neither)
    gross_profit = wins.sum()
    gross_loss = -losses.sum()
    if gross_loss > 0:
        profit_factor = gross_profit / gross_loss
    elif gross_profit > 0:
        profit_factor = float('inf')  # winners but no losers
    else:
        profit_factor = 0
    cumulative_return = trade_pnl.sum()  # simple (non-compounded) sum of trade returns

    results.append({
        'Holding Period': f"{holding_period * 8} hours",
        'Total Trades': total_trades,
        'Win Rate': f"{win_rate:.2%}",
        'Avg Win': f"{avg_win:.2f}%",
        'Avg Loss': f"{avg_loss:.2f}%",
        'Profit Factor': f"{profit_factor:.2f}",
        'Cumulative Return': f"{cumulative_return:.2f}%"
    })

# Display results
results_df = pd.DataFrame(results)
print("\nBacktest Results for Different Holding Periods:")
print(results_df)


In [None]:
# 7.5 Plot cumulative returns for best holding period
# Identify best holding period: the 'Cumulative Return' cells are strings such
# as "12.34%", so strip the percent sign before comparing.
best_idx = results_df['Cumulative Return'].str.rstrip('%').astype(float).idxmax()
best_holding_period = holding_periods[best_idx]

# Re-run the fixed-horizon simulation for the best holding period
backtest_df = df_8h_prep.copy()
signal_values = backtest_df['combined_signal'].to_numpy()
price_values = backtest_df['perp_close'].to_numpy(dtype=float)
position = np.zeros(len(backtest_df), dtype=int)
pnl_by_bar = np.zeros(len(backtest_df))

bar = 0
last_entry_bar = len(backtest_df) - best_holding_period  # later entries could not exit in-sample
while bar < last_entry_bar:
    side = signal_values[bar]
    if side == 0:
        bar += 1
        continue
    exit_bar = bar + best_holding_period
    position[bar:exit_bar] = side
    # Return relative to the ENTRY price for both directions:
    # long -> exit/entry - 1, short -> -(exit/entry - 1)
    pnl_by_bar[exit_bar] = side * (price_values[exit_bar] / price_values[bar] - 1) * 100
    # Skip ahead so positions never overlap; checking only the current row's
    # position flag let a new trade open on every consecutive signal bar.
    bar = exit_bar

backtest_df['position'] = position
backtest_df['pnl_pct'] = pnl_by_bar

# Calculate cumulative return (simple sum of per-trade returns, booked on exit bars)
backtest_df['cumulative_return'] = backtest_df['pnl_pct'].cumsum()

# Plot cumulative return
plt.figure(figsize=(14, 8))
plt.plot(backtest_df['Timestamp'], backtest_df['cumulative_return'])
plt.title(f'Cumulative Return with {best_holding_period * 8}-Hour Holding Period', fontsize=16)
plt.xlabel('Date')
plt.ylabel('Cumulative Return (%)')
plt.grid(True, alpha=0.3)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# 8. Analysis across market regimes

# 8.1 Identify market regimes using K-means clustering
# Select features for clustering; rows with any missing feature (e.g. the
# volatility warm-up period) are left unlabelled.
features = df_daily_prep[['spot_volatility', 'prompt_apr_adjusted', 'funding_annualized_adjusted']].dropna()
features_scaled = StandardScaler().fit_transform(features)

# Determine optimal number of clusters by silhouette score over k = 2..5
silhouette_scores = []
k_range = range(2, 6)
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    cluster_labels = kmeans.fit_predict(features_scaled)
    silhouette_scores.append(silhouette_score(features_scaled, cluster_labels))

optimal_k = k_range[np.argmax(silhouette_scores)]
print(f"\nOptimal number of clusters: {optimal_k}")

# Apply K-means with optimal k
# NOTE(review): the clusters are fitted on the full sample, so regime labels
# use information from the whole period and are not available in real time.
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
df_daily_prep['regime'] = np.nan
df_daily_prep.loc[features.index, 'regime'] = kmeans.fit_predict(features_scaled)

# 8.2 Analyze regime characteristics
print("\nMarket Regime Characteristics:")
for i in range(optimal_k):
    regime_data = df_daily_prep[df_daily_prep['regime'] == i]
    print(f"\nRegime {i}:")
    print(f"Number of days: {len(regime_data)}")
    print(f"Average Spot Volatility: {regime_data['spot_volatility'].mean():.2f}")
    print(f"Average Prompt APR: {regime_data['prompt_apr_adjusted'].mean():.2f}%")
    print(f"Average Funding Rate: {regime_data['funding_annualized_adjusted'].mean():.2f}%")
    print(f"Average Funding-Prompt Spread: {regime_data['funding_prompt_spread'].mean():.2f}%")

# 8.3 Map regimes to 8h data
# Each 8h bar takes the regime of the first daily row sharing its calendar
# date (NaN when there is no such row or it is unlabelled). A date -> regime
# lookup replaces filtering the daily frame once per 8h row.
regime_by_date = (
    df_daily_prep
    .assign(_calendar_date=df_daily_prep['Timestamp'].dt.normalize())
    .drop_duplicates('_calendar_date', keep='first')
    .set_index('_calendar_date')['regime']
)
df_8h_prep['regime'] = df_8h_prep['Timestamp'].dt.normalize().map(regime_by_date)

# 8.4 Analyze strategy performance by regime
# Forward return over the holding period, computed on the FULL 8h series.
# Computing it after filtering to one regime would pair each bar with the
# bar N rows later *within that regime*, which can lie across a gap of
# arbitrary length once the regime's rows are not contiguous in time.
perp_close = df_8h_prep['perp_close']
forward_return_pct = (perp_close.shift(-best_holding_period) / perp_close - 1) * 100

regime_performance = []

for regime in range(optimal_k):
    regime_data = df_8h_prep[df_8h_prep['regime'] == regime].copy()

    if len(regime_data) > 0:
        # Generate signals just for this regime (same rule as the combined signal)
        regime_data['regime_long_signal'] = (
            (regime_data['funding_prompt_spread_zscore'] < -1.5) &
            (regime_data['daily_zscore'] < -1.0)
        ).astype(int)

        regime_data['regime_short_signal'] = (
            (regime_data['funding_prompt_spread_zscore'] > 1.5) &
            (regime_data['daily_zscore'] > 1.0)
        ).astype(int)

        regime_data['regime_signal'] = regime_data['regime_long_signal'] - regime_data['regime_short_signal']

        # Count signals
        long_signals = regime_data['regime_long_signal'].sum()
        short_signals = regime_data['regime_short_signal'].sum()

        # Hypothetical returns (simplified): every signal bar is scored
        # independently, so overlapping holding windows are all counted.
        # Index alignment picks the forward return of each bar in this regime.
        regime_data['next_return'] = forward_return_pct

        # For longs: next return is the return
        long_returns = regime_data.loc[regime_data['regime_long_signal'] == 1, 'next_return']

        # For shorts: next return is negated
        short_returns = -regime_data.loc[regime_data['regime_short_signal'] == 1, 'next_return']

        # Calculate performance metrics
        avg_long_return = long_returns.mean() if len(long_returns) > 0 else 0
        avg_short_return = short_returns.mean() if len(short_returns) > 0 else 0
        win_rate_long = (long_returns > 0).mean() if len(long_returns) > 0 else 0
        win_rate_short = (short_returns > 0).mean() if len(short_returns) > 0 else 0

        regime_performance.append({
            'Regime': regime,
            'Days': len(regime_data) // 3,  # Approximate days (8h periods / 3)
            'Long Signals': long_signals,
            'Short Signals': short_signals,
            'Avg Long Return': f"{avg_long_return:.2f}%",
            'Avg Short Return': f"{avg_short_return:.2f}%",
            'Win Rate Long': f"{win_rate_long:.2%}",
            'Win Rate Short': f"{win_rate_short:.2%}"
        })

# Display regime performance
regime_performance_df = pd.DataFrame(regime_performance)
print("\nStrategy Performance by Market Regime:")
print(regime_performance_df)


In [None]:
# 8.5 Visualize regimes and signals
plt.figure(figsize=(14, 10))

# Top panel: daily funding-prompt spread, one scatter series per regime
ax_regimes = plt.subplot(2, 1, 1)
for regime in range(optimal_k):
    in_regime = df_daily_prep['regime'] == regime
    ax_regimes.scatter(
        df_daily_prep.loc[in_regime, 'Timestamp'],
        df_daily_prep.loc[in_regime, 'funding_prompt_spread'],
        label=f'Regime {regime}',
        alpha=0.7,
        s=50
    )
ax_regimes.set_title('Market Regimes Based on Clustering', fontsize=16)
ax_regimes.set_xlabel('Date')
ax_regimes.set_ylabel('Funding-Prompt Spread (%)')
ax_regimes.legend()
ax_regimes.grid(True, alpha=0.3)
ax_regimes.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)

# Bottom panel: non-zero combined signals on the 8h bars, one series per regime
ax_signals = plt.subplot(2, 1, 2)
has_signal = df_8h_prep['combined_signal'] != 0
for regime in range(optimal_k):
    signals_in_regime = df_8h_prep[(df_8h_prep['regime'] == regime) & has_signal]

    # Skip regimes without any signal so they add no empty legend entry
    if len(signals_in_regime) == 0:
        continue
    ax_signals.scatter(
        signals_in_regime['Timestamp'],
        signals_in_regime['combined_signal'],
        label=f'Regime {regime} Signals',
        alpha=0.7,
        s=50
    )

ax_signals.set_title('Trading Signals by Market Regime', fontsize=16)
ax_signals.set_xlabel('Date')
ax_signals.set_ylabel('Signal Direction (1=Long, -1=Short)')
ax_signals.legend()
ax_signals.grid(True, alpha=0.3)
ax_signals.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)
plt.yticks([-1, 0, 1])

plt.tight_layout()
plt.show()

In [None]:
# 9. Summarize findings
# Assemble the report line by line, then emit it with a single print
findings = [
    "\nKey Findings from Multi-Timeframe Analysis:",
    "1. Correlation between funding rate and prompt APR varies across timeframes",
    f"   - Daily correlation: {corr_daily:.4f}",
    f"   - 8-hour correlation: {corr_8h:.4f}",
    f"2. The {best_holding_period * 8}-hour holding period showed the best performance",
    f"3. {optimal_k} distinct market regimes were identified",
    "4. The strategy performs differently across market regimes",
    "5. Cross-timeframe signals provide stronger confirmation than single timeframe signals",
]
print("\n".join(findings))

In [None]:
# 10. Proposed Trading Strategy
print("\nProposed Trading Strategy:")
print("Based on the analysis, we propose a cross-timeframe mean reversion strategy that:")
print("1. Uses both daily and 8-hour data to generate more robust signals")
print("2. Trades based on discrepancies between funding rates and quarterly futures premium")
print("3. Adapts position sizing and holding periods based on the current market regime")
print("4. Implements the following rules:")
# Thresholds as used by the combined signal: 8h z-score at +/-1.5, daily z-score at +/-1.0
print("   - Long Entry: When both 8h and daily z-scores are below -1.5 and -1.0 respectively")
print("   - Short Entry: When both 8h and daily z-scores are above 1.5 and 1.0 respectively")
# f-string so the holding period is interpolated instead of printed as a literal placeholder
print(f"   - Exit: After {best_holding_period * 8} hours or on signal reversal")
print("   - Position Sizing: Larger in regimes that show better historical performance")