In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import pnl_utils

# Set plotting style.
# Order matters: sns.set() is applied after the matplotlib style sheet, and the
# explicit rcParams assignments below are applied last.
plt.style.use("ggplot")
sns.set(font_scale=1.2)
plt.rcParams["figure.figsize"] = (14, 8)
plt.rcParams["font.size"] = 12

# Load data
print("Loading market data...")
df = pd.read_csv("../binance_data_pipeline/data/markets/BTCUSDT_8h.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# Filter data to the relevant time period where we have futures data.
# The explicit .copy() detaches the filtered rows from the full CSV frame so
# that the column assignments made in later cells operate on an independent
# frame instead of triggering SettingWithCopyWarning.
df = df[df["Timestamp"] >= "2023-06-25"].copy()

# Fail fast with a readable message instead of a confusing error further down
# when the CSV holds no rows in the requested period.
if df.empty:
    raise ValueError("No market data on or after 2023-06-25; check the input CSV.")

print(
    f"Data loaded with {len(df)} rows from {df['Timestamp'].min().date()} to {df['Timestamp'].max().date()}"
)

# Display a sample of the data
df.head()

In [None]:
# Step 1: Data Preprocessing and Metric Calculation
print("\n--- Step 1: Computing Key Metrics ---")

# Rolling window lengths, expressed in BARS (rows), not calendar days.
# NOTE(review): the data appears to be 8-hour bars (3 per day, matching the
# funding annualization factor below). If so, these windows cover 10 days and
# ~6.7 days respectively, not the "30-day"/"20-day" the previous comments
# claimed. Use 90 / 60 if calendar-day windows are what is intended.
ZSCORE_WINDOW = 30
VOLATILITY_WINDOW = 20

# Rename days till expiry columns. rename() returns a new frame, which keeps
# this cell idempotent and avoids mutating a possibly-sliced frame in place.
df = df.rename(
    columns={
        "prompt_days_till_expiry": "prompt_dte",
        "next_days_till_expiry": "next_dte",
    }
)

# Binance's base interest rate in annualized percentage terms:
# 0.01% per period * 3 periods per day * 365 days * 100.
# NOTE: this value is informational only; nothing below subtracts it from
# prompt_apr or from the funding rate.
binance_rate = 0.0001 * 3 * 365 * 100

# Calculate basis for prompt futures contract (absolute and as % of spot)
df["prompt_basis"] = df["prompt_close"] - df["spot_close"]
df["prompt_basis_pct"] = (df["prompt_basis"] / df["spot_close"]) * 100

# Calculate APR for prompt futures contract: basis / spot, scaled to a year by
# days till expiry, in percent. Rows with a non-positive days-till-expiry are
# masked to NaN; dividing by 0 would otherwise yield +/-inf and poison the
# spread and every rolling window that contains such a row.
valid_prompt_dte = df["prompt_dte"].where(df["prompt_dte"] > 0)
df["prompt_apr"] = (
    (df["prompt_basis"] / df["spot_close"]) * (365 / valid_prompt_dte) * 100
)

# Convert funding rate to equivalent APR units (3 periods per day, in percent)
df["funding_annualized"] = df["funding_rate"] * 3 * 365 * 100

# Calculate the spread between funding rate and futures premium
df["funding_prompt_spread"] = df["funding_annualized"] - df["prompt_apr"]

# Calculate z-score of the spread over a rolling window of ZSCORE_WINDOW bars.
# The rolling object is built once and reused for both mean and std.
spread_rolling = df["funding_prompt_spread"].rolling(window=ZSCORE_WINDOW)
df["funding_prompt_spread_zscore"] = (
    df["funding_prompt_spread"] - spread_rolling.mean()
) / spread_rolling.std()

# Calculate per-bar percentage returns for performance comparison
instruments = ("spot", "perp", "prompt")
for instrument in instruments:
    df[f"{instrument}_returns"] = df[f"{instrument}_close"].pct_change() * 100

# Calculate volatility: rolling standard deviation of returns over
# VOLATILITY_WINDOW bars (second loop keeps the original column order)
for instrument in instruments:
    df[f"{instrument}_volatility"] = (
        df[f"{instrument}_returns"].rolling(window=VOLATILITY_WINDOW).std()
    )

# Display calculated metrics
metrics_df = df[
    [
        "Timestamp",
        "spot_close",
        "perp_close",
        "prompt_close",
        "funding_annualized",
        "prompt_apr",
        "funding_prompt_spread",
        "funding_prompt_spread_zscore",
    ]
].copy()

print("Sample of calculated metrics:")
# Bare last expression gives the notebook's rich table rendering; rows that
# are still inside the rolling warm-up (NaN z-score) are dropped first.
metrics_df.dropna().head()

In [None]:
# Step 2: Visualize Key Metrics for Strategy Development
print("\n--- Step 2: Visualizing Key Metrics ---")

# Plot 1: annualized funding rate and prompt futures APR on one time axis,
# drawn through the explicit Axes interface.
rates_fig, rates_ax = plt.subplots(figsize=(14, 7))
for rate_column, line_format, legend_label in (
    ("funding_annualized", "b-", "Funding Rate (Annualized)"),
    ("prompt_apr", "r-", "Prompt Futures APR"),
):
    rates_ax.plot(df["Timestamp"], df[rate_column], line_format, label=legend_label)

# Zero reference line
rates_ax.axhline(y=0, color="k", linestyle="--", alpha=0.3)

rates_ax.set_title("Funding Rate vs Prompt Futures APR Over Time", fontsize=16)
rates_ax.set_xlabel("Date")
rates_ax.set_ylabel("Annualized Rate (%)")
rates_ax.legend()
rates_ax.grid(True, alpha=0.3)

# One major tick per month, labelled YYYY-MM and rotated for legibility
rates_ax.xaxis.set_major_locator(mdates.MonthLocator())
rates_ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
for tick_label in rates_ax.get_xticklabels():
    tick_label.set_rotation(45)

rates_fig.tight_layout()
plt.show()

# Plot 2: funding-prompt spread (top panel) and its z-score (bottom panel),
# stacked on a shared time axis.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
timestamps = df["Timestamp"]

# Top panel: raw spread with a zero reference line
ax1.plot(timestamps, df["funding_prompt_spread"], "g-")
ax1.axhline(y=0, color="k", linestyle="--", alpha=0.3)
ax1.set_title("Funding Rate - Prompt APR Spread", fontsize=14)
ax1.set_ylabel("Spread (%)")
ax1.grid(True, alpha=0.3)

# Bottom panel: z-score with zero line and the +/-1.5 reference levels
ax2.plot(timestamps, df["funding_prompt_spread_zscore"], "purple")
ax2.axhline(y=0, color="k", linestyle="--", alpha=0.3)
for level, level_color in ((1.5, "r"), (-1.5, "g")):
    ax2.axhline(y=level, color=level_color, linestyle="--", alpha=0.5)
ax2.set_title("Z-Score of Funding-Prompt Spread", fontsize=14)
ax2.set_xlabel("Date")
ax2.set_ylabel("Z-Score")
ax2.grid(True, alpha=0.3)

# Monthly ticks are configured on the bottom panel, which is the current axes
# after plt.subplots() and carries the visible tick labels.
ax2.xaxis.set_major_locator(mdates.MonthLocator())
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
for tick_label in ax2.get_xticklabels():
    tick_label.set_rotation(45)

fig.tight_layout()
plt.show()

# Plot 3: histogram (with KDE overlay) of the non-missing spread z-scores,
# with the +/-1.5 signal thresholds marked.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
zscores = df["funding_prompt_spread_zscore"].dropna()
sns.histplot(zscores, bins=30, kde=True, ax=hist_ax)

for threshold, threshold_color, threshold_label in (
    (1.5, "r", "Signal Threshold: +1.5"),
    (-1.5, "g", "Signal Threshold: -1.5"),
):
    hist_ax.axvline(
        x=threshold,
        color=threshold_color,
        linestyle="--",
        alpha=0.7,
        label=threshold_label,
    )

hist_ax.set_title("Distribution of Funding-Prompt Spread Z-Scores", fontsize=16)
hist_ax.set_xlabel("Z-Score")
hist_ax.set_ylabel("Frequency")
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)
hist_fig.tight_layout()
plt.show()

# Plot 4: annualized funding rate against prompt APR, one point per row,
# with the y = x line marking where the two rates are equal.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    x="prompt_apr",
    y="funding_annualized",
    data=df,
    alpha=0.7,
    ax=scatter_ax,
)
scatter_ax.axline(
    [0, 0], [1, 1], color="r", linestyle="--", alpha=0.7, label="Equal Rates"
)
scatter_ax.set_title("Funding Rate vs. Prompt APR", fontsize=16)
scatter_ax.set_xlabel("Prompt Futures APR (%)")
scatter_ax.set_ylabel("Funding Rate (%)")
scatter_ax.grid(True, alpha=0.3)
scatter_ax.legend()
scatter_fig.tight_layout()
plt.show()

In [None]:
# Step 3: Define the Trading Strategy
print("\n--- Step 3: Defining Trading Strategy ---")

# Baseline backtest parameters, named so they are easy to find and tune.
Z_THRESHOLD = 1.5
INITIAL_CAPITAL = 10000
LEVERAGE = 1
MAX_ALLOCATION = 0.8
# Forwarded as trading_fee_pct; presumably a percentage (0.04 -> 0.04%),
# TODO confirm against pnl_utils.calculate_pnl.
TRADING_FEE_PCT = 0.04

# Run the strategy with the baseline parameters.
# pnl_utils is already imported in the first cell, so it is not re-imported here.
print("Running strategy backtest...")

# Generate signals
strategy_df = pnl_utils.generate_signals(df, z_threshold=Z_THRESHOLD)
# Counts rows whose signal column is non-zero
print(f"Generated {(strategy_df['signal'] != 0).sum()} trading signals")

# Calculate positions
positioned_df = pnl_utils.calculate_positions(
    strategy_df,
    initial_capital=INITIAL_CAPITAL,
    leverage=LEVERAGE,
    max_allocation=MAX_ALLOCATION,
)

# Calculate P&L
backtest_results = pnl_utils.calculate_pnl(
    positioned_df, trading_fee_pct=TRADING_FEE_PCT
)

# Calculate performance metrics
performance = pnl_utils.calculate_performance_metrics(backtest_results)

# Display performance summary: floats (Python or NumPy) are rounded to two
# decimals, everything else is printed as-is.
print("\nStrategy Performance Summary:")
for key, value in performance.items():
    if isinstance(value, (float, np.floating)):
        print(f"{key}: {value:.2f}")
    else:
        print(f"{key}: {value}")

In [None]:
# Step 4: Visualize Backtest Results
print("\n--- Step 4: Visualizing Backtest Results ---")

# Plot 1: cumulative P&L of the backtest over time
equity_fig, equity_ax = plt.subplots(figsize=(14, 7))
equity_ax.plot(
    backtest_results["Timestamp"],
    backtest_results["cumulative_pnl"],
    "b-",
    linewidth=2,
)
equity_ax.set_title("Strategy Equity Curve", fontsize=16)
equity_ax.set_xlabel("Date")
equity_ax.set_ylabel("Cumulative P&L (USD)")
equity_ax.grid(True, alpha=0.3)

# One major tick per month, labelled YYYY-MM and rotated for legibility
equity_ax.xaxis.set_major_locator(mdates.MonthLocator())
equity_ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
for tick_label in equity_ax.get_xticklabels():
    tick_label.set_rotation(45)

equity_fig.tight_layout()
plt.show()

# Plot 2: instrument prices (top panel) and strategy positions (bottom panel)
# on a shared time axis. Each panel is described by a small table of
# (column, line format, legend label) rows and drawn in a loop.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
backtest_timestamps = backtest_results["Timestamp"]

# Top panel: asset prices, semi-transparent so overlapping lines stay visible
price_lines = (
    ("spot_close", "k-", "Spot Price"),
    ("perp_close", "b-", "Perpetual Future Price"),
    ("prompt_close", "r-", "Prompt Future Price"),
)
for price_column, line_format, legend_label in price_lines:
    ax1.plot(
        backtest_timestamps,
        backtest_results[price_column],
        line_format,
        alpha=0.5,
        label=legend_label,
    )
ax1.set_title("Asset Prices", fontsize=14)
ax1.set_ylabel("Price (USD)")
ax1.legend()
ax1.grid(True, alpha=0.3)

# Bottom panel: position held in each futures leg
position_lines = (
    ("position_perp", "b-", "Perpetual Position"),
    ("position_prompt", "r-", "Prompt Position"),
)
for position_column, line_format, legend_label in position_lines:
    ax2.plot(
        backtest_timestamps,
        backtest_results[position_column],
        line_format,
        label=legend_label,
    )
ax2.set_title("Strategy Positions (in BTC)", fontsize=14)
ax2.set_xlabel("Date")
ax2.set_ylabel("Position Size (BTC)")
ax2.legend()
ax2.grid(True, alpha=0.3)

# Monthly ticks are configured on the bottom panel, which is the current axes
# after plt.subplots() and carries the visible tick labels.
ax2.xaxis.set_major_locator(mdates.MonthLocator())
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
for tick_label in ax2.get_xticklabels():
    tick_label.set_rotation(45)

fig.tight_layout()
plt.show()

# Plot 3: P&L Breakdown
# One bar per P&L component; the fee cost is negated so it is drawn as a
# negative contribution.
breakdown_fig, breakdown_ax = plt.subplots(figsize=(14, 7))
width = 0.25
component_names = ["Trading P&L", "Funding P&L", "Fee Cost"]
pnl_components = [
    performance["trading_pnl"],
    performance["funding_pnl"],
    -performance["fee_cost"],
]
x = np.arange(len(pnl_components))
breakdown_ax.bar(x, pnl_components, width=width, color=["blue", "green", "red"])
breakdown_ax.set_xticks(x)
breakdown_ax.set_xticklabels(component_names)
breakdown_ax.set_title("P&L Component Breakdown", fontsize=16)
breakdown_ax.set_ylabel("P&L (USD)")
breakdown_ax.grid(True, alpha=0.3, axis="y")

# Add the value next to the tip of each bar. The gap is given in points
# (screen units) rather than in USD, so the label sits just beyond the bar
# whatever the magnitude of the P&L figures: above positive bars, below
# negative ones.
for i, v in enumerate(pnl_components):
    is_non_negative = v >= 0
    breakdown_ax.annotate(
        f"{v:.2f}",
        xy=(i, v),
        xytext=(0, 5 if is_non_negative else -5),
        textcoords="offset points",
        ha="center",
        va="bottom" if is_non_negative else "top",
    )

breakdown_fig.tight_layout()
plt.show()

# Plot 4: Drawdown Chart
# Drawdown is the fall from the running peak of cumulative P&L, expressed as a
# percentage of that peak.
plt.figure(figsize=(14, 7))
cumulative_pnl = backtest_results["cumulative_pnl"]
running_peak = cumulative_pnl.cummax()

# A percentage of the peak is only meaningful while the peak is positive.
# Where the running peak is zero or negative the ratio would be +/-inf, NaN
# or sign-flipped, so those points are masked out (NaN) and left blank.
# NOTE(review): if cumulative_pnl starts at 0 (pure P&L, not account equity),
# a percentage of peak P&L overstates drawdown early on; consider measuring
# against initial capital + cumulative P&L instead. TODO confirm what
# pnl_utils reports in this column.
positive_peak = running_peak.where(running_peak > 0)
drawdown = (running_peak - cumulative_pnl) / positive_peak * 100

plt.plot(backtest_results["Timestamp"], drawdown, "r-")
plt.fill_between(backtest_results["Timestamp"], drawdown, 0, color="red", alpha=0.3)
plt.title("Strategy Drawdown", fontsize=16)
plt.xlabel("Date")
plt.ylabel("Drawdown (%)")
plt.grid(True, alpha=0.3)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Step 5: Parameter Sensitivity Analysis
print("\n--- Step 5: Parameter Sensitivity Analysis ---")

# Parameters being swept
z_thresholds = [1.0, 1.5, 2.0, 2.5]
leverage_values = [2, 3, 4]

# Everything that is NOT swept is pinned to the values used by the Step 3
# baseline backtest. Without this the sweep would run on whatever defaults
# pnl_utils happens to define, and its numbers would not be comparable with
# the baseline results shown earlier.
baseline_position_kwargs = {"initial_capital": 10000, "max_allocation": 0.8}
baseline_trading_fee_pct = 0.04

# Metrics copied from each run into the results table
reported_metrics = (
    "total_pnl",
    "sharpe_ratio",
    "max_drawdown_pct",
    "win_rate",
    "profit_factor",
)

# Initialize results container
sensitivity_results = []

# Run sensitivity analysis
for z in z_thresholds:
    # Signals depend only on the z-score threshold, so they are generated once
    # per threshold rather than once per (threshold, leverage) pair.
    signals = pnl_utils.generate_signals(df, z_threshold=z)
    for lev in leverage_values:
        # Each run gets its own copy in case calculate_positions modifies its
        # input in place.
        positions = pnl_utils.calculate_positions(
            signals.copy(), leverage=lev, **baseline_position_kwargs
        )
        results = pnl_utils.calculate_pnl(
            positions, trading_fee_pct=baseline_trading_fee_pct
        )
        metrics = pnl_utils.calculate_performance_metrics(results)

        sensitivity_results.append(
            {
                "z_threshold": z,
                "leverage": lev,
                **{name: metrics[name] for name in reported_metrics},
            }
        )

# Convert to DataFrame for easier visualization (one row per combination)
sensitivity_df = pd.DataFrame(sensitivity_results)

# Leverage x threshold grids for the two headline metrics
pivot_sharpe = sensitivity_df.pivot(
    index="leverage", columns="z_threshold", values="sharpe_ratio"
)
pivot_pnl = sensitivity_df.pivot(
    index="leverage", columns="z_threshold", values="total_pnl"
)

# One annotated heatmap per metric
for pivot_table, heatmap_title in (
    (pivot_sharpe, "Sharpe Ratio by Parameter Combination"),
    (pivot_pnl, "Total P&L by Parameter Combination"),
):
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table, annot=True, cmap="viridis", fmt=".2f")
    plt.title(heatmap_title, fontsize=16)
    plt.xlabel("Z-Score Threshold")
    plt.ylabel("Leverage")
    plt.tight_layout()
    plt.show()

In [None]:
# Step 6: Strategy Summary and Conclusions
print("\n--- Step 6: Strategy Summary and Conclusions ---")

# Get the best parameter combination based on Sharpe ratio.
# Only finite Sharpe ratios are eligible: NaN or +/-inf values (for example
# from a run with no return variance) must not be picked as "best", and an
# all-NaN column would otherwise fail with an unhelpful lookup error.
sharpe_ratios = pd.to_numeric(sensitivity_df["sharpe_ratio"], errors="coerce")
finite_sharpe_ratios = sharpe_ratios[np.isfinite(sharpe_ratios)]
if finite_sharpe_ratios.empty:
    raise ValueError(
        "No parameter combination produced a finite Sharpe ratio; "
        "cannot select the best parameters."
    )
best_params = sensitivity_df.loc[finite_sharpe_ratios.idxmax()]

print("Best Parameter Combination:")
print(f"Z-Score Threshold: {best_params['z_threshold']}")
# The selected row is a numeric Series, so an integer leverage comes back as a
# float; the 'g' format prints it as "2" rather than "2.0".
print(f"Leverage: {best_params['leverage']:g}")
print(f"Resulting Sharpe Ratio: {best_params['sharpe_ratio']:.2f}")
print(f"Total P&L: ${best_params['total_pnl']:.2f}")
print(f"Max Drawdown: {best_params['max_drawdown_pct']:.2f}%")
print(f"Win Rate: {best_params['win_rate']:.2f}")
print(f"Profit Factor: {best_params['profit_factor']:.2f}")

# Final summary report: static write-up of the strategy, kept in a named
# constant and printed verbatim after its heading.
STRATEGY_SUMMARY = """
1. Strategy Logic:
   - Go SHORT on perpetual futures when funding rate is significantly higher than prompt futures APR (z-score > threshold)
   - Go LONG on prompt futures to maintain delta neutrality
   - Go LONG on perpetual futures when funding rate is significantly lower than prompt futures APR (z-score < -threshold)
   - Go SHORT on prompt futures to maintain delta neutrality

2. Primary Profit Sources:
   - Funding payments from perpetual futures (when going short during high funding periods)
   - Futures basis convergence 
   - Price differential between instruments

3. Risk Management:
   - Delta-neutral positioning minimizes directional exposure
   - Position sizing based on available capital and leverage
   - Monitoring of z-score to enter and exit positions at optimal times

4. Key Insights:
   - The funding-prompt spread exhibits mean-reverting behavior
   - Strategy performance is sensitive to z-score threshold and leverage settings
   - Trading costs can significantly impact profitability
   
5. Potential Improvements:
   - Dynamic position sizing based on spread magnitude
   - Implementation of stop-loss mechanisms
   - Consideration of market volatility regimes for adaptive parameter selection
   - Addition of risk controls for extreme market conditions
"""

print("\nStrategy Summary:")
print(STRATEGY_SUMMARY)