In [1]:
import logging
# This is necessary to recognize the modules
import os
import sys
from decimal import Decimal
import warnings

warnings.filterwarnings("ignore")
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
root_path = os.path.abspath(os.path.join(os.getcwd(), '../..'))
sys.path.append(root_path)

In [2]:
from core.data_sources.clob import CLOBDataSource

clob = CLOBDataSource()
candles = await clob.get_candles_batch_last_days(connector_name="binance_perpetual", trading_pairs=["SOL-USDT", "POPCAT-USDT"], interval="1m", days=1)


Batch 1/1
Start: 0, End: 10


In [3]:
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
import pandas as pd
import scipy.stats as stats

def analyze_pair_cointegration(y_col, x_col, lookback_days=14, signal_days=3, z_score_threshold=1.5):
    """
    Analyze cointegration between two price series using Engle-Granger method.
    
    Parameters:
    -----------
    y_col : pd.Series
        Dependent variable price series (e.g., LTC)
    x_col : pd.Series
        Independent variable price series (e.g., BTC)
    lookback_days : int
        Number of days to use for cointegration analysis
    signal_days : int
        Number of days to analyze for recent signals
    z_score_threshold : float
        Z-score threshold for generating trading signals
    
    Returns:
    --------
    dict : Contains analysis results including:
        - Cointegration statistics (P-Value, Alpha, Beta)
        - Spread analysis (Z_t, Z-scores)
        - Trading signals and confidence metrics
        - Risk management parameters
    """
    # Calculate periods for 15m candles
    lookback_periods = lookback_days * 24 * 4  # 15-min candles per day
    signal_periods = signal_days * 24 * 4
    
    # Prepare price series
    y_col = y_col.dropna()
    x_col = x_col.dropna()
    
    # Ensure finite values and same length
    y_col = y_col[np.isfinite(y_col)].tail(lookback_periods)
    x_col = x_col[np.isfinite(x_col)].tail(lookback_periods)
    
    min_len = min(len(y_col), len(x_col))
    y_col = y_col[-min_len:]
    x_col = x_col[-min_len:]
    
    y, x = y_col.values, x_col.values
    
    # 1. Perform Engle-Granger cointegration test
    score, p_value, _ = coint(y, x, trend='c', autolag='AIC')
    
    # 2. Estimate cointegrating relationship
    X = sm.add_constant(x)
    model = sm.OLS(y, X)
    results = model.fit()
    
    alpha = results.params[0]  # Intercept
    beta = results.params[1]   # Slope coefficient
    
    # 3. Calculate spread (Z_t) and its statistics
    z_t = y - (alpha + beta * x)  # Residuals
    z_mean = np.mean(z_t)
    z_std = np.std(z_t)
    current_z_score = (z_t[-1] - z_mean) / z_std
    
    # 4. Calculate recent predictions
    y_recent = y_col.tail(signal_periods)
    x_recent = x_col.tail(signal_periods)
    y_pred = alpha + beta * x_recent
    recent_spread = y_recent - y_pred
    
    # 5. Determine trading strategy
    if abs(current_z_score) < z_score_threshold:
        strategy = "Hold"
        confidence = 0
        mean_reversion_prob = 0.5  # Neutral probability
    else:
        # Calculate confidence metrics
        p_value_confidence = 1 - p_value  # Higher confidence with lower p-value
        z_score_confidence = stats.norm.cdf(abs(current_z_score)) - 0.5
        mean_reversion_prob = 1 - stats.norm.cdf(abs(current_z_score))
        
        confidence = (p_value_confidence + z_score_confidence + mean_reversion_prob) / 3
        
        if current_z_score > z_score_threshold:
            strategy = "Short"  # Y is overvalued relative to X
        else:
            strategy = "Long"   # Y is undervalued relative to X
    
    # 6. Calculate risk metrics
    try:
        half_life = -np.log(2) / np.polyfit(z_t[:-1], np.diff(z_t), 1)[0]
    except:
        half_life = np.nan
    
    volatility = np.std(np.diff(z_t))
    
    # 7. Calculate grid levels for trading
    current_price = y_col.iloc[-1]
    if strategy != "Hold":
        entry_price = current_price
        target_price = current_price * (1 - current_z_score * z_std * beta) if strategy == "Short" \
                      else current_price * (1 + abs(current_z_score) * z_std * beta)
        stop_price = current_price * (1 + current_z_score * 0.5 * z_std * beta) if strategy == "Short" \
                    else current_price * (1 - abs(current_z_score) * 0.5 * z_std * beta)
    else:
        entry_price = target_price = stop_price = None
    
    return {
        # Cointegration statistics
        'P-Value': p_value,
        'Alpha': alpha,
        'Beta': beta,
        'Model_Results': results.summary(),
        
        # Spread analysis
        'Z_t': z_t,
        'Z_mean': z_mean,
        'Z_std': z_std,
        'Current_Z_score': current_z_score,
        'Half_Life': half_life,
        'Spread_Volatility': volatility,
        
        # Trading signals
        'Strategy': strategy,
        'Confidence': confidence,
        'Mean_Reversion_Probability': mean_reversion_prob,
        
        # Grid levels
        'Entry_Price': entry_price,
        'Target_Price': target_price,
        'Stop_Price': stop_price,
        
        # Recent performance
        'Recent_Spread': recent_spread,
        'Predictions': y_pred,
        'Actual_Values': y_recent,
        
        # Risk parameters
        'Position_Ratio': beta,
        'Z_score_Threshold': z_score_threshold
    }

# Example usage:
def analyze_pair(y_col, x_col, pair_name=""):
    """Print a comprehensive analysis of a trading pair"""
    result = analyze_pair_cointegration(y_col, x_col)
    
    print(f"\nAnalysis for {pair_name}")
    print("=" * 50)
    print(f"Cointegration P-Value: {result['P-Value']:.4f}")
    print(f"Current Z-score: {result['Current_Z_score']:.2f}")
    print(f"Strategy: {result['Strategy']}")
    print(f"Confidence: {result['Confidence']:.2%}")
    print(f"Mean Reversion Probability: {result['Mean_Reversion_Probability']:.2%}")
    
    if result['Strategy'] != "Hold":
        print("\nTrading Levels:")
        print(f"Entry Price: {result['Entry_Price']:.2f}")
        print(f"Target Price: {result['Target_Price']:.2f}")
        print(f"Stop Price: {result['Stop_Price']:.2f}")
        print(f"Position Ratio (β): {result['Beta']:.4f}")
    
    return result


# Run analysis
result = analyze_pair(candles[0].data["close"], candles[1].data["close"], f"{candles[0].trading_pair.split('-')[0]}-{candles[1].trading_pair.split('-')[0]}")


Analysis for SOL-POPCAT
Cointegration P-Value: 0.1211
Current Z-score: 1.63
Strategy: Short
Confidence: 45.96%
Mean Reversion Probability: 5.20%

Trading Levels:
Entry Price: 134.30
Target Price: -7010.13
Stop Price: 3706.51
Position Ratio (β): 78.0396


I0000 00:00:1745025100.221140 4300702 fork_posix.cc:75] Other threads are currently calling into gRPC, skipping fork() handlers


In [5]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

def plot_cointegration_analysis(y_col, x_col, lookback_days=[3, 5, 7, 14], signal_days=3, z_score_threshold=1.5):
    """
    Plot cointegration analysis for multiple lookback periods with shared x-axis.
    
    Parameters:
    -----------
    y_col, x_col : pd.Series
        Price series to analyze
    lookback_days : list
        List of lookback periods to compare
    signal_days : int
        Days to analyze for recent signals
    z_score_threshold : float
        Z-score threshold for signals
    """
    # Store results for each lookback period
    results = []
    colors = ['green', 'purple', 'orange', 'brown']
    
    # Get maximum lookback period for x-axis alignment
    max_lookback = max(lookback_days) * 24 * 4
    
    # Prepare price series
    y_col = y_col.tail(max_lookback)
    x_col = x_col.tail(max_lookback)
    
    # Create subplots with shared x-axis
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.15,
        subplot_titles=['Normalized Prices and Predictions', 'Z_t (Spread) - Multiple Lookback Periods'],
        row_heights=[0.5, 0.5]
    )
    
    # Plot 1: Original price series
    fig.add_trace(
        go.Scatter(
            x=y_col.index,
            y=y_col,
            name='Y Series',
            line=dict(color='blue', width=1)
        ),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=x_col.index,
            y=x_col,
            name='X Series',
            line=dict(color='red', width=1)
        ),
        row=1, col=1
    )
    
    # Analyze each lookback period
    summary_text = []
    for lookback, color in zip(lookback_days, colors):
        result = analyze_pair_cointegration(
            y_col, x_col,
            lookback_days=lookback,
            signal_days=signal_days,
            z_score_threshold=z_score_threshold
        )
        results.append(result)
        
        # Add predictions to first plot
        fig.add_trace(
            go.Scatter(
                x=result['Predictions'].index,
                y=result['Predictions'],
                name=f'Predicted ({lookback}d)',
                line=dict(color=color, dash='dash', width=1)
            ),
            row=1, col=1
        )
        
        # Add Z_t series to second plot
        z_t_series = pd.Series(result['Z_t'], index=y_col.index[-len(result['Z_t']):])
        fig.add_trace(
            go.Scatter(
                x=z_t_series.index,
                y=z_t_series,
                name=f'Z_t ({lookback}d)',
                line=dict(color=color, width=1)
            ),
            row=2, col=1
        )
        
        # Add summary statistics
        summary = (
            f"{lookback}d Lookback: "
            f"β={result['Beta']:.3f} | "
            f"Z-score={result['Current_Z_score']:.2f} | "
            f"Strategy={result['Strategy']}"
        )
        summary_text.append(summary)
        
        # Add threshold lines for the most recent period
        if lookback == lookback_days[-1]:
            for threshold in [-z_score_threshold, 0, z_score_threshold]:
                fig.add_trace(
                    go.Scatter(
                        x=z_t_series.index,
                        y=[threshold] * len(z_t_series),
                        name=f'Threshold ({threshold:.1f}σ)',
                        line=dict(color='gray', dash='dot', width=1),
                        showlegend=True
                    ),
                    row=2, col=1
                )
    
    # Update layout
    title_text = (
        "Pair Analysis - Multiple Lookback Periods<br>" +
        "<br>".join(summary_text)
    )
    
    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            xanchor='center'
        ),
        width=1200,
        height=800,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=1.05
        ),
        xaxis_title="Date",
        yaxis_title="Normalized Price",
        yaxis2_title="Z-Score"
    )
    
    # Update y-axes labels
    fig.update_yaxes(title_text="Normalized Price", row=1, col=1)
    fig.update_yaxes(title_text="Z-Score", row=2, col=1)
    
    return fig, results

# Example usage:
def analyze_trading_pair(candle1, candle2, lookback_days=[3, 5, 7, 14]):
    """
    Analyze and plot a trading pair with multiple lookback periods.
    """
    # Get normalized price series
    y_prices = candle1.data["close"].pct_change().add(1).cumprod()
    x_prices = candle2.data["close"].pct_change().add(1).cumprod()
    
    # Create analysis plot
    fig, results = plot_cointegration_analysis(
        y_prices, 
        x_prices, 
        lookback_days=lookback_days
    )
    
    # Print detailed analysis for the longest lookback period
    latest_result = results[-1]
    print(f"\nDetailed Analysis (Lookback: {lookback_days[-1]} days)")
    print("=" * 50)
    print(f"Cointegration P-Value: {latest_result['P-Value']:.4f}")
    print(f"Beta: {latest_result['Beta']:.4f}")
    print(f"Current Z-score: {latest_result['Current_Z_score']:.2f}")
    print(f"Strategy: {latest_result['Strategy']}")
    print(f"Confidence: {latest_result['Confidence']:.2%}")
    
    if latest_result['Strategy'] != "Hold":
        print("\nTrading Levels:")
        print(f"Entry Price: {latest_result['Entry_Price']:.2f}")
        print(f"Target Price: {latest_result['Target_Price']:.2f}")
        print(f"Stop Price: {latest_result['Stop_Price']:.2f}")
    
    return fig, results

# Run analysis
fig, results = analyze_trading_pair(candles[0], candles[1], lookback_days=[1])
fig.show()


Detailed Analysis (Lookback: 1 days)
Cointegration P-Value: 0.2288
Beta: 0.1465
Current Z-score: 0.99
Strategy: Hold
Confidence: 0.00%
