In [1]:
pip install yfinance pandas numpy statsmodels matplotlib seaborn ipywidgets

Note: you may need to restart the kernel to use updated packages.


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output

# Plot aesthetics: use the seaborn dark-grid look.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'; older
# releases only know the un-versioned name. Pick whichever is installed rather
# than failing at import time (if neither exists the default style is kept).
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

def _pair_trade_pnl(trade, price1, price2, capital_per_leg):
    """Gross P&L (before transaction costs) of closing ``trade`` at the given prices.

    Each leg is sized at ``capital_per_leg``. A 'LONG' trade is long stock 1 and
    short stock 2; any other type is treated as short stock 1 / long stock 2.
    """
    leg1_return = (price1 - trade['entry_price1']) / trade['entry_price1']
    leg2_return = (price2 - trade['entry_price2']) / trade['entry_price2']
    if trade['type'] == 'LONG':
        return (leg1_return - leg2_return) * capital_per_leg
    return (leg2_return - leg1_return) * capital_per_leg


def run_pair_trading_strategy(stock1_ticker, stock2_ticker, start_date, end_date,
                              initial_capital, transaction_cost_pct,
                              rolling_window_spread, rolling_window_zscore_threshold,
                              entry_z_score_multiplier, exit_z_score_multiplier,
                              stop_loss_z_score_multiplier):
    """
    Run the complete pair-trading backtest for one pair of tickers.

    Steps: download adjusted close prices, run an Engle-Granger cointegration
    test on the log prices, build a rolling Z-score of the log-price spread,
    simulate entries/exits/stop-losses against dynamic thresholds, and report
    performance metrics.

    Args:
        stock1_ticker (str): Yahoo Finance ticker for the first stock (e.g., 'HDFCBANK.NS').
        stock2_ticker (str): Yahoo Finance ticker for the second stock.
        start_date (str): Start of the historical data range.
        end_date (str): End of the historical data range.
        initial_capital (float): Starting simulated capital. Every trade puts
            half of this amount on each leg.
        transaction_cost_pct (float): Transaction cost as a fraction of the traded
            notional, charged on both legs at entry and again at exit.
        rolling_window_spread (int): Window (rows) for the rolling mean/std of the spread.
        rolling_window_zscore_threshold (int): Window (rows) for the rolling std of the
            Z-score that scales the dynamic thresholds.
        entry_z_score_multiplier (float): Entry threshold = multiplier * rolling std of Z-score.
        exit_z_score_multiplier (float): Exit threshold = multiplier * rolling std of Z-score.
        stop_loss_z_score_multiplier (float): Stop-loss threshold = multiplier * rolling std of Z-score.

    Returns:
        tuple: (portfolio_df, performance_metrics, trades_list, df_data).
        All four elements are None when no data is returned, when there are too
        few rows for the chosen windows, or when the pair fails the
        cointegration test.
    """

    # 1. DATA DOWNLOAD
    stock1_data = yf.download(stock1_ticker, start=start_date, end=end_date, progress=False, auto_adjust=False)
    stock2_data = yf.download(stock2_ticker, start=start_date, end=end_date, progress=False, auto_adjust=False)

    if stock1_data.empty or stock2_data.empty:
        print(f"No price data returned for {stock1_ticker}-{stock2_ticker}. Skipping this pair.")
        return None, None, None, None

    def _adj_close(frame):
        """Return the adjusted close as a Series, whatever the column layout."""
        prices = frame['Adj Close']
        # With MultiIndex columns the selection is a one-column DataFrame.
        if isinstance(prices, pd.DataFrame):
            prices = prices.iloc[:, 0]
        return prices

    # 2. COMBINE PRICES
    # The second series is aligned on the first one's dates; incomplete rows are dropped.
    df = pd.DataFrame({stock1_ticker: _adj_close(stock1_data)})
    df[stock2_ticker] = _adj_close(stock2_data)
    df.dropna(inplace=True)

    # The two rolling passes below consume (window - 1) rows each, so at least
    # this many rows are needed for a single usable observation to remain.
    min_rows = rolling_window_spread + rolling_window_zscore_threshold - 1
    if len(df) < min_rows:
        print(f"Not enough data for {stock1_ticker}-{stock2_ticker} with these window sizes. Skipping this pair.")
        return None, None, None, None

    # 3. ENGLE-GRANGER TEST
    def check_cointegration(series1, series2):
        """
        Engle-Granger test: regress series1 on series2 (with a constant) and run
        an ADF test on the residuals. Returns True when the ADF p-value is
        below 0.05, i.e. the residuals look stationary.
        """
        model = sm.OLS(series1, sm.add_constant(series2))
        results = model.fit()
        residuals = results.resid # Deviation from the fitted linear relationship.
        adf_test = adfuller(residuals)

        # Positional access through an array: an integer key on the
        # label-indexed params Series is a deprecated fallback in pandas.
        hedge_ratio = np.asarray(results.params)[1]

        print('\n--- Cointegration Test Results (The Statistical Bit) ---')
        print(f'Hedge Ratio (from OLS): {hedge_ratio:.4f}')
        print(f'ADF Statistic: {adf_test[0]:.4f}')
        print(f'p-value: {adf_test[1]:.4f}')

        if adf_test[1] < 0.05:
            print("Conclusion: The series appear to be cointegrated. This is good for our strategy!")
            return True
        else:
            print("Conclusion: The series are NOT cointegrated. This strategy probably won't work well here.")
            return False

    print("\n--- Running the Cointegration Test ---")
    # NOTE(review): the test uses the full sample, including the dates that are
    # later traded, so the pair selection sees future data.
    is_cointegrated = check_cointegration(np.log(df[stock1_ticker]), np.log(df[stock2_ticker]))

    if not is_cointegrated:
        print(f"Strategy Halted: {stock1_ticker} and {stock2_ticker} don't seem to be suitable partners.")
        return None, None, None, None

    # 4. SPREAD CALCULATION AND Z-SCORE
    # NOTE(review): the spread is the plain log-price difference (hedge ratio 1);
    # the OLS hedge ratio printed above is not applied here.
    df['spread'] = np.log(df[stock1_ticker]) - np.log(df[stock2_ticker])
    df['rolling_mean_spread'] = df['spread'].rolling(window=rolling_window_spread).mean()
    df['rolling_std_spread'] = df['spread'].rolling(window=rolling_window_spread).std()
    # Z-score: how many rolling standard deviations the spread is from its rolling mean.
    df['z_score'] = (df['spread'] - df['rolling_mean_spread']) / df['rolling_std_spread']
    df.dropna(inplace=True) # Drop the warm-up rows of the rolling window.

    # Thresholds scale with the rolling standard deviation of the Z-score itself.
    df['rolling_std_z_score'] = df['z_score'].rolling(window=rolling_window_zscore_threshold).std()
    df['dynamic_entry_threshold'] = entry_z_score_multiplier * df['rolling_std_z_score']
    df['dynamic_exit_threshold'] = exit_z_score_multiplier * df['rolling_std_z_score']
    df['dynamic_stop_loss_threshold'] = stop_loss_z_score_multiplier * df['rolling_std_z_score']
    df.dropna(inplace=True) # Drop the warm-up rows of the second rolling window.

    # Safety net: nothing left to trade on (e.g. a window too small for a std).
    if df.empty:
        print(f"Not enough data for {stock1_ticker}-{stock2_ticker} with these window sizes. Skipping this pair.")
        return None, None, None, None

    print("\nZ-score and dynamic threshold calculations are complete!")
    print(f"Entry threshold adapts based on: {entry_z_score_multiplier} * rolling_std_z_score (More flexible entry!)")
    print(f"Exit threshold adapts based on: {exit_z_score_multiplier} * rolling_std_z_score (Aims to close near the mean!)")
    print(f"Stop loss threshold adapts based on: {stop_loss_z_score_multiplier} * rolling_std_z_score (Our risk control!)")

    # 5. BACKTESTING ENGINE
    capital = initial_capital # Realised capital; changes only when a trade closes.
    capital_per_leg = initial_capital / 2
    # Cost of trading both legs once (charged at entry and again at exit).
    cost_per_side = capital_per_leg * transaction_cost_pct * 2
    positions = 0 # 0: flat, 1: long the spread, -1: short the spread.
    trades = [] # One dict per trade, in entry order.
    position_history = []
    value_history = []

    rows = zip(
        df.index,
        df['z_score'].to_numpy(),
        df[stock1_ticker].to_numpy(),
        df[stock2_ticker].to_numpy(),
        df['dynamic_entry_threshold'].to_numpy(),
        df['dynamic_exit_threshold'].to_numpy(),
        df['dynamic_stop_loss_threshold'].to_numpy(),
    )
    for date, current_z_score, price1, price2, entry_thresh, exit_thresh, stop_loss_thresh in rows:
        if positions == 0:
            # Entry: Z-score beyond the entry band in either direction.
            if current_z_score > entry_thresh:
                side, positions = 'SHORT', -1 # Short Stock1, long Stock2.
            elif current_z_score < -entry_thresh:
                side, positions = 'LONG', 1 # Long Stock1, short Stock2.
            else:
                side = None
            if side is not None:
                trades.append({
                    'type': side,
                    'entry_date': date,
                    'entry_price1': price1,
                    'entry_price2': price2,
                    'entry_z_score': current_z_score,
                    'pnl': -cost_per_side # Starts at minus the entry cost.
                })
        else:
            trade = trades[-1] # The currently open trade.

            reverted = (positions == 1 and current_z_score >= -exit_thresh) or \
                       (positions == -1 and current_z_score <= exit_thresh)
            stopped_out = (positions == 1 and current_z_score < -stop_loss_thresh) or \
                          (positions == -1 and current_z_score > stop_loss_thresh)

            if reverted or stopped_out:
                pnl = _pair_trade_pnl(trade, price1, price2, capital_per_leg)
                trade['pnl'] += pnl - cost_per_side
                trade['exit_date'] = date
                trade['exit_z_score'] = current_z_score
                if not reverted:
                    # The mean-reversion exit takes precedence over the stop-loss.
                    trade['stop_loss_hit'] = True
                capital += trade['pnl']
                positions = 0

        position_history.append(positions)
        value_history.append(capital)

    # NOTE(review): a trade still open on the last row is never closed, so its
    # entry cost is recorded on the trade but never deducted from capital.
    portfolio = pd.DataFrame(index=df.index)
    portfolio['positions'] = position_history
    portfolio['portfolio_value'] = np.asarray(value_history, dtype=float)

    print("Backtest simulation finished. Time to see the results!")

    # 6. PERFORMANCE METRICS
    # portfolio_value only moves when a trade closes, so these are realised-P&L figures.
    portfolio['daily_return'] = portfolio['portfolio_value'].pct_change().fillna(0)
    total_return = (portfolio['portfolio_value'].iloc[-1] / initial_capital) - 1
    days_in_backtest = (portfolio.index[-1] - portfolio.index[0]).days
    annualized_return = (1 + total_return) ** (365.25 / days_in_backtest) - 1 if days_in_backtest > 0 else 0
    annualized_volatility = portfolio['daily_return'].std() * np.sqrt(252) # 252 trading days per year.
    # Sharpe ratio with no risk-free rate subtracted.
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility != 0 else 0
    # Maximum drawdown: largest drop from a running peak.
    portfolio['drawdown'] = (portfolio['portfolio_value'] / portfolio['portfolio_value'].cummax()) - 1
    max_drawdown = portfolio['drawdown'].min()
    # Trade statistics use completed trades only.
    trade_list_completed = [t for t in trades if 'exit_date' in t]
    num_trades = len(trade_list_completed)
    win_rate = (sum(1 for t in trade_list_completed if t['pnl'] > 0) / num_trades) if num_trades > 0 else 0
    avg_holding_period = np.mean([(t['exit_date'] - t['entry_date']).days for t in trade_list_completed]) if num_trades > 0 else 0

    performance_metrics = {
        "Total Return": total_return,
        "Annualized Return": annualized_return,
        "Sharpe Ratio": sharpe_ratio,
        "Maximum Drawdown": max_drawdown,
        "Number of Trades": num_trades,
        "Hit Ratio (Win Rate)": win_rate,
        "Average Holding Period": avg_holding_period
    }

    print("\n--- Strategy Performance Metrics (The Final Report Card!) ---")
    for metric, value in performance_metrics.items():
        if isinstance(value, (float, np.float64)):
            if 'Return' in metric or 'Drawdown' in metric or 'Win Rate' in metric:
                print(f"{metric}: {value:.2%}") # Percentage.
            elif 'Ratio' in metric:
                print(f"{metric}: {value:.2f}") # Two decimals.
            elif 'Period' in metric:
                print(f"{metric}: {value:.2f} days")
            else:
                print(f"{metric}: {value}")
        else:
            print(f"{metric}: {value}")

    return portfolio, performance_metrics, trades, df

# ---------------------------------------------------------------------------
# Dashboard set-up (ipywidgets): fixed settings and the interactive controls
# ---------------------------------------------------------------------------

# Backtest settings that are not exposed as widgets.
initial_capital = 1000000.0
transaction_cost_pct = 0.001
start_date, end_date = '2020-01-01', '2024-12-31'

# Selectable pairs: (label shown in the dropdown, (ticker 1, ticker 2)).
stock_pair_options = [
    ('HDFC Bank & ICICI Bank', ('HDFCBANK.NS', 'ICICIBANK.NS')),
    ('TATA POWER & JSW ENERGY', ('TATAPOWER.NS', 'JSWENERGY.NS')),
]
pair_selector = widgets.Dropdown(
    description='Choose Your Stock Pair:',
    options=stock_pair_options,
    value=stock_pair_options[0][1],  # Start on the first pair.
    disabled=False,
)

# Strategy-parameter sliders. continuous_update=False means a slider reports
# its value when released rather than while it is being dragged.

# Window for the rolling mean/std of the spread.
rolling_window_spread_slider = widgets.IntSlider(
    description='Spread Window Size:',
    min=10, max=200, step=5, value=60,
    continuous_update=False,
)

# Window for the rolling std of the Z-score that scales the thresholds.
rolling_window_zscore_threshold_slider = widgets.IntSlider(
    description='Z-score Threshold Window:',
    min=30, max=300, step=10, value=120,
    continuous_update=False,
)

# A higher value requires a larger divergence before a trade is opened.
entry_z_score_multiplier_slider = widgets.FloatSlider(
    description='Entry Trigger (Z-score):',
    min=0.5, max=3.0, step=0.1, value=1.5,
    continuous_update=False,
)

# A lower value closes trades nearer to the mean.
exit_z_score_multiplier_slider = widgets.FloatSlider(
    description='Exit Trigger (Z-score):',
    min=0.0, max=1.0, step=0.05, value=0.2,
    continuous_update=False,
)

# How far the Z-score may move against a trade before it is cut.
stop_loss_z_score_multiplier_slider = widgets.FloatSlider(
    description='Stop Loss Trigger (Z-score):',
    min=2.0, max=5.0, step=0.1, value=3.0,
    continuous_update=False,
)

# Output widget that receives all printed text and plots.
output_area = widgets.Output()

def interactive_backtest(pair, rw_spread, rw_z_threshold, entry_mult, exit_mult, stop_loss_mult):
    """Widget callback: rerun the backtest with the current control values and redraw.

    Everything printed or plotted is written into ``output_area``, which is
    cleared first. Nothing is drawn when the strategy returns no portfolio.
    """
    with output_area:
        clear_output(wait=True)  # Replace the previous run's output.
        stock1, stock2 = pair

        portfolio, metrics, trades, df_data = run_pair_trading_strategy(
            stock1_ticker=stock1,
            stock2_ticker=stock2,
            start_date=start_date,
            end_date=end_date,
            initial_capital=initial_capital,
            transaction_cost_pct=transaction_cost_pct,
            rolling_window_spread=rw_spread,
            rolling_window_zscore_threshold=rw_z_threshold,
            entry_z_score_multiplier=entry_mult,
            exit_z_score_multiplier=exit_mult,
            stop_loss_z_score_multiplier=stop_loss_mult,
        )

        # The strategy returns None when it could not run.
        if portfolio is None:
            print(f"Couldn't execute the strategy for {stock1}-{stock2} with these parameters. Please check inputs or data.")
            return

        # Only trades that have an exit can be drawn as exits / in the histogram.
        closed_trades = [t for t in trades if 'exit_date' in t]

        print(f"\n Generating Visualizations for {stock1}-{stock2}")

        # ---- Plot 1: Z-score with thresholds and trade markers ----
        _, ax = plt.subplots(figsize=(15, 7))
        df_data['z_score'].plot(ax=ax, title=f'Z-score with Trading Signals ({stock1} vs {stock2})', label='Z-score')

        # (column, mirrored below zero?, line style). The mirrored exit and
        # stop-loss lines carry no label so each appears once in the legend.
        threshold_lines = [
            ('dynamic_entry_threshold', False, dict(color='red', linestyle='--', label='Short Entry Threshold')),
            ('dynamic_entry_threshold', True, dict(color='green', linestyle='--', label='Long Entry Threshold')),
            ('dynamic_exit_threshold', False, dict(color='black', linestyle='-.', label='Mean Reversion Exit Threshold')),
            ('dynamic_exit_threshold', True, dict(color='black', linestyle='-.')),
            ('dynamic_stop_loss_threshold', False, dict(color='purple', linestyle=':', label='Stop Loss Threshold')),
            ('dynamic_stop_loss_threshold', True, dict(color='purple', linestyle=':')),
        ]
        for column, mirrored, line_style in threshold_lines:
            level = -df_data[column] if mirrored else df_data[column]
            ax.plot(df_data.index, level, **line_style)

        def draw_markers(dates, marker, colour, label):
            # Keep only dates present in the data, then mark the Z-score there.
            present = [d for d in dates if d in df_data.index]
            ax.plot(present, df_data.loc[present]['z_score'], marker, markersize=10, color=colour, label=label)

        draw_markers([t['entry_date'] for t in trades if t['type'] == 'LONG'], '^', 'green', 'Long Entry')
        draw_markers([t['entry_date'] for t in trades if t['type'] == 'SHORT'], 'v', 'red', 'Short Entry')
        draw_markers([t['exit_date'] for t in closed_trades], 'x', 'k', 'Trade Exit')
        ax.legend()
        plt.show()

        # ---- Plot 2: strategy equity curve ----
        _, ax = plt.subplots(figsize=(15, 7))
        portfolio['portfolio_value'].plot(ax=ax, label='Pair Trading Strategy Portfolio Value')
        ax.set_title(f'Strategy Equity Curve for {stock1}-{stock2} Pair')
        ax.set_ylabel('Portfolio Value (INR)')
        ax.legend()
        plt.show()

        # ---- Plot 3: strategy vs. buy & hold of each stock ----
        _, ax = plt.subplots(figsize=(15, 7))
        portfolio['portfolio_value'].plot(ax=ax, label='Pair Trading Strategy')
        # Each benchmark is rebased to the portfolio value on the first plotted date.
        for ticker in (stock1, stock2):
            rebased = df_data[ticker] / df_data[ticker].iloc[0] * portfolio.loc[df_data.index[0], 'portfolio_value']
            rebased.plot(ax=ax, label=f'Buy & Hold {ticker} (Benchmark)', alpha=0.7)
        ax.set_title(f'Strategy Equity Curve vs. Individual Buy & Hold for {stock1}-{stock2}')
        ax.set_xlabel('Date')
        ax.set_ylabel('Portfolio Value')
        ax.legend()
        plt.show()

        # ---- Plot 4: histogram of per-trade P&L ----
        _, ax = plt.subplots(figsize=(10, 6))
        trade_pnl = [t['pnl'] for t in closed_trades if 'pnl' in t]

        if not trade_pnl:
            # Nothing closed: show a placeholder message instead of a histogram.
            ax.text(0.5, 0.5, 'No Completed Trades to Plot', horizontalalignment='center', verticalalignment='center')
            ax.set_title(f'Histogram of P&L per Trade for {stock1}-{stock2}')
        else:
            sns.histplot(trade_pnl, bins=20, kde=True, ax=ax)
            ax.set_title(f'Histogram of Profit/Loss per Trade for {stock1}-{stock2}')
            ax.set_xlabel('Profit / Loss (INR)')
            ax.set_ylabel('Frequency of Trades')
        plt.show()

# Stack the controls vertically, in the order they should appear.
ui = widgets.VBox(children=[
    pair_selector,
    rolling_window_spread_slider,
    rolling_window_zscore_threshold_slider,
    entry_z_score_multiplier_slider,
    exit_z_score_multiplier_slider,
    stop_loss_z_score_multiplier_slider,
])

# Bind each control to the matching keyword argument of interactive_backtest,
# so the backtest reruns whenever any of these widget values changes.
interactive_plot = widgets.interactive_output(
    interactive_backtest,
    dict(
        pair=pair_selector,
        rw_spread=rolling_window_spread_slider,
        rw_z_threshold=rolling_window_zscore_threshold_slider,
        entry_mult=entry_z_score_multiplier_slider,
        exit_mult=exit_z_score_multiplier_slider,
        stop_loss_mult=stop_loss_z_score_multiplier_slider,
    ),
)

# Show the controls with the results area underneath.
display(ui, output_area)


VBox(children=(Dropdown(description='Choose Your Stock Pair:', options=(('HDFC Bank & ICICI Bank', ('HDFCBANK.…

Output()