In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import backtest as bt

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from itertools import combinations

In [2]:
# Pair to analyse; the regression and spread code index this list by position (tickers[0], tickers[1]).
tickers = ['ABBV', 'CB']
# History length passed to yf.download as its `period` argument.
timeframe = '5y'
# Starting cash for the backtest.
start_cash = 10_000
# Rolling window length (in rows) used for the regression fit and the spread statistics.
window = 200

In [3]:
# Download price history for the configured pair. `auto_adjust` is passed
# explicitly so the result does not depend on the installed yfinance
# version's default (the default changed to True, which is the behaviour the
# rest of this notebook was run with).
df = yf.download(tickers, period=timeframe, auto_adjust=True)


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  2 of 2 completed


In [4]:
def log_return(df):
    """Return row-over-row log returns of the 'Close' prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``df['Close']`` selection is itself a DataFrame with one
        column per ticker (i.e. two-level columns).

    Returns
    -------
    pandas.DataFrame
        ``log(close / previous close)`` per ticker, with the columns
        relabelled as ``('Log Return', <ticker>)``. The first row is NaN
        because it has no previous row.
    """
    closes = df['Close']
    previous = closes.shift(1)
    returns = np.log(closes / previous)
    # Put every ticker column under a single 'Log Return' top-level label.
    header = pd.MultiIndex.from_product([['Log Return'], returns.columns])
    returns.columns = header
    return returns
# Display the log-return table for the downloaded prices (the result is not stored).
log_return(df)

Unnamed: 0_level_0,Log Return,Log Return
Ticker,ABBV,CB
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2020-08-12,,
2020-08-13,-0.008728,0.007037
2020-08-14,0.004111,-0.008112
2020-08-17,0.013374,-0.033600
2020-08-18,-0.007292,-0.006058
...,...,...
2025-08-06,-0.011397,0.007258
2025-08-07,0.012957,-0.001188
2025-08-08,-0.004082,0.007141
2025-08-11,0.002975,-0.000885


In [5]:
def lin_reg(df,tickers,window):
    """Rolling OLS of ``tickers[1]`` on ``tickers[0]``.

    For each row ``i >= window`` an ordinary-least-squares line
    ``tickers[1] = intercept + slope * tickers[0]`` is fitted on the
    ``window`` rows *preceding* row ``i`` (row ``i`` itself is excluded from
    the fit).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one column per ticker.
    tickers : sequence of str
        ``tickers[0]`` is the regressor, ``tickers[1]`` the response.
    window : int
        Number of preceding rows used for each fit.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Slope'`` and ``'Intercept'`` on ``df.index``. Rows without
        a full window of history are NaN; this includes every row when
        ``len(df) <= window``.
    """
    n_rows = len(df)
    # Preallocate to the frame's own length so inputs shorter than `window`
    # yield an all-NaN result instead of a length-mismatch error.
    slope_col = np.full(n_rows, np.nan)
    intercept_col = np.full(n_rows, np.nan)
    for i in range(window, n_rows):
        data = df.iloc[i-window:i]
        regressor = sm.add_constant(data[tickers[0]].values)
        response = data[tickers[1]].values
        model = sm.OLS(response, regressor).fit()
        # add_constant prepends the constant column: params[0] is the
        # intercept, params[1] the slope.
        intercept_col[i] = model.params[0]
        slope_col[i] = model.params[1]
    return pd.DataFrame({'Slope': slope_col, 'Intercept': intercept_col}, index=df.index)

In [6]:
def calc_spread(data,tickers,window):
    """Compute the regression spread of a pair and its rolling Z-score.

    The spread is the residual of ``tickers[1]`` against the rolling
    regression line fitted by ``lin_reg``:
    ``spread = close[tickers[1]] - (intercept + slope * close[tickers[0]])``.

    Parameters
    ----------
    data : pandas.DataFrame
        Price frame whose ``data['Close']`` selection has one column per
        ticker.
    tickers : sequence of str
        ``tickers[0]`` is the regressor, ``tickers[1]`` the response.
    window : int
        Window length used both for the regression and for the rolling
        mean / standard deviation of the spread.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Spread'``, ``'Spread Mean'``, ``'Spread Std'`` and
        ``'Z-Score'``. The regression needs ``window`` rows of history and
        the rolling statistics need ``window`` valid spreads, so the Z-score
        is NaN for the first ``2 * window - 1`` rows.
    """
    closes = data['Close']
    # Keep the coefficients in their own frame rather than writing new
    # columns into `closes`: that object comes from the caller's data, and
    # assigning into it can raise SettingWithCopyWarning or mutate the input.
    coefs = lin_reg(closes, tickers, window)
    fitted = coefs['Intercept'] + coefs['Slope'] * closes[tickers[0]]
    spread = closes[tickers[1]] - fitted
    rolling = spread.rolling(window)
    spread_mean = rolling.mean()
    spread_std = rolling.std()
    z_score = (spread - spread_mean) / spread_std
    return pd.DataFrame({'Spread': spread, 'Spread Mean': spread_mean, 'Spread Std': spread_std, 'Z-Score': z_score})

In [7]:
def close(df):
    """Return only the 'Close' column(s) of ``df``, always as a DataFrame."""
    wanted = ['Close']
    # List-based selection keeps the result two-dimensional.
    return df[wanted]

In [8]:
# NOTE(review): leftover inspection of rows 395-404. It is not the last
# expression in this cell, so presumably nothing is displayed — consider removing.
df.iloc[395:405]

class Pairs(bt.Strategy):
    """Z-score pairs-trading strategy on the notebook's configured pair.

    The spread indicator models ``tickers[1]`` as a rolling linear function
    of ``tickers[0]``. When the spread's Z-score reaches ``ENTRY_Z`` in
    absolute value, opposite-signed orders sized proportionally to the
    Z-score are placed on the two legs; otherwise both legs are sold.

    NOTE(review): the sign convention of ``buy_price`` (presumably a negative
    value opens a short) and the semantics of ``sell`` with no size come from
    the ``backtest`` module — confirm against its API.
    """

    # Absolute Z-score at or above which orders are placed.
    ENTRY_Z = 2
    # Fraction of current cash committed per unit of Z-score.
    SIZE_PER_Z = 0.1

    def __init__(self, data, cash):
        """Register the close-price and spread indicators for the pair.

        Reads the module-level ``tickers`` and ``window`` settings.
        """
        super().__init__(data, cash)
        self.indicator(close, data)
        self.indicator(calc_spread, data, tickers, window)
        # Trade exactly the pair the spread indicator was built from, so the
        # orders cannot drift out of sync with the `tickers` configuration.
        self._pair = (tickers[0], tickers[1])

    def run(self, date_index, row):
        """Process one bar: enter on a large |Z-score|, otherwise exit."""
        super().run(date_index,row)
        regressor, response = self._pair
        z = row['Z-Score']
        # A NaN Z-score (warm-up rows) fails the comparison and takes the
        # exit branch.
        if abs(z) >= self.ENTRY_Z:
            # Sizes are evaluated one order at a time, in this order, because
            # self.cash may change after the first order is placed.
            self.buy(response, buy_price=self.cash*(-self.SIZE_PER_Z)*z)
            self.buy(regressor, buy_price=self.cash*self.SIZE_PER_Z*z)
        else:
            self.sell(response)
            self.sell(regressor)

# Note: order size is multiplied by the Z-score, so larger divergences take larger positions.


In [9]:
# Build the strategy with the configured starting cash (rather than a
# duplicated literal), then run the backtest and plot the results.
test = Pairs(data=df, cash=start_cash)
testrun = bt.Backtest(df, test)
testrun.run_backtest(monte_carlo_iterations=100, mc_replacement=False)
testrun.visualize()