In [1]:
from data_processor import DataProcessor

import pandas as pd

from utils import *

In [2]:
# Loading the training data (2017 to 2022)
DATA = pd.read_csv('./data/closing_prices.csv').set_index('Date')

In [3]:
# Get the list of stock tickers
TICKERS = DATA.columns

# Split training data (2017 to 2022) from the corpus

TRAINING_DATA = DATA[:'2023-01-01']
TESTING_DATA = DATA['2023-01-01':]

# Store all the cointegrated pairs and their regression models in this list
COINTEGRATED_PAIR_MODELS = []

for ticker1_index in range(len(TICKERS) - 1):
    for ticker2_index in range(ticker1_index + 1, len(TICKERS)):
        try:
            ticker1 = TICKERS[ticker1_index]
            ticker2 = TICKERS[ticker2_index]

            ticker1_data = TRAINING_DATA[ticker1]
            ticker2_data = TRAINING_DATA[ticker2]

            # Retrieve model 
            model = get_regression_model(ticker1_data, ticker2_data)

            # Get residuals to test for stationarity
            residuals = model.resid

            # Gauge cointegration by performing the AdFuller test with a confidence level of 99%
            if passes_adfuller_test(residuals, alpha=0.01):
                # If test passes then add the pair to the list
                print (f'Pair ({ticker1}, {ticker2}) is cointegrated (99% confidence)')
                COINTEGRATED_PAIR_MODELS.append({
                    'pair': f"{ticker1}:{ticker2}",
                    'ticker1': ticker1,
                    'ticker2': ticker2,
                    'model': model
                })
        
        except Exception as err:
            print (err)


Pair (ADANIPORTS, APOLLOHOSP) is cointegrated (99% confidence)
Pair (ADANIPORTS, BAJAJ-AUTO) is cointegrated (99% confidence)
Pair (ADANIPORTS, CIPLA) is cointegrated (99% confidence)
Pair (ADANIPORTS, JSWSTEEL) is cointegrated (99% confidence)
Pair (ADANIPORTS, ULTRACEMCO) is cointegrated (99% confidence)
Pair (APOLLOHOSP, ASIANPAINT) is cointegrated (99% confidence)
Pair (APOLLOHOSP, BAJAJFINSV) is cointegrated (99% confidence)
Pair (APOLLOHOSP, HCLTECH) is cointegrated (99% confidence)
Pair (APOLLOHOSP, HDFCBANK) is cointegrated (99% confidence)
Pair (APOLLOHOSP, INFY) is cointegrated (99% confidence)
Pair (APOLLOHOSP, TITAN) is cointegrated (99% confidence)
Pair (APOLLOHOSP, ULTRACEMCO) is cointegrated (99% confidence)
Pair (ASIANPAINT, BAJFINANCE) is cointegrated (99% confidence)
Pair (ASIANPAINT, HCLTECH) is cointegrated (99% confidence)
Pair (ASIANPAINT, HDFCBANK) is cointegrated (99% confidence)
Pair (ASIANPAINT, INFY) is cointegrated (99% confidence)
Pair (ASIANPAINT, KOTAKBAN

In [4]:
PAIRS_TO_BACKTEST = [
    'ADANIPORTS:JSWSTEEL',
    'APOLLOHOSP:HDFCBANK',
    'ASIANPAINT:BAJFINANCE',
    'BAJFINANCE:KOTAKBANK',
    'EICHERMOT:MARUTI',
    'GRASIM:POWERGRID',
    'HCLTECH:INFY',
    'INFY:TCS',
    'ICICIBANK:TITAN',
    'POWERGRID:SUNPHARMA'
]

In [5]:
BACKTESTING_DATA = pd.DataFrame()

for coint_pair_dict in COINTEGRATED_PAIR_MODELS:
    if coint_pair_dict['pair'] not in PAIRS_TO_BACKTEST:
        continue

    ticker1 = coint_pair_dict['ticker1']
    ticker2 = coint_pair_dict['ticker2']
    model = coint_pair_dict['model']
    
    closing_prices = TESTING_DATA[[ticker1, ticker2]]
                
    data = DataProcessor.get_processed_data(
        pair_tickers=[ticker1, ticker2],
        pair_closing_prices=closing_prices,
        pair_regression_model=model
    )

    if len(BACKTESTING_DATA) == 0:
        BACKTESTING_DATA = data
        
    else:
        if ticker1 in BACKTESTING_DATA.columns: data = data.drop(columns=[ticker1])
        if ticker2 in BACKTESTING_DATA.columns: data = data.drop(columns=[ticker2])    

        BACKTESTING_DATA = pd.merge(
            left=BACKTESTING_DATA,
            left_index=True,
            right=data.drop(columns=['date']),
            right_index=True
        )

In [6]:
BACKTESTING_DATA['date'] = pd.to_datetime(BACKTESTING_DATA['date'])

In [7]:
BACKTESTING_DATA.to_csv('./data/backtesting_data.csv', index=False)