In [1]:
from helper_functions import stock_list, stock_prices



In [2]:
import time
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Wall-clock timestamp taken when this configuration cell runs.
tic = time.time()

# Number of most recent rows of each price frame held out as the test window;
# the rows before them form the training window.
testsmpl = 126

# Number of most recent rows of price history kept per index (training and
# test windows together). An earlier `252*3` assignment was overwritten by this
# value on the very next statement and has been dropped as dead code.
interval = 521

In [3]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.api import VAR
from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order

def get_cointegration_params(df, verbose=False):
    """Test the columns of ``df`` for cointegration and, if found, fit a VECM.

    The lag order is the AIC choice of ``select_order`` (at most 10 lagged
    differences, deterministic term ``"ci"``). The cointegration rank is then
    chosen by ``select_coint_rank`` with the trace method at the 5% level.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per series. It is handed to statsmodels unchanged; the
        caller in this notebook drops rows containing NaN beforehand.
    verbose : bool, optional
        When true, print the summary table of the rank test.

    Returns
    -------
    tuple
        ``(True, beta, const_coint)`` taken from the fitted VECM when the first
        trace statistic exceeds its critical value, otherwise
        ``(False, nan, nan)``.
    """
    lag_order = select_order(df, maxlags=10, deterministic="ci").aic

    rank_test = select_coint_rank(df, 0, lag_order, method="trace",
                                  signif=0.05)
    if verbose:
        print(rank_test.summary())

    # Only the first test (rank 0 against a higher rank) decides whether any
    # cointegrating relation exists.
    is_cointegrated = rank_test.test_stats[0] > rank_test.crit_vals[0]
    if not is_cointegrated:
        # `np.nan` is the only spelling available on every NumPy version; the
        # `np.NaN` / `np.NAN` aliases no longer exist in NumPy 2.
        return False, np.nan, np.nan

    model = VECM(df, deterministic="ci",
                 k_ar_diff=lag_order,
                 coint_rank=rank_test.rank)
    vecm_res = model.fit()

    return True, vecm_res.beta, vecm_res.const_coint

In [4]:
def groom(s):
    """Return ``s`` with the Arabic letters yeh and kaf swapped for their Persian forms.

    Used on the ticker symbols of the 'Teh50' index before prices are requested.
    """
    for arabic_letter, persian_letter in (('ي', 'ی'), ('ك', 'ک')):
        s = s.replace(arabic_letter, persian_letter)
    return s

In [5]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [6]:
import shutil

# Root folder for the saved figures. An existing folder is deleted with all of
# its contents first, so every run starts from an empty ./plots/ tree.
PATH = r'./plots/'
if os.path.exists(PATH):
    shutil.rmtree(PATH)
os.makedirs(PATH)

# One record (sym1, sym2, indice) per pair that passes the cointegration test.
pairs = []

# For every index: load prices, test each pair of symbols for cointegration on
# the training window, backtest a +/-2 standard-deviation threshold rule on the
# cointegrating relation, and save a four-panel figure per cointegrated pair.
for indice in ['Dow Jones', 'CAC 40', 'Dax', 'Teh50']:
    print(indice, sep=' ', end='', flush=True)
    PATH = rf'./plots/{indice}/'
    os.makedirs(PATH, exist_ok=True)

    tickers = stock_list.get_stock_list(index=indice)
    symbolsnum = len(tickers)

    isTSE = (indice == 'Teh50')
    if isTSE:
        tickers = [groom(x) for x in tickers]

    data_historical = stock_prices.get_prices(tickers, is_tse=isTSE)
    data_historical = data_historical.dropna(how='all')
    # Keep only the most recent `interval` rows.
    data = data_historical[-interval:]

    # Drop symbols that have fewer than 85% non-missing rows in that window.
    limitPer = len(data) * .85
    data = data.dropna(thresh=limitPer, axis=1)

    # Everything below works on log prices.
    data = np.log(data)

    # The last `testsmpl` rows are held out; parameters are estimated on the rest.
    data_train = data[:-testsmpl]
    data_test = data[-testsmpl:]

    cols = data_train.columns
    for i in range(len(cols)-1):
        for j in range(i+1, len(cols)):
            df_train = data_train[[cols[i], cols[j]]].copy()
            df_test = data_test[[cols[i], cols[j]]].copy()
            try:
                is_cointegrated, BJ2n, C0J2n = get_cointegration_params(df_train.dropna(how='any'))
            except Exception:
                # Best effort: a pair whose test or fit fails is skipped.
                # `Exception` (not a bare except) so Ctrl-C can still stop the scan.
                continue
            if not is_cointegrated:
                continue
            pairs.append({
                'sym1': cols[i],
                'sym2': cols[j],
                'indice': indice
            })

            # Cointegrating relation: log prices times beta plus the constant.
            cointRinsmpl = df_train.dot(BJ2n) + C0J2n
            cointRtest = df_test.dot(BJ2n) + C0J2n

            # Thresholds come from the training window only, using the first
            # relation. ddof=0 matches what np.std computed here before.
            scointR = cointRinsmpl.iloc[:, 0].std(ddof=0)
            mcointR = cointRinsmpl.iloc[:, 0].mean()

            # DataFrame.append no longer exists in pandas 2; concat is equivalent.
            cointR = pd.concat([cointRinsmpl, cointRtest])
            longs = cointR <= mcointR - 2*scointR
            shorts = cointR >= mcointR + 2*scointR
            exitLongs = cointR >= mcointR - 1*scointR
            exitShorts = cointR <= mcointR + 1*scointR

            # Row masks: a row is selected when the condition holds in any column.
            long_rows = longs.to_numpy().any(axis=1)
            short_rows = shorts.to_numpy().any(axis=1)
            exit_long_rows = exitLongs.to_numpy().any(axis=1)
            exit_short_rows = exitShorts.to_numpy().any(axis=1)

            # Column 0 is the position in the first symbol, column 1 in the second.
            # NOTE(review): positions start at zero and are never carried forward,
            # so with a positive standard deviation the exit rules only reset rows
            # that are already flat. If hold-until-exit was intended, a forward
            # fill is missing here - confirm.
            positionsL = pd.DataFrame(np.zeros((cointR.shape[0], 2)))
            positionsS = pd.DataFrame(np.zeros((cointR.shape[0], 2)))

            positionsL.loc[long_rows, 0] = 1
            positionsL.loc[long_rows, 1] = -1
            positionsL.loc[exit_long_rows, 0] = 0
            positionsL.loc[exit_long_rows, 1] = 0

            positionsS.loc[short_rows, 0] = -1
            positionsS.loc[short_rows, 1] = 1
            positionsS.loc[exit_short_rows, 0] = 0
            positionsS.loc[exit_short_rows, 1] = 0

            positions = positionsL + positionsS

            # `data` is already in logs, so log returns are plain first
            # differences; taking np.log again here would difference log-log prices.
            yret = pd.concat([df_train, df_test]).diff()
            yret = yret[1:]

            # The position held on row t earns the return from row t to row t+1.
            pnl = (
                positions[0:-1][0] * yret[yret.columns[0]].values
                - BJ2n[1][0]*positions[0:-1][1]*yret[yret.columns[1]].values
            )

            n_test = df_test.shape[0]
            pnl_insmpl = pnl[:-n_test]
            pnl_test = pnl[-n_test:]

            rsuminsmpl = pnl_insmpl.cumsum()
            rsumtest = pnl_test.cumsum()

            # Sharpe ratios scaled by sqrt(252); ddof=0 as np.std used before.
            ShrpRatinsmpl = np.sqrt(252)*pnl_insmpl.mean()/pnl_insmpl.std(ddof=0)
            ShrpRatiTest = np.sqrt(252)*pnl_test.mean()/pnl_test.std(ddof=0)

            ticker1, ticker2 = df_train.columns
            fig, axs = plt.subplots(2, 2, figsize=(20, 10))

            # Top left: log prices of both symbols, training then test window.
            axs[0, 0].plot(df_train[ticker1])
            axs[0, 0].plot(df_test[ticker1])
            axs[0, 0].plot(df_train[ticker2])
            axs[0, 0].plot(df_test[ticker2])
            axs[0, 0].set_title(f'Pair Prices for {ticker1} and {ticker2}')
            axs[0, 0].tick_params(axis='x', rotation=15)

            # Top right: cointegrating relation with the +/-2 sigma entry bands.
            axs[0, 1].plot(cointR[:df_train.shape[0]])
            axs[0, 1].plot(cointR[-df_test.shape[0]:])
            axs[0, 1].set_title(f'Cointegrating Relations for {ticker1} and {ticker2}')
            axs[0, 1].plot(cointR.index, [mcointR - 2*scointR]*cointR.shape[0])
            axs[0, 1].plot(cointR.index, [mcointR + 2*scointR]*cointR.shape[0])
            axs[0, 1].tick_params(axis='x', rotation=15)

            # Bottom row: cumulative PnL, out of sample (left) and in sample (right).
            axs[1, 0].plot(df_test.index, rsumtest)
            axs[1, 0].set_title(f'Out of Sample Cumulative Return for Pair {ticker1} and {ticker2}')

            axs[1, 1].plot(df_train.index[1:], rsuminsmpl)
            axs[1, 1].set_title(f'In Sample Cumulative Return for Pair {ticker1} and {ticker2}')
            axs[1, 1].tick_params(axis='x', rotation=15)

            fig.subplots_adjust(hspace=.3)

            # Explicit extension so a dot inside a ticker is never read as the
            # output format.
            fig.savefig(rf'./plots/{indice}/cointr_{ticker1}_{ticker2}.png')
            # Close this figure explicitly so figures do not pile up in memory.
            plt.close(fig)

import datetime

# Write the cointegrated pairs to a dated workbook, one worksheet per index.
filename = rf'./pairs_{str(datetime.datetime.now().date())}.xlsx'
# Fixed columns keep the frame well-formed (and groupby valid) even when no
# pair was found.
df_pairs = pd.DataFrame(pairs, columns=['sym1', 'sym2', 'indice'])
# The context manager saves and closes the workbook, also when writing fails;
# ExcelWriter.save() is not available in pandas 2.
with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
    if df_pairs.empty:
        # Header-only sheet so the workbook still has a visible worksheet.
        df_pairs.to_excel(writer, sheet_name='pairs', index=False)
    for index, group_df in df_pairs.groupby("indice"):
        group_df.to_excel(writer, sheet_name=str(index), index=False)

[*********************100%***********************]  30 of 30 completed
[*********************100%***********************]  40 of 40 completed

1 Failed download:
- OCBI: No data found, symbol may be delisted
[*********************100%***********************]  40 of 40 completed

1 Failed download:
- AZSEY: No data found, symbol may be delisted
Teh50

In [7]:
df_pairs

Unnamed: 0,sym1,sym2,indice
0,CRM,JPM,Dow Jones
1,CRM,NKE,Dow Jones
2,CRM,AMGN,Dow Jones
3,CRM,DOW,Dow Jones
4,CRM,GS-PK,Dow Jones
...,...,...,...
541,ومعادن,کچاد,Teh50
542,ونیکی,وپاسار,Teh50
543,وپارس,وپاسار,Teh50
544,پارسان,کگل,Teh50
