In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from helper_functions import stock_prices, stock_list



In [2]:
# Number of trailing observations held out as the test window for every pair.
testsmpl = 126

# Number of most recent rows of price history kept per pair (train + test).
# The earlier `interval = 252*3` assignment was dead code: it was overwritten
# before any use, so only the effective value is kept here.
# NOTE(review): 521 is not a whole multiple of 252 - confirm it is intended.
interval = 521

In [3]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.api import VAR
from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order

def get_cointegration_params(df, verbose=False):
    """Select a VECM lag order and cointegration rank for the series in ``df``.

    The lag order is the AIC choice of ``select_order`` (at most 10 lagged
    differences, ``deterministic="ci"``). The rank comes from
    ``select_coint_rank`` using the trace statistic at the 5% level.

    Parameters
    ----------
    df : array-like / DataFrame
        Observations passed unchanged to the statsmodels selection helpers.
    verbose : bool, default False
        When True, print the summary table of the rank test.

    Returns
    -------
    tuple
        ``(True, lag_order, rank)`` when the first trace statistic exceeds its
        critical value, otherwise ``(False, nan, nan)``.
    """
    lag_order = select_order(df, maxlags=10, deterministic="ci").aic

    rank_test = select_coint_rank(df, 0, lag_order, method="trace",
                                  signif=0.05)
    if verbose:
        print(rank_test.summary())

    # Only the first test row is compared against its critical value to decide
    # whether the series are treated as cointegrated.
    is_cointegrated = rank_test.test_stats[0] > rank_test.crit_vals[0]
    if not is_cointegrated:
        # np.nan is the canonical spelling; the np.NaN / np.NAN aliases were
        # removed in NumPy 2.0.
        return False, np.nan, np.nan

    # The original fitted a VECM here and discarded the result. Callers fit
    # their own model from the returned parameters, so that fit is dropped.
    return True, lag_order, rank_test.rank

In [4]:
# Load the candidate pairs of every index sheet into a single frame.
# - pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
#   like append's default, it keeps each sheet's original row index.
# - The context manager closes the workbook even if reading a sheet fails.
sheet_names = ['Dow Jones', 'CAC 40', 'Dax', 'Teh50']
with pd.ExcelFile('../01_pair_trading/pairs_2023-01-08.xlsx') as file:
    df = pd.concat(
        [pd.read_excel(file, sheet_name=sheet) for sheet in sheet_names]
    )

In [5]:
from sklearn.metrics import mean_absolute_percentage_error
def get_mape(df, ticker, pred_tag, test_count=126):
    """Return the MAPE between a ticker column and its prediction column.

    Rows containing a missing value in any column are discarded first; the
    score is then computed over the last ``test_count`` remaining rows,
    comparing column ``ticker`` with column ``f'{ticker}_{pred_tag}'``.
    The value is returned exactly as produced by
    ``mean_absolute_percentage_error``.
    """
    complete_rows = df.dropna(how='any')
    holdout = complete_rows.iloc[-test_count:]
    actual = holdout[ticker]
    predicted = holdout[f'{ticker}_{pred_tag}']
    return mean_absolute_percentage_error(actual, predicted)

In [6]:
def groom(s):
    """Return ``s`` with every 'ي' replaced by 'ی' and every 'ك' replaced by 'ک'."""
    substitutions = (('ي', 'ی'), ('ك', 'ک'))
    for old_char, new_char in substitutions:
        s = s.replace(old_char, new_char)
    return s

In [9]:
import itertools
import os
PATH = r'./preds_vecm/'
# exist_ok=True replaces the check-then-create pair and keeps the cell re-runnable.
os.makedirs(PATH, exist_ok=True)

# One dict per (ticker, forecast type) with the MAPE of that forecast.
errors = []

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides warnings raised while fitting - confirm this is wanted.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')


def _one_step_forecasts(log_prices, n_train, n_test, lag_order, rank):
    """Walk-forward 1-step-ahead VECM forecasts.

    For test position ``d`` the model is refitted on the first ``n_train + d``
    rows of ``log_prices`` and asked for a single step. Slicing an immutable
    history replaces the row-by-row ``DataFrame.append`` (removed in pandas
    2.0, and quadratic in a loop).

    Returns an array with one row per test position and one column per series.
    """
    forecasts = []
    for d in range(n_test):
        window = log_prices.iloc[:n_train + d]
        fitted = VECM(window, deterministic="ci",
                      k_ar_diff=lag_order,
                      coint_rank=rank).fit()
        forecasts.append(fitted.predict(steps=1)[0])
    return np.vstack(forecasts)


def _multi_step_forecasts(train, n_test, lag_order, rank):
    """Fit one VECM on ``train`` and forecast ``n_test`` steps ahead in one call."""
    fitted = VECM(train, deterministic="ci",
                  k_ar_diff=lag_order,
                  coint_rank=rank).fit()
    return fitted.predict(steps=n_test)


def _attach_forecasts(frame, n_train, forecasts, names, tag):
    """Add a ``'<name>_<tag>'`` column per series: NaN on training rows, forecasts on test rows."""
    for col, name in enumerate(names):
        values = np.full(len(frame), np.nan)
        values[n_train:] = forecasts[:, col]
        frame[f'{name}_{tag}'] = values


for indice in ['Dow Jones', 'CAC 40', 'Dax']:
    print(indice, '>>', flush=True)
    df1 = df[df['indice']==indice]
    tickers = stock_list.get_stock_list(index=indice)
    isTSE = (indice == 'Teh50')
    if isTSE:
        tickers = [groom(x) for x in tickers]
    data_historical = stock_prices.get_prices(tickers, isTSE)

    for row in range(df1.shape[0]):
        # Each row of the pairs sheet is unpacked as (ticker1, ticker2, indice).
        # The third value is bound to its own name so the outer loop variable
        # is not rebound; df1 is already filtered on it.
        ticker1, ticker2, pair_indice = df1.iloc[row]

        data_historical1 = data_historical[[ticker1, ticker2]].dropna(how='all')
        data = data_historical1[-interval:]

        # Drop a series that has fewer than 85% non-missing values in the window.
        limitPer = len(data) * .85
        data = data.dropna(thresh=limitPer, axis=1)
        if data.shape[1] < 2:
            # One leg of the pair was dropped: nothing to model.
            continue

        data = np.log(data).dropna(how='any')
        if len(data) <= testsmpl:
            # Not enough rows to leave any training data before the test window.
            continue

        # Frozen copy of the observed log prices; `data` gains forecast columns below.
        log_prices = data.copy()
        n_train = len(log_prices) - testsmpl
        data_train = log_prices.iloc[:n_train]

        # Lag order and rank are selected once on the training window and
        # reused for both forecast types (the inputs are identical).
        is_cointegrated, lag_order, rank = get_cointegration_params(data_train)
        if not is_cointegrated:
            continue

        pair = (ticker1, ticker2)

        # 1Step
        one_step = _one_step_forecasts(log_prices, n_train, testsmpl, lag_order, rank)
        _attach_forecasts(data, n_train, one_step, pair, '1step')

        # Multistep
        multi_step = _multi_step_forecasts(data_train, testsmpl, lag_order, rank)
        _attach_forecasts(data, n_train, multi_step, pair, 'multi')

        # Plotting
        ax = data.plot(figsize=(15, 8))
        ax.figure.savefig(os.path.join(PATH, f'{ticker1}_{ticker2}.png'))
        # Close this figure explicitly so figures do not pile up across pairs.
        plt.close(ax.figure)

        # mape
        for ticker, tag in itertools.product(pair, ['1step', 'multi']):
            mape = get_mape(data, ticker=ticker, pred_tag=tag, test_count=testsmpl)
            errors.append({
                'tag': f'vecm_{tag}',
                'ticker': ticker,
                'pair': ticker2 if ticker==ticker1 else ticker1,
                'mape': mape*100,
                'indice': indice
            })

filename = r'./vecm_mape.xlsx'
df_errors = pd.DataFrame(errors)
if df_errors.empty:
    # Without rows there is no "indice" column to group by and no sheet to write.
    print('No cointegrated pairs found; nothing written to', filename)
else:
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # was removed in pandas 2.0.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        for index, group_df in df_errors.groupby("indice"):
            group_df.to_excel(writer, sheet_name=str(index), index=False)

Dow Jones >>
[*********************100%***********************]  30 of 30 completed
CAC 40 >>
[*********************100%***********************]  40 of 40 completed

1 Failed download:
- OCBI: No data found, symbol may be delisted
Dax >>
[*********************100%***********************]  40 of 40 completed

1 Failed download:
- AZSEY: No data found, symbol may be delisted
