# 1 Import packages and define useful functions

In [12]:
import pandas as pd
import numpy as np
from scipy import signal
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [4]:
def get_shortest_array(a,b):
    '''
    input - a,b arrays (pandas.values) arrays
    output - returns length of shortest array using first index of np.shape
    '''
    
    len_a = np.shape(a)[0]
    len_b = np.shape(b)[0]
    
    return min(len_a,len_b)

In [5]:
def plot_cor(x, y, window, filename, x_name, y_name):
    '''
    x (df) - first time series
    y (df) - second time series
    window (int) - period length
    filename - output filename as .png
    x_name - first time series name (for title)
    y_name - second time series name (for title)
    Note that x and y don't have to be the same length. the 'get_shortest_array' function will be used to automatically trim the time series
    '''
    shortest_time_series_len = get_shortest_array(x,y)
    plt.plot(x.iloc[:shortest_time_series_len].rolling(window).corr(y.iloc[:shortest_time_series_len]).fillna(method='bfill'));
    
    # rotate x-axis labels so date labels all fit in properly
    plt.xticks(rotation=90);
    plt.title('20-day rolling correlation between '+str(x_name)+ ' and '+str(y_name))
    plt.savefig(str(filename),dpi=600)


# 2 Import data: HK sector indices and Hang Seng

In [6]:
HK50 = pd.read_csv('data/HS/HK50.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCC = pd.read_csv('data/HS/HSCC.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCE = pd.read_csv('data/HS/HSCE.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIC = pd.read_csv('data/HS/HSCIC.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCICD = pd.read_csv('data/HS/HSCICD.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCICS = pd.read_csv('data/HS/HSCICS.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIE = pd.read_csv('data/HS/HSCIE.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIF = pd.read_csv('data/HS/HSCIF.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIH = pd.read_csv('data/HS/HSCIH.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIIG = pd.read_csv('data/HS/HSCIIG.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIIT = pd.read_csv('data/HS/HSCIIT.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIM = pd.read_csv('data/HS/HSCIM.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIPC = pd.read_csv('data/HS/HSCIPC.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIT = pd.read_csv('data/HS/HSCIT.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSCIU = pd.read_csv('data/HS/HSCIU.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')

Preprocess this data. Create a dataframe containing HSI, first put the above into a dictionary for easy iteration

In [7]:
HSI_dict = {"HSCC": HSCC,
            "HSCE": HSCE,
            "HSCIC": HSCIC,
            "HSCICD": HSCICD,
            "HSCICS": HSCICS,
            "HSCIE": HSCIE,
            "HSCIE": HSCIE,
            "HSCIF": HSCIF,
            "HSCIH": HSCIH,
            "HSCIIG": HSCIIG,
            "HSCIIT": HSCIIT,
            "HSCIM": HSCIM,
            "HSCIPC": HSCIPC,
            "HSCIT": HSCIT,
            "HSCIU": HSCIU}

HSI_df = pd.DataFrame()

for ind in HSI_dict.keys():
    HSI_df = pd.concat([HSI_df, HSI_dict[ind]['Price']], axis=1)
    
HSI_df.columns = HSI_dict.keys()

Calculate log returns. Convert dataframe index to datetime format (otherwise sklearn can run into issues).

In [8]:
HSI_returns_df =  np.log(HSI_df.dropna()) - np.log(HSI_df.dropna().shift(-1))
HK50['Return'] = np.log(HK50['Price']) - np.log(HK50['Price'].shift(-1))
HK50 = HK50.dropna()

HSI_returns_df.index = pd.to_datetime(HSI_returns_df.index)

# 3 Descriptive statistics for Hong Sector indices

In [9]:
HK50_mean = str(round(HK50['Return'].mean()*100.,5))+'%'
HK50_sdev = str(round(HK50['Return'].std()*100.,5))+'%'
HK50_count = HK50['Return'].size

# print
print("The mean, standard dev and count of HK50 returns are: "+str(HK50_mean)+' '+str(HK50_sdev)+' '+str(HK50_count))

The mean, standard dev and count of HK50 returns are: 0.00273% 1.27972% 2829


In [10]:
# to import into libreoffice calc, ctrl-shift-v then paste unformatted. Convert text-to-columns. This then goes into the report. 

for elem in HSI_returns_df.keys():
    mean = str(round(HSI_returns_df[elem].mean()*100.,5))+'%'
    sdev = str(round(HSI_returns_df[elem].std()*100.,5))+'%'
    count = HSI_returns_df[elem].size
    print("The mean, standard dev and count of "+str(elem)+" returns are: "+str(mean)+' '+str(sdev)+' '+str(count))

The mean, standard dev and count of HSCC returns are: -0.00867% 1.51073% 831
The mean, standard dev and count of HSCE returns are: -0.04452% 1.75764% 831
The mean, standard dev and count of HSCIC returns are: -0.03606% 1.46476% 831
The mean, standard dev and count of HSCICD returns are: 0.01509% 2.05191% 831
The mean, standard dev and count of HSCICS returns are: -0.00733% 1.56298% 831
The mean, standard dev and count of HSCIE returns are: 0.0057% 1.93187% 831
The mean, standard dev and count of HSCIF returns are: -0.01107% 1.38513% 831
The mean, standard dev and count of HSCIH returns are: -0.00482% 2.57401% 831
The mean, standard dev and count of HSCIIG returns are: -0.00198% 1.8395% 831
The mean, standard dev and count of HSCIIT returns are: 0.01012% 2.73956% 831
The mean, standard dev and count of HSCIM returns are: 0.05045% 2.34094% 831
The mean, standard dev and count of HSCIPC returns are: -0.05771% 1.71762% 831
The mean, standard dev and count of HSCIT returns are: -0.02795% 1.

## 3.1 HK50 vs sector index correlation 

In [11]:
HSI_returns_df.keys()

for elem in HSI_returns_df.keys():
    filename = "HK50_"+str(elem)+"_20corr.png"
    x_name = "HK50"
    y_name = str(elem)
    plot_cor(HK50['Return'], HSI_returns_df[elem], 20, filename, x_name, y_name)
    plt.close()


# 4 Linear regression

Inspect the data

In [31]:
HSI_returns_df.dropna()

Unnamed: 0,HSCC,HSCE,HSCIC,HSCICD,HSCICS,HSCIE,HSCIF,HSCIH,HSCIIG,HSCIIT,HSCIM,HSCIPC,HSCIT,HSCIU
2023-02-08,0.010649,-0.005958,0.014369,0.000062,-0.000140,0.007876,0.008265,-0.001266,0.001760,-0.014428,0.002442,0.006548,-0.003446,0.011128
2023-02-07,0.003958,0.005947,-0.003749,0.000337,-0.002919,0.012551,0.002230,0.007524,-0.001950,0.009034,0.007440,0.001699,-0.003116,0.001278
2023-02-06,-0.013832,-0.027121,-0.012363,-0.030311,-0.016533,-0.009576,-0.008704,-0.048956,-0.033097,-0.030508,-0.039940,-0.025736,-0.004875,-0.008257
2023-02-03,-0.013061,-0.015950,-0.017748,-0.010060,-0.013519,-0.023865,-0.016143,-0.015145,-0.013414,-0.009747,-0.021555,-0.008346,-0.000883,-0.007083
2023-02-02,-0.009237,-0.007249,-0.011150,-0.006160,0.001774,-0.002007,-0.006936,0.015662,-0.007124,-0.005774,-0.009322,-0.008289,0.000835,0.000679
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-09-17,-0.010373,-0.011915,-0.004734,-0.016643,-0.011244,-0.005095,-0.015320,-0.008885,-0.012190,-0.010805,-0.023708,-0.010862,-0.013217,-0.002892
2019-09-16,0.005239,-0.005583,-0.013457,-0.014020,-0.004171,0.036987,-0.010595,-0.005186,-0.004240,-0.008641,0.000300,-0.011050,-0.005887,-0.004659
2019-09-13,0.007474,0.008964,0.001008,0.019108,0.008152,0.006495,0.011142,0.007242,0.013390,0.004335,0.017621,0.013484,0.003505,0.008194
2019-09-12,0.000133,0.002586,-0.000803,0.007008,0.009571,-0.017801,-0.004690,0.010091,0.008404,0.011164,0.002103,0.000022,0.002105,0.004331


Linear regression. X - sector index. Y - HK50. No lag.

In [32]:
# old method using ffill which introduces look-ahead bias. 

sector_models_OLS_coinc = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem]["2019-09-10":"2023-02-08"].fillna(method='ffill')
    y = HK50['Return']["2019-09-10":"2023-02-08"].fillna(method='ffill')
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_coinc[elem] = sm.OLS(y, X).fit() 

In [41]:
# new method using dropna to avoid look-ahead bias

sector_models_OLS_coinc = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem]["2019-09-10":"2023-02-08"].dropna()
    y = HK50['Return']["2019-09-10":"2023-02-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_coinc[elem] = sm.OLS(y, X).fit() 

## 4.1 Sector indices are assumed to lead the Hang Seng index

In [None]:
# leading indicator variables and date ranges
#HSI_returns_df.shift(5).dropna() # 5 days. Dates ["2019-09-10":"2023-02-01"]
#HSI_returns_df.shift(20).dropna() # 20 days. Dates ["2019-09-10":"2023-01-06"]
#HSI_returns_df.shift(60).dropna() # 60 days. Dates ["2019-09-10":"2022-11-08"]

In [79]:
# sector indices leads the Hang Seng by 5 days. 

sector_models_OLS_X5R = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(5)["2019-09-10":"2023-02-01"].dropna()
    y = HK50['Return']["2019-09-10":"2023-02-01"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X5R[elem] = sm.OLS(y, X).fit() 

In [80]:
# sector indices leads the Hang Seng by 20 days. 

sector_models_OLS_X20R = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(20)["2019-09-10":"2023-01-06"].dropna()
    y = HK50['Return']["2019-09-10":"2023-01-06"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X20R[elem] = sm.OLS(y, X).fit() 

In [81]:
# sector indices leads the Hang Seng by 60 days. 

sector_models_OLS_X60R = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(60)["2019-09-10":"2022-11-08"].dropna()
    y = HK50['Return']["2019-09-10":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X60R[elem] = sm.OLS(y, X).fit() 

## 4.2 Sector indices are assumed to lag the Hang Seng index

In [34]:
# lagging indicator variables and date ranges
#HSI_returns_df.shift(-5).dropna() # 5 days. Dates ["2019-09-18":"2023-02-08"]
#HSI_returns_df.shift(-20).dropna() # 20 days. Dates ["2019-10-11":"2023-02-08"]
#HSI_returns_df.shift(-60).dropna() # 60 days. Dates ["2019-12-06":"2023-02-08"]

In [82]:
# sector indices lags the Hang Seng by 5 days. 

sector_models_OLS_X5F = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-5)["2019-09-18":"2023-02-08"].dropna()
    y = HK50['Return']["2019-09-18":"2023-02-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X5F[elem] = sm.OLS(y, X).fit() 

In [83]:
# sector indices lags the Hang Seng by 20 days. 

sector_models_OLS_X20F = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-20)["2019-10-11":"2023-02-08"].dropna()
    y = HK50['Return']["2019-10-11":"2023-02-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X20F[elem] = sm.OLS(y, X).fit() 

In [84]:
# sector indices lags the Hang Seng by 60 days. 

sector_models_OLS_X60F = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-60)["2019-12-06":"2023-02-08"].dropna()
    y = HK50['Return']["2019-12-06":"2023-02-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X60F[elem] = sm.OLS(y, X).fit() 

# 5 Results

In [85]:
for elem in sector_models_OLS_coinc.keys():
    print('Exo variable: '+str(elem)+'. AIC: '+str(sector_models_OLS_coinc[elem].aic))    

Exo variable: HSCC. AIC: -5501.85862729232
Exo variable: HSCE. AIC: -6985.486560787861
Exo variable: HSCIC. AIC: -5086.995422214389
Exo variable: HSCICD. AIC: -5796.449748781751
Exo variable: HSCICS. AIC: -5333.82092524561
Exo variable: HSCIE. AIC: -4891.309350743385
Exo variable: HSCIF. AIC: -5712.907784415331
Exo variable: HSCIH. AIC: -5193.568316418795
Exo variable: HSCIIG. AIC: -5405.134202123646
Exo variable: HSCIIT. AIC: -5630.142894615218
Exo variable: HSCIM. AIC: -5040.984149832473
Exo variable: HSCIPC. AIC: -5341.760008663215
Exo variable: HSCIT. AIC: -4856.581792373936
Exo variable: HSCIU. AIC: -5002.735463603107


In [86]:
for elem in sector_models_OLS_X5R.keys():
    print('Exo variable (index leads +5): '+str(elem)+'. AIC: '+str(sector_models_OLS_X5R[elem].aic))    

Exo variable (index leads +5): HSCC. AIC: -4525.042754115662
Exo variable (index leads +5): HSCE. AIC: -4523.556725260432
Exo variable (index leads +5): HSCIC. AIC: -4526.842629981678
Exo variable (index leads +5): HSCICD. AIC: -4523.959093033912
Exo variable (index leads +5): HSCICS. AIC: -4523.250641438294
Exo variable (index leads +5): HSCIE. AIC: -4524.431630594039
Exo variable (index leads +5): HSCIF. AIC: -4523.817050604147
Exo variable (index leads +5): HSCIH. AIC: -4525.764979127087
Exo variable (index leads +5): HSCIIG. AIC: -4523.454412125082
Exo variable (index leads +5): HSCIIT. AIC: -4523.216293978901
Exo variable (index leads +5): HSCIM. AIC: -4526.071541701122
Exo variable (index leads +5): HSCIPC. AIC: -4524.3262489942945
Exo variable (index leads +5): HSCIT. AIC: -4524.329919552522
Exo variable (index leads +5): HSCIU. AIC: -4527.98844808069


In [87]:
for elem in sector_models_OLS_X20R.keys():
    print('Exo variable (index leads +20): '+str(elem)+'. AIC: '+str(sector_models_OLS_X20R[elem].aic))    

Exo variable (index leads +20): HSCC. AIC: -4436.446092136544
Exo variable (index leads +20): HSCE. AIC: -4436.417662473644
Exo variable (index leads +20): HSCIC. AIC: -4436.440838487722
Exo variable (index leads +20): HSCICD. AIC: -4436.172862907842
Exo variable (index leads +20): HSCICS. AIC: -4436.996058355863
Exo variable (index leads +20): HSCIE. AIC: -4436.213510272333
Exo variable (index leads +20): HSCIF. AIC: -4436.124406574287
Exo variable (index leads +20): HSCIH. AIC: -4436.899780622229
Exo variable (index leads +20): HSCIIG. AIC: -4436.152311590283
Exo variable (index leads +20): HSCIIT. AIC: -4436.151587062181
Exo variable (index leads +20): HSCIM. AIC: -4436.295473833491
Exo variable (index leads +20): HSCIPC. AIC: -4438.16312731162
Exo variable (index leads +20): HSCIT. AIC: -4436.1420257578875
Exo variable (index leads +20): HSCIU. AIC: -4436.544287177024


In [88]:
for elem in sector_models_OLS_X60R.keys():
    print('Exo variable (index leads +60): '+str(elem)+'. AIC: '+str(sector_models_OLS_X60R[elem].aic))    

Exo variable (index leads +60): HSCC. AIC: -4261.266732640457
Exo variable (index leads +60): HSCE. AIC: -4261.219750645516
Exo variable (index leads +60): HSCIC. AIC: -4261.766863587083
Exo variable (index leads +60): HSCICD. AIC: -4261.2980623043495
Exo variable (index leads +60): HSCICS. AIC: -4261.21993872725
Exo variable (index leads +60): HSCIE. AIC: -4267.845832058545
Exo variable (index leads +60): HSCIF. AIC: -4261.507530252111
Exo variable (index leads +60): HSCIH. AIC: -4261.106585596372
Exo variable (index leads +60): HSCIIG. AIC: -4261.643518168417
Exo variable (index leads +60): HSCIIT. AIC: -4261.1761691333995
Exo variable (index leads +60): HSCIM. AIC: -4264.514847998013
Exo variable (index leads +60): HSCIPC. AIC: -4261.217342737595
Exo variable (index leads +60): HSCIT. AIC: -4261.22591859336
Exo variable (index leads +60): HSCIU. AIC: -4261.103311241484


In [89]:
for elem in sector_models_OLS_X5F.keys():
    print('Exo variable (index lags +5): '+str(elem)+'. AIC: '+str(sector_models_OLS_X5F[elem].aic))    

Exo variable (index lags +5): HSCC. AIC: -4516.674916221706
Exo variable (index lags +5): HSCE. AIC: -4516.977312717524
Exo variable (index lags +5): HSCIC. AIC: -4517.667921517605
Exo variable (index lags +5): HSCICD. AIC: -4516.73126165697
Exo variable (index lags +5): HSCICS. AIC: -4516.765595122374
Exo variable (index lags +5): HSCIE. AIC: -4516.698997061835
Exo variable (index lags +5): HSCIF. AIC: -4517.999317530225
Exo variable (index lags +5): HSCIH. AIC: -4518.198223993762
Exo variable (index lags +5): HSCIIG. AIC: -4516.701013641834
Exo variable (index lags +5): HSCIIT. AIC: -4517.842717203826
Exo variable (index lags +5): HSCIM. AIC: -4518.665304191885
Exo variable (index lags +5): HSCIPC. AIC: -4517.006393891611
Exo variable (index lags +5): HSCIT. AIC: -4516.674903484768
Exo variable (index lags +5): HSCIU. AIC: -4517.071126872671


In [90]:
for elem in sector_models_OLS_X20F.keys():
    print('Exo variable (index lags +20): '+str(elem)+'. AIC: '+str(sector_models_OLS_X20F[elem].aic))    

Exo variable (index lags +20): HSCC. AIC: -4422.136062207921
Exo variable (index lags +20): HSCE. AIC: -4422.744499046782
Exo variable (index lags +20): HSCIC. AIC: -4422.146692819026
Exo variable (index lags +20): HSCICD. AIC: -4422.537616312783
Exo variable (index lags +20): HSCICS. AIC: -4422.597362808753
Exo variable (index lags +20): HSCIE. AIC: -4423.091308519577
Exo variable (index lags +20): HSCIF. AIC: -4422.131160654355
Exo variable (index lags +20): HSCIH. AIC: -4422.445621139576
Exo variable (index lags +20): HSCIIG. AIC: -4423.019679804093
Exo variable (index lags +20): HSCIIT. AIC: -4422.760048605236
Exo variable (index lags +20): HSCIM. AIC: -4422.730918136125
Exo variable (index lags +20): HSCIPC. AIC: -4422.1649804973395
Exo variable (index lags +20): HSCIT. AIC: -4428.990373405466
Exo variable (index lags +20): HSCIU. AIC: -4422.364626237678


In [91]:
for elem in sector_models_OLS_X60F.keys():
    print('Exo variable (index lags +60): '+str(elem)+'. AIC: '+str(sector_models_OLS_X60F[elem].aic))    

Exo variable (index lags +60): HSCC. AIC: -4181.469039401449
Exo variable (index lags +60): HSCE. AIC: -4181.276006200806
Exo variable (index lags +60): HSCIC. AIC: -4181.335363445103
Exo variable (index lags +60): HSCICD. AIC: -4181.397395560902
Exo variable (index lags +60): HSCICS. AIC: -4181.223995555003
Exo variable (index lags +60): HSCIE. AIC: -4181.814709344999
Exo variable (index lags +60): HSCIF. AIC: -4182.268475069295
Exo variable (index lags +60): HSCIH. AIC: -4181.629234079855
Exo variable (index lags +60): HSCIIG. AIC: -4181.306004167194
Exo variable (index lags +60): HSCIIT. AIC: -4181.150650228314
Exo variable (index lags +60): HSCIM. AIC: -4181.150325408664
Exo variable (index lags +60): HSCIPC. AIC: -4181.132170441839
Exo variable (index lags +60): HSCIT. AIC: -4182.77925575201
Exo variable (index lags +60): HSCIU. AIC: -4183.695376568108


# 6 Regression again but use a time frame that includes all lag periods

## 6.1 J-test

In [62]:
# leading indicator variables and date ranges
#HSI_returns_df.shift(5).dropna() # 5 days. Dates ["2019-09-10":"2023-02-01"]
#HSI_returns_df.shift(20).dropna() # 20 days. Dates ["2019-09-10":"2023-01-06"]
#HSI_returns_df.shift(60).dropna() # 60 days. Dates ["2019-09-10":"2022-11-08"]

# lagging indicator variables and date ranges
#HSI_returns_df.shift(-5).dropna() # 5 days. Dates ["2019-09-18":"2023-02-08"]
#HSI_returns_df.shift(-20).dropna() # 20 days. Dates ["2019-10-11":"2023-02-08"]
#HSI_returns_df.shift(-60).dropna() # 60 days. Dates ["2019-12-06":"2023-02-08"]

# time period which includes all: ["2019-12-06":"2022-11-08"]

In [63]:
# new method using dropna to avoid look-ahead bias

sector_models_OLS_coinc_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem]["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_coinc_J[elem] = sm.OLS(y, X).fit() 

In [72]:
# sector indices leads the Hang Seng by 5 days. 

sector_models_OLS_X5R_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(5)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X5R_J[elem] = sm.OLS(y, X).fit() 

In [73]:
# sector indices leads the Hang Seng by 20 days. 

sector_models_OLS_X20R_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(20)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X20R_J[elem] = sm.OLS(y, X).fit() 

In [74]:
# sector indices leads the Hang Seng by 60 days. 

sector_models_OLS_X60R_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(60)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X60R_J[elem] = sm.OLS(y, X).fit() 

In [75]:
# sector indices lags the Hang Seng by 5 days. 

sector_models_OLS_X5F_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-5)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X5F_J[elem] = sm.OLS(y, X).fit() 

In [76]:
# sector indices lags the Hang Seng by 20 days. 

sector_models_OLS_X20F_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-20)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X20F_J[elem] = sm.OLS(y, X).fit() 

In [77]:
# sector indices lags the Hang Seng by 60 days. 

sector_models_OLS_X60F_J = {}

for elem in HSI_returns_df:
    X = HSI_returns_df[elem].shift(-60)["2019-12-06":"2022-11-08"].dropna()
    y = HK50['Return']["2019-12-06":"2022-11-08"].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    sector_models_OLS_X60F_J[elem] = sm.OLS(y, X).fit() 

In [78]:
compare_j(sector_models_OLS_coinc_J["HSCC"],sector_models_OLS_X60F_J["HSCC"])

ValueError: The exog in results_x and in results_z are nested. J comparison requires that models are non-nested.


In [None]:
# I got the time index wrong again. 

In [None]:
from itertools import combinations

sample_list = HSI_returns_df.columns
list_combinations = []

list_combinations += list(combinations(sample_list,2))
    

In [None]:
results = {}

for elem in list_combinations:
    #print(elem)
    print(elem)
    first_HK_index = elem[0]
    second_HK_index = elem[1]
    results[elem] = compare_j(sector_models_OLS[first_HK_index],sector_models_OLS[second_HK_index])
    
results = pd.DataFrame(results,index=list_combinations,columns=['J-test statistic', 'J-test p-value'])