# 1 Import packages and define useful functions

In [1]:
import pandas as pd
import numpy as np
from scipy import signal
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
def get_shortest_array(a,b):
    '''
    input - a,b arrays (pandas.values) arrays
    output - returns length of shortest array using first index of np.shape
    '''
    
    len_a = np.shape(a)[0]
    len_b = np.shape(b)[0]
    
    return min(len_a,len_b)

In [40]:
def plot_cor(x, y, window, filename, x_name, y_name, unit='day'):
    '''
    x (df) - first time series
    y (df) - second time series
    window (int) - period length
    filename - output filename as .png
    x_name - first time series name (for title)
    y_name - second time series name (for title)
    Note that x and y don't have to be the same length. the 'get_shortest_array' function will be used to automatically trim the time series
    '''
    shortest_time_series_len = get_shortest_array(x,y)
    plt.plot(x.iloc[:shortest_time_series_len].rolling(window).corr(y.iloc[:shortest_time_series_len]).fillna(method='bfill'));
    
    # rotate x-axis labels so date labels all fit in properly
    plt.xticks(rotation=90);
    plt.title(str(window)+'-'+str(unit)+' rolling correlation between '+str(x_name)+ ' and '+str(y_name))
    plt.savefig(str(filename),dpi=600)


In [4]:
def equity_d2m(dataframe):
    '''
    for a dataframe with daily OHLC equity data, convert this to monthly OHLC data.
    '''
    
    return pd.DataFrame({"Open":dataframe.Open.resample('MS').first(),
                         "High":dataframe.High.resample('MS').max(),
                         "Low":dataframe.Low.resample('MS').min(),
                         "Price":dataframe.Price.resample('MS').last()})

In [5]:
def equity_d2q(dataframe):
    '''
    for a dataframe with daily OHLC equity data, convert this to monthly OHLC data.
    '''
    
    return pd.DataFrame({"Open":dataframe.Open.resample('QS').first(),
                         "High":dataframe.High.resample('QS').max(),
                         "Low":dataframe.Low.resample('QS').min(),
                         "Price":dataframe.Price.resample('QS').last()})

In [6]:
def conv_dt_index_to_M(dataframe):
    '''
    Helper script to ensure that all monthly time series are aligned.
    '''
    
    dataframe.index = dataframe.index.values.astype('datetime64[M]')
    return dataframe

# 2 Import data: Hang Seng and macroeconomic indicators

In [7]:
# daily time series
HK50 = pd.read_csv('data/HS/HK50.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
DXY = pd.read_csv('data/macro/daily/DXY.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HSIL = pd.read_csv('data/macro/daily/^HSIL.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
SPY = pd.read_csv('data/macro/daily/SPY.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')

# currencies
AUDUSD = pd.read_csv('data/macro/daily/AUD_USD.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
AUDCNY = pd.read_csv('data/macro/daily/AUD_CNY.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
CNYHKD = pd.read_csv('data/macro/daily/CNY_HKD.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
USDHKD = pd.read_csv('data/macro/daily/USD_HKD.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HKDEUR = pd.read_csv('data/macro/daily/HKD_EUR.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
HKDAUD = pd.read_csv('data/macro/daily/HKD_AUD.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')

HK50.index = HK50.index.values.astype('datetime64[D]')
DXY.index = DXY.index.values.astype('datetime64[D]')
HSIL.index = HSIL.index.values.astype('datetime64[D]')
SPY.index = SPY.index.values.astype('datetime64[D]')
AUDUSD.index = AUDUSD.index.values.astype('datetime64[D]')
AUDCNY.index = AUDCNY.index.values.astype('datetime64[D]')
CNYHKD.index = CNYHKD.index.values.astype('datetime64[D]')
USDHKD.index = USDHKD.index.values.astype('datetime64[D]')
HKDEUR.index = HKDEUR.index.values.astype('datetime64[D]')
HKDAUD.index = HKDAUD.index.values.astype('datetime64[D]')

In [8]:
# monthly time series
US10Y = pd.read_csv('data/macro/monthly/US_10Y.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
CN10Y = pd.read_csv('data/macro/monthly/CN_10Y.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
CAIXIN = pd.read_csv('data/macro/monthly/Caixin_PMI.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')

# CPI
US_CPI_FRED_all = pd.read_csv('data/macro/monthly/CPIAUCSL.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')

US_CPI_OECD = pd.read_csv('data/macro/monthly/US_CPI_OECD.csv', index_col=0, thousands=',')
US_CPI_OECD.index = pd.to_datetime(US_CPI_OECD.index, format="%b-%y")

CN_CPI_OECD = pd.read_csv('data/macro/monthly/CN_CPI_OECD.csv', index_col=0, thousands=',')
CN_CPI_OECD.index = pd.to_datetime(CN_CPI_OECD.index, format="%b-%y")

# M3 money supply
US_M2 = pd.read_csv('data/macro/monthly/US_M2.csv', index_col=0, thousands=',')
US_M2.index = pd.to_datetime(US_M2.index, format="%b-%y")

CN_M2 = pd.read_csv('data/macro/monthly/CN_M2.csv', index_col=0, thousands=',')
CN_M2.index = pd.to_datetime(CN_M2.index, format="%b-%y")

EU_M2 = pd.read_csv('data/macro/monthly/EU_M2.csv', index_col=0, thousands=',')
EU_M2.index = pd.to_datetime(EU_M2.index, format="%b-%y")


# ensure all time series are indexed properly (i.e. to the first day of the month)

US10Y = conv_dt_index_to_M(US10Y)
CN10Y = conv_dt_index_to_M(CN10Y)
CAIXIN = conv_dt_index_to_M(CAIXIN)
US_CPI_FRED_all = conv_dt_index_to_M(US_CPI_FRED_all)
US_CPI_OECD = conv_dt_index_to_M(US_CPI_OECD)
CN_CPI_OECD = conv_dt_index_to_M(CN_CPI_OECD)
US_M2 = conv_dt_index_to_M(US_M2)
CN_M2 = conv_dt_index_to_M(CN_M2)
EU_M2 = conv_dt_index_to_M(EU_M2)

In [9]:
# quarterly time series
CN_GDP = pd.read_csv('data/macro/quarterly/CN_GDP.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')
US_GDP = pd.read_csv('data/macro/quarterly/US_GDP.csv', index_col=0, parse_dates=True, infer_datetime_format=True, thousands=',')


CN_GDP.index = CN_GDP.index.to_period('Q')
CN_GDP = CN_GDP.shift()

US_GDP.index = US_GDP.index.to_period('Q')

Preprocess this data. Create a dataframe containing macroeconomic indicators. 

In [10]:
macro_daily = {"HK50": HK50,
               "DXY": DXY,
            "HSIL": HSIL,
            "SPY": SPY,
            "AUDUSD": AUDUSD,
            "AUDCNY": AUDCNY,
            "CNYHKD": CNYHKD,
            "USDHKD": USDHKD,
            "HKDEUR": HKDEUR,
            "HKDAUD": HKDAUD}

macro_daily_df = pd.DataFrame()
macro_daily_df.index = pd.date_range(start='2000-01-03',end='2023-03-24',freq='D')

for ind in macro_daily.keys():
    macro_daily_df = pd.concat([macro_daily_df, macro_daily[ind]['Price']], axis=1)
    
macro_daily_df.columns = macro_daily.keys()
macro_daily_df = macro_daily_df.dropna()

In [11]:
macro_daily_df

Unnamed: 0,HK50,DXY,HSIL,SPY,AUDUSD,AUDCNY,CNYHKD,USDHKD,HKDEUR,HKDAUD
2013-03-25,22251.15,82.83,15.84,154.95,1.0467,6.4989,1.2497,7.7618,0.1002,0.1232
2013-03-26,22311.08,82.88,15.75,156.19,1.0484,6.5107,1.2494,7.7602,0.1003,0.1230
2013-03-27,22464.82,83.22,15.15,156.19,1.0444,6.4893,1.2494,7.7634,0.1008,0.1234
2013-03-28,22299.63,82.99,14.69,156.67,1.0412,6.4706,1.2491,7.7626,0.1005,0.1237
2013-04-02,22367.82,82.92,15.04,156.82,1.0448,6.4772,1.2522,7.7622,0.1005,0.1233
...,...,...,...,...,...,...,...,...,...,...
2023-01-30,22069.73,102.28,24.90,400.59,0.7059,4.7634,1.1606,7.8352,0.1176,0.1807
2023-01-31,21842.33,102.10,24.78,406.48,0.7054,4.7643,1.1606,7.8401,0.1174,0.1807
2023-02-01,22072.18,101.22,23.69,410.80,0.7135,4.8094,1.1630,7.8424,0.1160,0.1786
2023-02-02,21958.36,101.75,23.04,416.78,0.7076,4.7614,1.1646,7.8441,0.1168,0.1801


Reindex HK50 for monthly values, then add to macro_monthly dictionary.

In [12]:
HK50_monthly = equity_d2m(HK50)

In [13]:
macro_monthly = {"HK50": HK50_monthly['Price'],
                 "US10Y": US10Y['US Real 10yr Rate (10yr Benchmark - CPI All Items)'],
            "CN10Y": CN10Y['China Real 10yr Rate (10yr Benchmark - CPI All Items)'],
            "CAIXIN": CAIXIN['Caixin China Manufacturing PMI'],
            "US_CPI_FRED_all": US_CPI_FRED_all['CPIAUCSL'],
            "US_CPI_OECD": US_CPI_OECD['Index'],
            "CN_CPI_OECD": CN_CPI_OECD['Index'],
            "US_M2": US_M2['Index'],
            "CN_M2": CN_M2['Index'],
            "EU_M2": EU_M2['Index']}


macro_monthly_df = pd.DataFrame()
macro_monthly_df.index = pd.date_range(start='1947-01-01',end='2023-02-01',freq='MS')

for ind in macro_monthly.keys():
    macro_monthly_df = pd.concat([macro_monthly_df, macro_monthly[ind]], axis=1)
    
macro_monthly_df.columns = macro_monthly.keys()
#macro_monthly_df = macro_monthly_df.dropna()
macro_monthly_df = macro_monthly_df["03-01-2013":"12-01-2022"]

In [14]:
macro_monthly_df

Unnamed: 0,HK50,US10Y,CN10Y,CAIXIN,US_CPI_FRED_all,US_CPI_OECD,CN_CPI_OECD,US_M2,CN_M2,EU_M2
2013-03-01,22299.63,0.35,1.34,50.4,232.282,98.20941,96.37189,87.58736,76.81239,92.22491
2013-04-01,22737.01,0.56,1.10,51.6,231.797,98.10731,96.56242,87.88541,77.81362,92.68789
2013-05-01,22392.16,0.77,1.35,50.4,231.893,98.28198,95.98406,88.23659,78.28904,92.92949
2013-06-01,20803.29,0.80,0.90,49.2,232.445,98.51783,95.98406,88.70732,78.77515,92.90137
2013-07-01,21883.66,0.71,0.90,48.2,232.900,98.55664,96.07897,89.08839,79.37891,92.82501
...,...,...,...,...,...,...,...,...,...,...
2022-08-01,19954.39,-5.10,0.15,50.4,295.320,124.95770,115.00400,179.59060,,151.23070
2022-09-01,17222.83,-4.39,-0.02,49.5,296.539,125.22650,115.33870,178.52300,,152.08070
2022-10-01,14687.02,-3.66,0.55,48.1,297.987,125.73440,115.45020,177.79400,,152.23390
2022-11-01,18597.23,-3.44,1.32,49.2,298.598,125.60740,115.22710,177.29010,,152.17720


Reindex HK50 for quarterly values, then add to macro_quarterly dictionary.

In [15]:
HK50_quarterly = equity_d2q(HK50)
HK50_quarterly.index = HK50_quarterly.index.to_period('Q')

In [16]:
macro_quarterly = {"HK50": HK50_quarterly['Price'],
                   "CN_GDP": CN_GDP['Nominal GDP (100m Yuan)'],
            "US_GDP": US_GDP['GDP']}


macro_quarterly_df = pd.DataFrame()
#macro_quarterly_df.index = pd.date_range(start='1947Q1',end='2023Q1',freq='Q')

for ind in macro_quarterly.keys():
    macro_quarterly_df = pd.concat([macro_quarterly_df, macro_quarterly[ind]], axis=1)
    
macro_quarterly_df.columns = macro_quarterly.keys()
macro_quarterly_df = macro_quarterly_df.dropna()

In [17]:
macro_quarterly_df

Unnamed: 0,HK50,CN_GDP,US_GDP
2000Q4,15095.53,29194.3,10435.744
2001Q1,12760.64,24086.4,10470.231
2001Q2,13042.53,26726.6,10599.000
2001Q3,9950.70,28333.3,10598.020
2001Q4,11397.21,31716.8,10660.465
...,...,...,...
2021Q4,23397.67,325899.4,24349.121
2022Q1,21996.85,271509.2,24740.480
2022Q2,21859.79,293919.5,25248.476
2022Q3,17222.83,309270.6,25723.941


## 2.1 Calculate returns dataframes

In [18]:
list_of_returns = list(macro_daily_df.columns)
list_of_returns.remove('HSIL')

for elem in list_of_returns:
    macro_daily_df[elem] = np.log(macro_daily_df[elem].dropna()) - np.log(macro_daily_df[elem].dropna().shift(-1))
    
macro_daily_df.rename(columns={'DXY': 'DXY ret.',
                               'HK50': 'HK50 ret.',
                               'SPY': 'SPY ret.', 
                               'AUDUSD': 'AUDUSD ret.',
                               'AUDCNY': 'AUDCNY ret.',
                               'CNYHKD': 'CNYHKD ret.',
                               'USDHKD': 'USDHKD ret.',
                               'HKDEUR': 'HKDEUR ret.',
                               'HKDAUD': 'HKDAUD ret.'}, inplace=True)

In [19]:
macro_daily_df

Unnamed: 0,HK50 ret.,DXY ret.,HSIL,SPY ret.,AUDUSD ret.,AUDCNY ret.,CNYHKD ret.,USDHKD ret.,HKDEUR ret.,HKDAUD ret.
2013-03-25,-0.002690,-0.000603,15.84,-0.007971,-0.001623,-0.001814,0.000240,0.000206,-0.000998,0.001625
2013-03-26,-0.006867,-0.004094,15.75,0.000000,0.003823,0.003292,0.000000,-0.000412,-0.004973,-0.003247
2013-03-27,0.007380,0.002768,15.15,-0.003068,0.003069,0.002886,0.000240,0.000103,0.002981,-0.002428
2013-03-28,-0.003053,0.000844,14.69,-0.000957,-0.003452,-0.001019,-0.002479,0.000052,0.000000,0.003239
2013-04-02,0.001357,0.002415,15.04,0.010191,-0.001339,-0.001296,0.000399,0.000064,0.001992,0.000811
...,...,...,...,...,...,...,...,...,...,...
2023-01-30,0.010357,0.001761,24.90,-0.014596,0.000709,-0.000189,0.000000,-0.000625,0.001702,0.000000
2023-01-31,-0.010468,0.008656,24.78,-0.010572,-0.011417,-0.009422,-0.002066,-0.000293,0.011997,0.011690
2023-02-01,0.005170,-0.005222,23.69,-0.014452,0.008303,0.010031,-0.001375,-0.000217,-0.006873,-0.008364
2023-02-02,0.013659,-0.011433,23.04,0.010686,0.022438,0.015877,0.005770,-0.000344,-0.010222,-0.021424


In [20]:
list_of_returns = list(macro_monthly_df.columns)
list_of_returns.remove('US10Y')
list_of_returns.remove('CN10Y')
list_of_returns.remove('CAIXIN')

for elem in list_of_returns:
    macro_monthly_df[elem] = np.log(macro_monthly_df[elem].dropna()) - np.log(macro_monthly_df[elem].dropna().shift(-1))
    
macro_monthly_df.rename(columns={'US_CPI_FRED_all': 'US_CPI_FRED_all ret.',
                                 'HK50': 'HK50 ret.',
                                 'US_CPI_OECD': 'US_CPI_OECD ret.',
                                 'CN_CPI_OECD': 'CN_CPI_OECD ret.',
                                 'US_M2': 'US_M2 ret.',
                                 'CN_M2': 'CN_M2 ret.',
                                 'EU_M2': 'EU_M2 ret.'}, inplace=True)

In [21]:
macro_monthly_df

Unnamed: 0,HK50 ret.,US10Y,CN10Y,CAIXIN,US_CPI_FRED_all ret.,US_CPI_OECD ret.,CN_CPI_OECD ret.,US_M2 ret.,CN_M2 ret.,EU_M2 ret.
2013-03-01,-0.019424,0.35,1.34,50.4,0.002090,0.001040,-0.001975,-0.003397,-0.012951,-0.005008
2013-04-01,0.015283,0.56,1.10,51.6,-0.000414,-0.001779,0.006008,-0.003988,-0.006091,-0.002603
2013-05-01,0.073600,0.77,1.35,50.4,-0.002378,-0.002397,0.000000,-0.005321,-0.006190,0.000303
2013-06-01,-0.050629,0.80,0.90,49.2,-0.001956,-0.000394,-0.000988,-0.004287,-0.007635,0.000822
2013-07-01,0.006983,0.71,0.90,48.2,-0.002384,-0.001202,-0.005018,-0.005520,-0.010919,-0.001804
...,...,...,...,...,...,...,...,...,...,...
2022-08-01,0.147213,-5.10,0.15,50.4,-0.004119,-0.002149,-0.002906,0.005962,,-0.005605
2022-09-01,0.159272,-4.39,-0.02,49.5,-0.004871,-0.004048,-0.000966,0.004092,,-0.001007
2022-10-01,-0.236049,-3.66,0.55,48.1,-0.002048,0.001011,0.001934,0.002838,,0.000373
2022-11-01,-0.061730,-3.44,1.32,49.2,-0.001312,0.003075,0.000968,0.006926,,-0.001367


In [22]:
list_of_returns = list(macro_quarterly_df.columns)

for elem in list_of_returns:
    macro_quarterly_df[elem] = np.log(macro_quarterly_df[elem].dropna()) - np.log(macro_quarterly_df[elem].dropna().shift(-1))
    
macro_quarterly_df.rename(columns={'CN_GDP': 'CN_GDP ret.',
                                   'HK50': 'HK50 ret.',
                                 'US_GDP': 'US_GDP ret.'}, inplace=True)

In [23]:
macro_quarterly_df

Unnamed: 0,HK50 ret.,CN_GDP ret.,US_GDP ret.
2000Q4,0.168033,0.192326,-0.003299
2001Q1,-0.021850,-0.104012,-0.012224
2001Q2,0.270573,-0.058378,0.000092
2001Q3,-0.135726,-0.112809,-0.005875
2001Q4,0.032485,0.187468,-0.011475
...,...,...,...
2021Q4,0.061737,0.182593,-0.015945
2022Q1,0.006250,-0.079310,-0.020325
2022Q2,0.238413,-0.050911,-0.018656
2022Q3,-0.138507,-0.081429,-0.016234


# 3 Descriptive statistics

## 3.1 Daily indicators

In [27]:
for elem in macro_daily_df.keys():
    mean = str(round(macro_daily_df[elem].mean()*100.,5))+'%'
    sdev = str(round(macro_daily_df[elem].std()*100.,5))+'%'
    count = macro_daily_df[elem].size
    print("The mean, standard dev and count of "+str(elem)+" are: "+str(mean)+' '+str(sdev)+' '+str(count))

The mean, standard dev and count of HK50 ret. are: 0.00113% 1.29038% 2372
The mean, standard dev and count of DXY ret. are: -0.00916% 0.44546% 2372
The mean, standard dev and count of HSIL are: 2050.9296% 609.21473% 2372
The mean, standard dev and count of SPY ret. are: -0.04128% 1.13149% 2372
The mean, standard dev and count of AUDUSD ret. are: 0.01746% 0.66382% 2372
The mean, standard dev and count of AUDCNY ret. are: 0.01379% 0.62466% 2372
The mean, standard dev and count of CNYHKD ret. are: 0.00322% 0.23994% 2372
The mean, standard dev and count of USDHKD ret. are: -0.00046% 0.03789% 2372
The mean, standard dev and count of HKDEUR ret. are: -0.0069% 0.51315% 2372
The mean, standard dev and count of HKDAUD ret. are: -0.01692% 0.66058% 2372


## 3.2 Monthly indicators

In [31]:
for elem in macro_monthly_df.keys():
    mean = str(round(macro_monthly_df[elem].mean()*100.,5))+'%'
    sdev = str(round(macro_monthly_df[elem].std()*100.,5))+'%'
    count = macro_monthly_df[elem].size
    print("The mean, standard dev and count of "+str(elem)+" are: "+str(mean)+' '+str(sdev)+' '+str(count))

The mean, standard dev and count of HK50 ret. are: 0.10242% 5.65415% 118
The mean, standard dev and count of US10Y are: -32.89744% 217.65741% 118
The mean, standard dev and count of CN10Y are: 134.87179% 106.13252% 118
The mean, standard dev and count of CAIXIN are: 5015.76271% 168.3373% 118
The mean, standard dev and count of US_CPI_FRED_all ret. are: -0.21578% 0.28959% 118
The mean, standard dev and count of US_CPI_OECD ret. are: -0.20768% 0.35999% 118
The mean, standard dev and count of CN_CPI_OECD ret. are: -0.1519% 0.50879% 118
The mean, standard dev and count of US_M2 ret. are: -0.59677% 0.79895% 118
The mean, standard dev and count of CN_M2 ret. are: -0.86035% 0.2949% 118
The mean, standard dev and count of EU_M2 ret. are: -0.42922% 0.3361% 118


## 3.3 Quarterly indicators

In [30]:
for elem in macro_quarterly_df.keys():
    mean = str(round(macro_quarterly_df[elem].mean()*100.,5))+'%'
    sdev = str(round(macro_quarterly_df[elem].std()*100.,5))+'%'
    count = macro_quarterly_df[elem].size
    print("The mean, standard dev and count of "+str(elem)+" are: "+str(mean)+' '+str(sdev)+' '+str(count))

The mean, standard dev and count of HK50 ret. are: -0.30721% 10.67568% 89
The mean, standard dev and count of CN_GDP ret. are: -2.77463% 11.62301% 89
The mean, standard dev and count of US_GDP ret. are: -1.04366% 1.56603% 89


# 4 Correlation

## 4.1 Daily indicators

In [None]:
plot_cor(x, y, window, filename, x_name, y_name):

In [41]:
list_of_exo = list(macro_daily_df.keys())
list_of_exo.remove('HK50 ret.')
list_of_exo

for elem in list_of_exo:
    filename = "HK50_"+str(elem)+"_20corr_daily.png"
    x_name = "HK50"
    y_name = str(elem)
    plot_cor(macro_daily_df['HK50 ret.'], macro_daily_df[elem], 20, filename, x_name, y_name, unit='day')
    plt.close()

## 4.2 Monthly indicators

In [None]:
list_of_exo = list(macro_monthly_df.keys())
list_of_exo.remove('HK50 ret.')
list_of_exo

for elem in list_of_exo:
    filename = "HK50_"+str(elem)+"_12corr_monthly.png"
    x_name = "HK50"
    y_name = str(elem)
    plot_cor(macro_monthly_df['HK50 ret.'], macro_monthly_df[elem], 12, filename, x_name, y_name, unit='month')
    plt.close()

# 5 Regression

## 5.1 Daily models

In [45]:
macro_daily_df['2013-03-25':'2023-02-02']

Unnamed: 0,HK50 ret.,DXY ret.,HSIL,SPY ret.,AUDUSD ret.,AUDCNY ret.,CNYHKD ret.,USDHKD ret.,HKDEUR ret.,HKDAUD ret.
2013-03-25,-0.002690,-0.000603,15.84,-0.007971,-0.001623,-0.001814,0.000240,0.000206,-0.000998,0.001625
2013-03-26,-0.006867,-0.004094,15.75,0.000000,0.003823,0.003292,0.000000,-0.000412,-0.004973,-0.003247
2013-03-27,0.007380,0.002768,15.15,-0.003068,0.003069,0.002886,0.000240,0.000103,0.002981,-0.002428
2013-03-28,-0.003053,0.000844,14.69,-0.000957,-0.003452,-0.001019,-0.002479,0.000052,0.000000,0.003239
2013-04-02,0.001357,0.002415,15.04,0.010191,-0.001339,-0.001296,0.000399,0.000064,0.001992,0.000811
...,...,...,...,...,...,...,...,...,...,...
2023-01-27,0.027669,-0.003428,22.45,0.012626,0.006214,0.011314,-0.006050,-0.000766,-0.000851,-0.004993
2023-01-30,0.010357,0.001761,24.90,-0.014596,0.000709,-0.000189,0.000000,-0.000625,0.001702,0.000000
2023-01-31,-0.010468,0.008656,24.78,-0.010572,-0.011417,-0.009422,-0.002066,-0.000293,0.011997,0.011690
2023-02-01,0.005170,-0.005222,23.69,-0.014452,0.008303,0.010031,-0.001375,-0.000217,-0.006873,-0.008364


In [47]:
# new method using dropna to avoid look-ahead bias
list_of_exo = list(macro_daily_df.keys())
list_of_exo.remove('HK50 ret.')
list_of_exo

daily_models_OLS_coinc = {}

for elem in list_of_exo:
    X = macro_daily_df[elem]['2013-03-25':'2023-02-02'].dropna()
    y = macro_daily_df['HK50 ret.']['2013-03-25':'2023-02-02'].dropna()
    y = y.reindex(X.index)
    
    X = sm.add_constant(X)
    X = StandardScaler().fit_transform(X)
    daily_models_OLS_coinc[elem] = sm.OLS(y, X).fit() 

In [None]:
macro_daily_df

In [None]:


for elem in list_of_exo:
    filename = "HK50_"+str(elem)+"_20corr_daily.png"
    x_name = "HK50"
    y_name = str(elem)
    plot_cor(macro_daily_df['HK50 ret.'], macro_daily_df[elem], 20, filename, x_name, y_name, unit='day')
    plt.close()

In [None]:
HSI_returns_df =  np.log(HSI_df.dropna()) - np.log(HSI_df.dropna().shift(-1))
HK50['Return'] = np.log(HK50['Price']) - np.log(HK50['Price'].shift(-1))
HK50 = HK50.dropna()

HSI_returns_df.index = pd.to_datetime(HSI_returns_df.index)