In [1]:
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm 
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas.tseries.offsets import *
from contrarianTrading import top_bottom_50
from statsmodels.stats.diagnostic import het_white
from statsmodels.stats.weightstats import ttest_ind
from finance_byu.fama_macbeth import fama_macbeth, fama_macbeth_parallel, fm_summary, fama_macbeth_numba

# Data Importation and Cleaning

In [2]:
#########################
# importing the LSPD Monthly dataset
########################

lspd_m = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/LSPD_M4.csv')

lspd_m['R21'] = pd.to_datetime(lspd_m['R21']) #converting dates to datetime

lspd_m['R21'] = lspd_m['R21'] + MonthEnd(0)

lspd_m['R6'] = pd.to_datetime(lspd_m['R6']) #converting start date to datetime

lspd_m['R6'] = lspd_m['R6'] + MonthEnd(0)

lspd_m = lspd_m.rename(columns={'R21':'date', 'P4' : 'prc', 'R22' : 'logret', 'G1' : 'compNam', 'A4' : 'mrktCp'}) #re-naming some of the columns for legibility 

  lspd_m = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/LSPD_M4.csv')


In [None]:
#########################
# cleaning the LSPD dataset as in the paper
########################

lspd_m = lspd_m.sort_values(by=['compNam', 'date'])

lspd_m['rawRet'] = np.exp(lspd_m['logret'])-1 #taking the expoential of the logreturns to convert to simple returns

lspd_m['rawRet_bfill'] = lspd_m.groupby('compNam')['rawRet'].bfill().tolist() #if there is a return after this then fill backwards to use in filtering

lspd_m['rawRetShift'] = lspd_m.groupby('compNam')['rawRet'].shift() #shifting forward by 1 

lspd_m['rawRetBack'] = lspd_m.groupby('compNam')['rawRet'].shift(-1) #shifting backward by 1 

lspd_m['P8Bfill'] = lspd_m.groupby('compNam')['P8'].bfill().tolist() #backfilling price collection status 

lspd_m.loc[(lspd_m['P8'] == 4) & (lspd_m['rawRet_bfill'].isna()) & (lspd_m['G10'].isin([7,10,14, 16,20,21])) & (~lspd_m['rawRetShift'].isna()), 'rawRet'] = -0.99 #if an asset is suspended and ultimatley delisted with no value set the return in the next peiord to -0.999 (allows for dealing with logs)

lspd_m.loc[(abs(lspd_m['rawRet']) > 0) & (lspd_m['prc']==0) & (lspd_m['P8'] != 4),'rawRet'] = 0 #deal with incorrect dividend adjustments 

lspd_m = lspd_m[~(((lspd_m.P8 == 4.0) | (lspd_m.P8 == 0)) & (lspd_m.rawRet_bfill.isna()) & (lspd_m.rawRet.isna()))] #any suspended asset that cannot be traded in future as it is delisted is removed 

lspd_m = lspd_m[~lspd_m.P8Bfill.isna()] #if there was never a recorded accurate price after this remove the rows 

lspd_m['start_month_year'] = lspd_m.apply(lambda row: 1 if (row['date'].year == row['R6'].year) & (row['date'].month == row['R6'].month) else 0, axis = 1) #this marks all rows where the data is the first entry 

lspd_m = lspd_m[lspd_m.start_month_year != 1] #this removes the first entry as returns is empty 

lspd_m['rawRet'] = lspd_m.rawRet.fillna(0) #the assumption here can go back to the fact we are using transacted prices - hence if the price is suspended then the ret is 0 

lspd_m.loc[lspd_m['rawRet'] > 7, 'rawRet'] = 0 #removes outliers

lspd_m['logret'] = np.log(1+lspd_m['rawRet'])  #converting the cleaned data back into logs for addition at a later stage 

lspd_m.loc[(lspd_m['P8'] == 4) & (lspd_m['rawRet_bfill'].isna()) & (lspd_m['G10'].isin([7,14, 10,16,20,21])) & (~lspd_m['rawRetShift'].isna()), 'rawRet'] = -1 #setting to -100% once logs are dealt with 

lspd_m['mrktCp'] = lspd_m['mrktCp'] / 1000 #converting the market value into millions from '000s 

lspd_m['mrktCp'] = lspd_m['mrktCp'].apply(lambda x: np.NaN if x == 0 else x)

lspd_m['mrktCp'] = lspd_m['mrktCp'].ffill() #forward filling market cap when it equals zero 

lspd_m = lspd_m[~lspd_m.G16.isin([99,98,97,95,37,0])] # finally taking only LSE lists stocks (AIM and OTC are too illiquid to be considered here) and getting rid of investment trusts 


### Computing market returns, redisual returns and excess returns

In [None]:
#########################
# calculating market return and residual returns 
#########################


#calculating the EWI of LSE stocks per month  
lseRets = lspd_m.groupby('date')['rawRet'].mean().reset_index() 
lseRets = lseRets.rename(columns={'rawRet':'mrktRet'})


#residual return of individual stocks 
mean_ret = lspd_m.groupby('compNam')['rawRet'].mean().reset_index()
mean_ret = mean_ret.rename(columns={'rawRet' : 'mean_ret'})
lspd_m = lspd_m.merge(mean_ret, on='compNam', how='inner')
lspd_m['residualRet'] = lspd_m['rawRet'] - lspd_m['mean_ret']

In [None]:
#########################
# forming temporary tables as in Dreschler 2023 
#########################

umd = lspd_m[['date', 'compNam', 'G18','G16', 'P8', 'R18', 'prc', 'residualRet', 'logret', 'mrktCp', 'rawRet']]

#adding marketcaps
_tmp_mrktcp = lspd_m[['date', 'compNam', 'mrktCp']]
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp.groupby('date')['mrktCp'].transform(lambda x: pd.qcut(x, 3, labels=False)).tolist()
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp['mrktRnk'] + 1 #indexed at zero so add 1 
_tmp_mrktcp = _tmp_mrktcp[['date', 'compNam', 'mrktRnk']]

#removing stocks with fewer than 37 months of continuous return data 
counts = umd.groupby('compNam').count()
counts = counts[counts.date < 37]
umd = umd[~umd.compNam.isin(counts.index)]

#removing stocks with less than 36 months of return data
umd = lspd_m
counts = umd.groupby('compNam').count()
counts = counts[counts.date < 37]
umd = umd[~umd.compNam.isin(counts.index)]

umd = umd.merge(_tmp_mrktcp, on=['date', 'compNam'], how='inner')

# Paper Analysis
# Section IV: Empirical Test

In [6]:
#########################
# Adding in the residual returns at different periods 
#########################

umd2 = umd.copy(deep=True) #deep cpoy avoids altering the frame 
umd2 = umd2.sort_values(by=['compNam', 'date']) 

diffs = list(range(1,13,1))

for diff in diffs:
    
    umd2['diff_{}'.format(str(diff))] = umd2.groupby('compNam')['rawRet'].shift(diff).tolist()
    
    
umd2['diff_24'] = umd2.groupby('compNam')['rawRet'].shift(24) 


umd2['diff_36'] = umd2.groupby('compNam')['rawRet'].shift(36)

    
umd2 = umd2.dropna(subset='diff_36') #removes all stocks in month t for which there is no return for the past 3 years (ensuring that we only compute regressions on avaialble stocks) 

In [7]:
umd2 = umd2[(umd2.date >= pd.to_datetime('01-01-1994')) & (umd2.date < pd.to_datetime('01-01-2024'))] #confining to testing period 

In [8]:
umd2 = umd2.merge(_tmp_mrktcp, on=['date', 'compNam'], how='inner')

In [9]:
#results for the full sample of all months 

results = fama_macbeth(umd2, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results)
summary

Unnamed: 0,mean,std_error,tstat
intercept,-0.003359,0.001854,-1.811668
diff_1,-0.057295,0.005665,-10.113709
diff_2,-0.001507,0.005215,-0.288964
diff_3,0.001234,0.004928,0.250352
diff_4,0.007152,0.005309,1.347172
diff_5,-0.001114,0.004584,-0.243117
diff_6,0.003067,0.004814,0.637131
diff_7,0.006288,0.004893,1.285026
diff_8,0.002985,0.004452,0.670434
diff_9,0.00154,0.004584,0.335952


In [10]:
#marking the month of January in the data
umd2['month'] = [i.month for i in umd2['date'].tolist()]
umd3 = umd2[umd2.month == 1] #january returns
umd4 = umd2[umd2.month != 1] #non january returns

In [11]:
#results for the full sample of January only months 

results_jan = fama_macbeth(umd3, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results_jan)
summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1


Unnamed: 0,mean,std_error,tstat
intercept,0.002555,0.00638,0.400449
diff_1,-0.095304,0.01521,-6.265739
diff_2,-0.007787,0.019846,-0.392353
diff_3,-0.023057,0.019849,-1.161624
diff_4,0.004368,0.021011,0.207883
diff_5,-0.004844,0.019777,-0.24494
diff_6,-0.005085,0.014563,-0.349174
diff_7,-0.002545,0.016413,-0.15504
diff_8,0.015117,0.011889,1.271517
diff_9,-0.00382,0.011542,-0.330975


In [12]:
#results for the full sample of non January months 

results_non_jan = fama_macbeth(umd4, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results_non_jan)
summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1


Unnamed: 0,mean,std_error,tstat
intercept,-0.003896,0.001938,-2.010794
diff_1,-0.053839,0.005993,-8.984029
diff_2,-0.000936,0.005404,-0.173209
diff_3,0.003442,0.005057,0.680658
diff_4,0.007405,0.005478,1.351748
diff_5,-0.000775,0.004677,-0.165779
diff_6,0.003809,0.005086,0.74876
diff_7,0.007091,0.00513,1.382159
diff_8,0.001882,0.004735,0.39748
diff_9,0.002027,0.004893,0.414357


# Section V: Contrarian Trading


In [None]:
#########################
# Testing the contrarian trading strategy using the custom function top_bottom_50 
#########################
top50Rets_all, mom_output_all, ewretdat2_all, port_all, ewretdat_all, ewretdat3_all = top_bottom_50(umd, lspd_m, '1994-01-01', '2024-01-01')

In [None]:
#portfolio returns

mom_output_all

In [None]:
#average market return for the sample period 
marketSamp = lseRets[(lseRets['date'] > pd.to_datetime('1994-01-01')) & (lseRets['date'] < pd.to_datetime('2024-01-01'))]
marketSamp['mrktRet'].mean()

### Comission Impact Test

In [None]:
ewretdat3_all['losers_comm'] = ewretdat3_all['losers'] - 0.01 #removing 1% per month from the strategy returns to check for transaction costs

In [33]:
ewretdat3_all['Contrarian Returns Minus Comission'] = ewretdat3_all['losers_comm'] - ewretdat3_all['winners']
ewretdat3_all['Contrarian Returns Minus Comission'].describe()[['count','mean', 'std']].reset_index()

Unnamed: 0,index,Contrarian Returns Minus Comission
0,count,360.0
1,mean,0.012797
2,std,0.067988


In [34]:
zero_mean_alternate = [0.0 for i in range(len(ewretdat3_all))]


print('T-Statistic post-transaction costs: ', ttest_ind(np.array(ewretdat3_all['long_short_comm']),zero_mean_alternate)[0])

T-Statistic post-transaction costs:  3.571342930445273


### Seasonality Tests

In [45]:
ewretdat4 = ewretdat3_all.reset_index()
ewretdat4['month'] = ewretdat4['date'].apply(lambda x: x.month)
ewretdat4['non_jan'] = ewretdat4['month'].apply(lambda x: 'Jan.' if x == 1 else 'Feb.-Dec.')


monthly_rets = ewretdat4.groupby('month')['long_short'].describe()[['count','mean', 'std']]
monthly_rets_grouped_all = ewretdat4.groupby('non_jan')['long_short'].describe()[['mean', 'std']]
monthly_rets_grouped_losers = ewretdat4.groupby('non_jan')['losers'].describe()[['mean', 'std']]
monthly_rets_grouped_winners = ewretdat4.groupby('non_jan')['winners'].describe()[['mean', 'std']]

jans = ewretdat4[ewretdat4.non_jan == 'Jan.']
non_jans = ewretdat4[ewretdat4.non_jan != 'Jan.']

t_losers_jan = ttest_ind(jans['losers'], np.zeros(len(jans['losers'])))[0]
t_winners_jan = ttest_ind(jans['winners'], np.zeros(len(jans['winners'])))[0]
t_long_short_jan = ttest_ind(jans['long_short'], np.zeros(len(jans['long_short'])))[0]
                         
t_losers_nonjan = ttest_ind(non_jans['losers'], np.zeros(len(non_jans['losers'])))[0]
t_winners_nonjan = ttest_ind(non_jans['winners'], np.zeros(len(non_jans['winners'])))[0]
t_long_short_nonjan = ttest_ind(non_jans['long_short'], np.zeros(len(non_jans['long_short'])))[0]

In [183]:
ttest_ind(jans['losers'], np.zeros(len(jans['losers']))) #t, p, obs

(3.6539446700314233, 0.0005569364483243858, 58.0)

In [46]:
#average returns per month 
monthly_rets

Unnamed: 0_level_0,count,mean,std
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,30.0,0.047859,0.066748
2,30.0,0.010366,0.093139
3,30.0,0.031158,0.081481
4,30.0,0.049776,0.082993
5,30.0,0.01207,0.06691
6,30.0,0.020173,0.062463
7,30.0,0.00744,0.047758
8,30.0,0.03568,0.055161
9,30.0,0.018374,0.045807
10,30.0,0.021219,0.052433


In [47]:
#returns in Jan only sample and outside Jan for the contrarian strategy 
monthly_rets_grouped_all['T-stat'] = [t_long_short_nonjan, t_long_short_jan]
monthly_rets_grouped_all

Unnamed: 0_level_0,mean,std,T-stat
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Feb.-Dec.,0.020519,0.067739,5.502568
Jan.,0.047859,0.066748,3.927264


In [48]:
monthly_rets_grouped_losers['T-stat'] = [t_losers_nonjan, t_losers_jan]
monthly_rets_grouped_losers

Unnamed: 0_level_0,mean,std,T-stat
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Feb.-Dec.,0.023201,0.085959,4.903086
Jan.,0.058429,0.087585,3.653945


In [49]:
monthly_rets_grouped_winners['T-stat'] = [t_winners_nonjan, t_winners_jan]
monthly_rets_grouped_winners

Unnamed: 0_level_0,mean,std,T-stat
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Feb.-Dec.,0.002682,0.065855,0.739855
Jan.,0.01057,0.053094,1.090397


# Section VI: Risk Adjustments

### Market Based Risk Adjustment with CAPM

In [137]:
#loading in European Fama-French Dataset 
EurFF3 = pd.read_csv('/users/henry/downloads/Europe_5_Factors.csv')
EurFF3['date'] = [str(i)[:4]+'-'+str(i)[-2:] for i in EurFF3['date'].tolist()]
EurFF3['date'] = pd.to_datetime(EurFF3.date) 
EurFF3['date'] = EurFF3['date'] + MonthEnd(0)

#joining the portfolio data with the Fama-French factors
ewexcretdat = ewretdat3_all.copy(deep=True).reset_index()
ewexcretdat['date'] = ewexcretdat['date'] + MonthEnd(0)
ewexcretdat = ewexcretdat.merge(EurFF3, how='left', left_on='date', right_on='date')
ewexcretdat['RF']= ewexcretdat['RF'].bfill()

#computing excess returns 
ewexcretdat['excess_ret'] = ewexcretdat['long_short']  
lseRets['date'] = lseRets['date'] + MonthEnd(0)
ewexcretdat = ewexcretdat.merge(lseRets, how='inner', on=['date'])
ewexcretdat['mrkt_excess'] = ewexcretdat['mrktRet'] - ewexcretdat['RF']
ewexcretdat['excess_losers'] = ewexcretdat['losers'] - ewexcretdat['RF']
ewexcretdat['excess_winners'] = ewexcretdat['winners'] - ewexcretdat['RF']

#computing market varying beta 
ewexcretdat['d'] = ewexcretdat.mrkt_excess.apply(lambda x: 1 if x > 0 else 0) #d == 1 in a down market 
ewexcretdat['excess_d'] = ewexcretdat['mrkt_excess'] * ewexcretdat['d'] 
ewexcretdat['excess_d_minus_1'] = ewexcretdat['mrkt_excess'] * (1-ewexcretdat['d'])


CAPM_data = ewexcretdat[['date', 'excess_ret', 'mrkt_excess', 'excess_losers', 'excess_winners', 'excess_d', 'Mkt-RF','excess_d_minus_1', 'SMB', 'HML']]

In [138]:
# results of the model R_p,t = alpah_p + beta_P(R_m,t - R_f,t) + error

In [139]:
#contrarian strategy

x = np.array(CAPM_data[['mrkt_excess']])
x = sm.add_constant(x) 
y = np.array(CAPM_data['excess_ret'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3') #using HC3 which is based off work by Holigan and Welsch (1973) and incorprated by MacKinnon and White (1985) -- all regressions further use this estimate 
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.1189
Date:                Fri, 03 May 2024   Prob (F-statistic):              0.730
Time:                        13:39:45   Log-Likelihood:                 457.59
No. Observations:                 360   AIC:                            -911.2
Df Residuals:                     358   BIC:                            -903.4
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0214      0.005      4.171      0.0

In [140]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 6.731137030306074


In [141]:
#loser portfolio
x = np.array(CAPM_data[['mrkt_excess']])
x = sm.add_constant(x) 
y = np.array(CAPM_data['excess_losers'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.926
Model:                            OLS   Adj. R-squared:                  0.925
Method:                 Least Squares   F-statistic:                     3192.
Date:                Fri, 03 May 2024   Prob (F-statistic):          2.01e-180
Time:                        13:39:46   Log-Likelihood:                 545.02
No. Observations:                 360   AIC:                            -1086.
Df Residuals:                     358   BIC:                            -1078.
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0194      0.005      4.286      0.0

In [142]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 24.438786087754636


In [143]:
#winner portfolio
y = np.array(CAPM_data['excess_winners'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.960
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     8239.
Date:                Fri, 03 May 2024   Prob (F-statistic):          3.42e-249
Time:                        13:39:47   Log-Likelihood:                 662.87
No. Observations:                 360   AIC:                            -1322.
Df Residuals:                     358   BIC:                            -1314.
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0020      0.002     -0.881      0.3

In [144]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 1.9242615629199777


In [145]:
#model is now R_P,t = alpha_p + Beta_U,P * (R_M,t - R_F,t)*D + Beta_D,P * (R_M,t - R_F,t)*(1-D) + error

In [146]:
#contrarian strategy

x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_ret'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.059
Model:                            OLS   Adj. R-squared:                  0.054
Method:                 Least Squares   F-statistic:                     6.515
Date:                Fri, 03 May 2024   Prob (F-statistic):            0.00166
Time:                        13:39:52   Log-Likelihood:                 468.42
No. Observations:                 360   AIC:                            -930.8
Df Residuals:                     357   BIC:                            -919.2
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0069      0.005      1.301      0.1

In [147]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 5.086793729691479


In [148]:
#loser portfolio

x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_losers'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.934
Method:                 Least Squares   F-statistic:                     2186.
Date:                Fri, 03 May 2024   Prob (F-statistic):          5.26e-201
Time:                        13:39:54   Log-Likelihood:                 567.60
No. Observations:                 360   AIC:                            -1129.
Df Residuals:                     357   BIC:                            -1118.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0032      0.004      0.755      0.4

In [149]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 2.8780374988203983


In [150]:
#winner portfolio

x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_winners'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     4121.
Date:                Fri, 03 May 2024   Prob (F-statistic):          2.23e-247
Time:                        13:39:55   Log-Likelihood:                 663.31
No. Observations:                 360   AIC:                            -1321.
Df Residuals:                     357   BIC:                            -1309.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0037      0.003     -1.316      0.1

In [151]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 2.7837296378788823


## Size Based Risk Adjustemnt with Terclie Portfolios

In [152]:
#average market cap of stocks in portfolios 

_tmp_mrktcp = lspd_m[['date', 'compNam', 'mrktCp']]
umd_filter = umd.copy(deep=True)
umd_filter = umd_filter[['date', 'compNam', 'P8']] #only including 
_tmp_mrktcp = pd.merge(_tmp_mrktcp, umd_filter, on=['date', 'compNam'], how='inner') 
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp.groupby('date')['mrktCp'].transform(lambda x: pd.qcut(x, 10, labels=False)).tolist()
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp['mrktRnk'] + 1 #indexed at zero so add 1 
_tmp_mrktcp = _tmp_mrktcp[['date', 'compNam', 'mrktCp', 'mrktRnk']]

#joining the two
port_a = port_all.merge(_tmp_mrktcp, left_on=['form_date', 'compNam'], right_on=['date', 'compNam'], how='inner')
port_a = port_a.rename(columns={'date_x' : 'date'})
port_a = port_a[(port_a['date'] > pd.to_datetime('1993-01-01')) & (port_a['date'] < pd.to_datetime('2023-01-01'))]
umd2 = port_a.sort_values(by=['date','momr','form_date','compNam']).drop_duplicates()
rnks = umd2.groupby(['date','momr','form_date'])['mrktRnk'].mean().reset_index()
rnks.groupby(['momr'])['mrktRnk'].mean().reset_index()

umd2 = port_a.sort_values(by=['date','momr','form_date','compNam']).drop_duplicates()
rnks = umd2.groupby(['date','momr','form_date'])['mrktCp'].mean().reset_index()
rnks = rnks.groupby(['momr'])['mrktCp'].mean().reset_index()
rnks['Portfolio'] = ['Loser', 'Not Included', 'Winner']
rnks = rnks[['Portfolio', 'mrktCp']]
rnks

Unnamed: 0,Portfolio,mrktCp
0,Loser,856.562215
1,Not Included,2762.568591
2,Winner,1137.042591


In [153]:
#average LSE market cap
print('Mean LSE Market Cap: ', lspd_m.mrktCp.mean())

Mean LSE Market Cap:  1575.006110929179


In [165]:
#Size Based Regression Results
monthly_rets = ewretdat3_all.copy(deep=True).reset_index()
monthly_rets['year'] = [i.year for i in monthly_rets['date'].to_list()] 
monthly_rets['month'] = [i.month for i in monthly_rets['date'].to_list()]
monthly_rets2 = monthly_rets

lrgRet = umd[umd.mrktRnk == 3]
smlRet = umd[umd.mrktRnk == 1]
medRet = umd[umd.mrktRnk == 2]
lrgRet = lrgRet.groupby('date')['rawRet'].mean().reset_index()
smlRet = smlRet.groupby('date')['rawRet'].mean().reset_index()
medRet = medRet.groupby('date')['rawRet'].mean().reset_index()
lrgRet = lrgRet.rename(columns={'rawRet' : 'lrgRet'})
smlRet = smlRet.rename(columns={'rawRet' : 'smlRet'})
medRet = medRet.rename(columns={'rawRet' : 'medret'})
monthly_rets2 = monthly_rets2.merge(lrgRet, how='left', on='date')
monthly_rets2 = monthly_rets2.merge(smlRet, how='left', on='date')
monthly_rets2 = monthly_rets2.merge(medRet, how='left', on='date')
monthly_rets2['jan'] = monthly_rets2['month'].apply(lambda x: 1 if x == 1 else 0)

In [166]:
#model R_P,t = alpha + Beta_S,P * R_S,t + Beta_M,P * R_M,t + Beta_L,P * R_L,t + error

In [167]:
#contrarian strategy
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.114
Model:                            OLS   Adj. R-squared:                  0.106
Method:                 Least Squares   F-statistic:                     10.88
Date:                Fri, 03 May 2024   Prob (F-statistic):           7.40e-07
Time:                        13:41:47   Log-Likelihood:                 479.26
No. Observations:                 360   AIC:                            -950.5
Df Residuals:                     356   BIC:                            -935.0
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0224      0.004      6.042      0.0

In [168]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 61.60883602677794


In [169]:
#loser portfolio
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.729
Model:                            OLS   Adj. R-squared:                  0.727
Method:                 Least Squares   F-statistic:                     212.5
Date:                Fri, 03 May 2024   Prob (F-statistic):           5.65e-79
Time:                        13:41:48   Log-Likelihood:                 605.69
No. Observations:                 360   AIC:                            -1203.
Df Residuals:                     356   BIC:                            -1188.
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0157      0.003      5.773      0.0

In [170]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 12.164385290098657


In [171]:
#winner portfolio
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['winners'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.650
Model:                            OLS   Adj. R-squared:                  0.647
Method:                 Least Squares   F-statistic:                     128.3
Date:                Fri, 03 May 2024   Prob (F-statistic):           2.39e-56
Time:                        13:41:48   Log-Likelihood:                 663.62
No. Observations:                 360   AIC:                            -1319.
Df Residuals:                     356   BIC:                            -1304.
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0068      0.002     -3.465      0.0

In [172]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 75.32350060158056


# Section VII: Market Efficiency and Illiquidty

In [173]:
std = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/all_share_index.csv')
std['date'] = pd.to_datetime(std['DATE'])
std['month'] = [i.month for i in std['date']]
std['year'] = [i.year for i in std['date']]
std['pct_change'] = std['FTARI'].pct_change()
std = std.dropna(subset='pct_change')
std = std.groupby(['year', 'month'])['pct_change'].std().reset_index()
std = std.rename(columns={'pct_change' : 'mrktStd'})
std['mrktStd'] = std['mrktStd'] 
std = std[['month', 'year', 'mrktStd']]


lseRets['year'] = [i.year for i in lseRets['date'].tolist()]
lseRets['month'] = [i.month for i in lseRets['date'].tolist()]
rets = lseRets.copy(deep=True)
rets = lseRets[['year', 'month', 'mrktRet']]

monthly_rets2 = monthly_rets2.merge(std, how='left', on=['year', 'month'])
monthly_rets2 = monthly_rets2.merge(rets, how='left', on=['year','month'])


In [176]:
#contrarian strategy
x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.138
Model:                            OLS   Adj. R-squared:                  0.133
Method:                 Least Squares   F-statistic:                     18.25
Date:                Fri, 03 May 2024   Prob (F-statistic):           2.84e-08
Time:                        13:41:55   Log-Likelihood:                 484.14
No. Observations:                 360   AIC:                            -962.3
Df Residuals:                     357   BIC:                            -950.6
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.008     -0.022      0.9

In [177]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 10.219599864451713


In [178]:
#loser portfolio 

x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.727
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                     552.0
Date:                Fri, 03 May 2024   Prob (F-statistic):          5.75e-110
Time:                        13:41:56   Log-Likelihood:                 604.48
No. Observations:                 360   AIC:                            -1203.
Df Residuals:                     357   BIC:                            -1191.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0041      0.005     -0.759      0.4

In [179]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 31.868839316270424


In [180]:
#winner portfolio 

x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['winners'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.592
Model:                            OLS   Adj. R-squared:                  0.590
Method:                 Least Squares   F-statistic:                     155.1
Date:                Fri, 03 May 2024   Prob (F-statistic):           3.28e-49
Time:                        13:41:57   Log-Likelihood:                 636.00
No. Observations:                 360   AIC:                            -1266.
Df Residuals:                     357   BIC:                            -1254.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0039      0.006     -0.649      0.5

In [182]:
#heteroscedasticity test 

white_test = het_white(results.resid,  results.model.exog)

print('White\'s Test Statistic', white_test[0])

White's Test Statistic 7.204784088680296


# Section VIII: Limitations and Robustness

In [60]:
top50Rets_94_to_04, mom_output_94_to_04, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '1994-01-01', '2004-01-01')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return


In [61]:
mom_output_94_to_04

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,-0.000247,-0.034044,0.9728994
1,losers,0.041688,5.098876,1.303903e-06
2,long_short,0.041935,5.691216,9.22669e-08


In [62]:
top50Rets_04_to_14, mom_output_04_to_14, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '2004-01-01', '2014-01-01')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return


In [63]:
mom_output_04_to_14

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.007082,1.287556,0.200399
1,losers,0.018753,2.370849,0.019354
2,long_short,0.011671,2.2416,0.026841


In [64]:
top50Rets_12_to_24, mom_output_14_to_24, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '2014-01-01', '2024-01-01')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return


In [65]:
mom_output_14_to_24

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.003184,0.671538,0.503179
1,losers,0.017969,2.406719,0.017634
2,long_short,0.014785,2.692806,0.008108
