In [1]:
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm 
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas.tseries.offsets import *
from statsmodels.stats.diagnostic import het_white
from finance_byu.fama_macbeth import fama_macbeth, fama_macbeth_parallel, fm_summary, fama_macbeth_numba

### Data Importation and Cleaning

In [3]:
#load in the data
lspd_m1 = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/LSPD_M4.csv')

  lspd_m1 = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/LSPD_M4.csv')


In [4]:
lspd_m = lspd_m1.copy(deep=True) # this allows us to go back to the original unedited file if needed 

lspd_m['R21'] = pd.to_datetime(lspd_m['R21']) #converting dates to datetime

lspd_m['R21'] = lspd_m['R21'] + MonthEnd(0)

lspd_m['R6'] = pd.to_datetime(lspd_m['R6']) #converting start date to datetime

lspd_m['R6'] = lspd_m['R6'] + MonthEnd(0)

lspd_m = lspd_m.rename(columns={'R21':'date', 'P4' : 'prc', 'R22' : 'logret', 'G1' : 'compNam', 'A4' : 'mrktCp'}) #re-naming some of the columns for legibility 

In [5]:
# cleaning the data. See LSPD docs fod column names and information

lspd_m = lspd_m.sort_values(by=['compNam', 'date'])

lspd_m['rawRet'] = np.exp(lspd_m['logret'])-1 #taking the expoential of the logreturns to convert to simple returns

lspd_m['rawRet_bfill'] = lspd_m.groupby('compNam')['rawRet'].bfill().tolist() #if there is a return after this then fill backwards to use in filtering

lspd_m['rawRetShift'] = lspd_m.groupby('compNam')['rawRet'].shift() #shifting forward by 1 

lspd_m['rawRetBack'] = lspd_m.groupby('compNam')['rawRet'].shift(-1) #shifting backward by 1 

lspd_m['P8Bfill'] = lspd_m.groupby('compNam')['P8'].bfill().tolist() #backfilling price collection status 

lspd_m.loc[(lspd_m['P8'] == 4) & (lspd_m['rawRet_bfill'].isna()) & (lspd_m['G10'].isin([7,10,14, 16,20,21])) & (~lspd_m['rawRetShift'].isna()), 'rawRet'] = -0.99 #if an asset is suspended and ultimatley delisted with no value set the return in the next peiord to -0.999 (allows for dealing with logs)

lspd_m.loc[(abs(lspd_m['rawRet']) > 0) & (lspd_m['prc']==0) & (lspd_m['P8'] != 4),'rawRet'] = 0 #deal with incorrect dividend adjustments 

lspd_m = lspd_m[~(((lspd_m.P8 == 4.0) | (lspd_m.P8 == 0)) & (lspd_m.rawRet_bfill.isna()) & (lspd_m.rawRet.isna()))] #any suspended asset that cannot be traded in future as it is delisted is removed 

lspd_m = lspd_m[~lspd_m.P8Bfill.isna()] #if there was never a recorded accurate price after this remove the rows 

lspd_m['start_month_year'] = lspd_m.apply(lambda row: 1 if (row['date'].year == row['R6'].year) & (row['date'].month == row['R6'].month) else 0, axis = 1) #this marks all rows where the data is the first entry 

lspd_m = lspd_m[lspd_m.start_month_year != 1] #this removes the first entry as returns is empty 

lspd_m['rawRet'] = lspd_m.rawRet.fillna(0) #the assumption here can go back to the fact we are using transacted prices - hence if the price is suspended then the ret is 0 

lspd_m.loc[lspd_m['rawRet'] > 7, 'rawRet'] = 0 #removes outliers

lspd_m['logret'] = np.log(1+lspd_m['rawRet'])  #converting the cleaned data back into logs for addition at a later stage 

lspd_m.loc[(lspd_m['P8'] == 4) & (lspd_m['rawRet_bfill'].isna()) & (lspd_m['G10'].isin([7,14, 10,16,20,21])) & (~lspd_m['rawRetShift'].isna()), 'rawRet'] = -1 #setting to -100% once logs are dealt with 

lspd_m['mrktCp'] = lspd_m['mrktCp'] / 1000 #converting the market value into millions from '000s 

lspd_m['mrktCp'] = lspd_m['mrktCp'].apply(lambda x: np.NaN if x == 0 else x)

lspd_m['mrktCp'] = lspd_m['mrktCp'].ffill() #forward filling market cap when it equals zero 

lspd_m = lspd_m[~lspd_m.G16.isin([99,98,97,95,37,0])] # finally taking only LSE lists stocks (AIM and OTC are too illiquid to be considered here) and getting rid of investment trusts 


### Computing market returns, redisual returns and excess returns

In [6]:
#equal weight market return 
lseRets = lspd_m.groupby('date')['rawRet'].mean().reset_index() 
lseRets = lseRets.rename(columns={'rawRet':'mrktRet'})

In [7]:
#residual return (asset return minus mean return)
mean_ret = lspd_m.groupby('compNam')['rawRet'].mean().reset_index()
mean_ret = mean_ret.rename(columns={'rawRet' : 'mean_ret'})
lspd_m = lspd_m.merge(mean_ret, on='compNam', how='inner')
lspd_m['residualRet'] = lspd_m['rawRet'] - lspd_m['mean_ret']

In [8]:
#excess return (asset return minus market return)
lspd_m = lspd_m.merge(lseRets, how='inner', on='date')
lspd_m['excessRet'] = lspd_m['rawRet'] - lspd_m['mrktRet']

In [9]:
umd = lspd_m[['date', 'compNam', 'G18','G16', 'P8', 'R18', 'prc', 'residualRet', 'excessRet', 'logret', 'mrktCp', 'rawRet']]

#adding marketcaps
_tmp_mrktcp = lspd_m[['date', 'compNam', 'mrktCp']]
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp.groupby('date')['mrktCp'].transform(lambda x: pd.qcut(x, 3, labels=False)).tolist()
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp['mrktRnk'] + 1 #indexed at zero so add 1 
_tmp_mrktcp = _tmp_mrktcp[['date', 'compNam', 'mrktRnk']]

#removing stocks with fewer than 37 months of continuous return data 
counts = umd.groupby('compNam').count()
counts = counts[counts.date < 37]
umd = umd[~umd.compNam.isin(counts.index)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_mrktcp['mrktRnk'] = _tmp_mrktcp.groupby('date')['mrktCp'].transform(lambda x: pd.qcut(x, 3, labels=False)).tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_mrktcp['mrktRnk'] = _tmp_mrktcp['mrktRnk'] + 1 #indexed at zero so add 1


In [11]:
#removing stocks with less than 36 months of return data
umd = lspd_m
counts = umd.groupby('compNam').count()
counts = counts[counts.date < 37]
umd = umd[~umd.compNam.isin(counts.index)]

## testing autocorrelation of residuals

In [78]:
umd2 = umd.copy(deep=True) #deep cpoy avoids altering the frame 

In [79]:
umd2 = umd2.sort_values(by=['compNam', 'date']) 

In [80]:
#calculating the returns over the past 1-14, 24 and 36 months using log returns 
diffs = list(range(1,13,1))

for diff in diffs:
    
    umd2['diff_{}'.format(str(diff))] = umd2.groupby('compNam')['logret'].rolling(diff, min_periods=diff).sum().tolist() #calculating the rolling return and shifting down 1 
    umd2['diff_{}'.format(str(diff))] = umd2.groupby('compNam')['diff_{}'.format(str(diff))].shift().tolist() #shifting 1
    umd2['diff_{}'.format(str(diff))] = np.exp(umd2['diff_{}'.format(str(diff))])-1 #convert back to simple return
    
umd2['diff_24'] = umd2.groupby('compNam')['logret'].rolling(24, min_periods=24).sum().tolist()
umd2['diff_24'] = umd2.groupby('compNam')['diff_24'].shift().tolist() #calculating the rolling return and shifting down 1 
umd2['diff_24'] = np.exp(umd2['diff_24'])-1

umd2['diff_36'] = umd2.groupby('compNam')['logret'].rolling(36, min_periods=36).sum().tolist() 
umd2['diff_36'] = umd2.groupby('compNam')['diff_36'].shift().tolist() #calculating the rolling return and shifting down 1 
umd2['diff_36'] = np.exp(umd2['diff_36'])-1

    
    
umd2 = umd2.dropna(subset='diff_36') #removes all stocks in month t for which there is no return for the past 3 years (ensuring that we only compute regressions on avaialble stocks) 


In [81]:
umd2 = umd2[(umd2.date >= pd.to_datetime('01-01-1994')) & (umd2.date < pd.to_datetime('01-01-2024'))] #confining to testing period 

In [82]:
umd2 = umd2.merge(_tmp_mrktcp, on=['date', 'compNam'], how='inner')

In [83]:
results = fama_macbeth(umd2, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results)
summary

Unnamed: 0,mean,std_error,tstat
intercept,-0.002055,0.001876,-1.095431
diff_1,-0.057262,0.006413,-8.928729
diff_2,-0.004021,0.005454,-0.737274
diff_3,-0.004221,0.005867,-0.719438
diff_4,0.0059,0.005108,1.154875
diff_5,-0.005666,0.005243,-1.080691
diff_6,-0.002956,0.005595,-0.528279
diff_7,0.004163,0.005628,0.739786
diff_8,-0.000616,0.005007,-0.123104
diff_9,0.001676,0.00468,0.358238


In [84]:
summary.to_csv('/users/henry/desktop/summary_1.csv')

In [85]:
#marking the month of January in the data
umd2['month'] = [i.month for i in umd2['date'].tolist()]
umd3 = umd2[umd2.month == 1] #january returns
umd4 = umd2[umd2.month != 1] #non january returns

In [86]:
results_jan = fama_macbeth(umd3, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results_jan)
summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1


Unnamed: 0,mean,std_error,tstat
intercept,0.005486,0.006818,0.804605
diff_1,-0.088507,0.01689,-5.2401
diff_2,0.004107,0.018997,0.216208
diff_3,-0.0165,0.020436,-0.807401
diff_4,0.000701,0.014177,0.049446
diff_5,0.014281,0.016068,0.888792
diff_6,-0.028159,0.01684,-1.672131
diff_7,0.00226,0.017423,0.129726
diff_8,-0.009442,0.017467,-0.540575
diff_9,0.019014,0.014852,1.280205


In [87]:
summary.to_csv('/users/henry/desktop/summary_2.csv')

In [88]:
results_jan = fama_macbeth(umd4, 'date', 'residualRet', ['diff_1', 'diff_2', 'diff_3', 'diff_4', 'diff_5', 'diff_6',
       'diff_7', 'diff_8', 'diff_9', 'diff_10', 'diff_11', 'diff_12', 'diff_24', 'diff_36'], intercept = True)

summary = fm_summary(results_jan)
summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['intercept'] = 1


Unnamed: 0,mean,std_error,tstat
intercept,-0.002741,0.00195,-1.405837
diff_1,-0.054422,0.00681,-7.991117
diff_2,-0.00476,0.005701,-0.835
diff_3,-0.003105,0.006131,-0.506435
diff_4,0.006372,0.005426,1.174299
diff_5,-0.007479,0.005525,-1.353589
diff_6,-0.000665,0.0059,-0.112669
diff_7,0.004336,0.005939,0.730188
diff_8,0.000186,0.005232,0.035548
diff_9,0.0001,0.00492,0.020393


In [89]:
summary.to_csv('/users/henry/desktop/summary_3.csv')

## umd = umd.merge(_tmp_mrktcp, on=['date', 'compNam'], how='inner')

## Portfolio Formation

In [22]:
#running the algorithm with all the data 
top50Rets_all, mom_output_all, ewretdat2_all, port_all, ewretdat_all, ewretdat3_all = top_bottom_50(umd, lspd_m, '1994-01-01', '2024-01-01')

  umd = umd.groupby('date').apply(assign_momr)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return
100%|███████████████████████████████████████████| 43/43 [00:35<00:00,  1.20it/s]


In [23]:
top50Rets_all

Unnamed: 0_level_0,count,mean,std
momr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,360.0,0.026136,0.086523
2,360.0,0.009215,0.044719
3,360.0,0.003339,0.064861


In [24]:
mom_output_all

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.003339,0.976877,0.3292881
1,losers,0.026136,5.731498,2.111193e-08
2,long_short,0.022797,6.362093,6.067066e-10


In [25]:
lseRets['mrktRet'].mean()

0.01098116678495507

#### Comission Check

In [26]:
ewretdat3_all['losers_comm'] = ewretdat3_all['losers'] - 0.01

In [27]:
ewretdat3_all['long_short_comm'] = ewretdat3_all['losers_comm'] - ewretdat3_all['winners']#removing comission or 0.5% per trade (full trade cost)
ewretdat3_all['long_short_comm'].describe()[['count','mean', 'std']].reset_index()

Unnamed: 0,index,long_short_comm
0,count,360.0
1,mean,0.012797
2,std,0.067988


In [28]:
pd.Series(stats.ttest_1samp(ewretdat3_all['long_short_comm'],0.0)).to_frame().T

Unnamed: 0,0,1
0,3.571343,0.000404


## Seasonality

In [90]:
ewretdat4 = ewretdat3_all.reset_index()
ewretdat4['month'] = ewretdat4['date'].apply(lambda x: x.month)
ewretdat4['non_jan'] = ewretdat4['month'].apply(lambda x: 'Jan.' if x == 1 else 'Feb.-Dec.')
#ewretdat4['non_jan'] = ewretdat4['month'].apply(lambda x: 'Jan.' if x == 1 else ('Feb-Sept.' if (x > 1) & (x<10) else 'Oct-Dec.'))

monthly_rets = ewretdat4.groupby('month')['long_short'].describe()[['count','mean', 'std']]
monthly_rets_grouped_all = ewretdat4.groupby('non_jan')['long_short'].describe()[['mean', 'std']]
monthly_rets_grouped_losers = ewretdat4.groupby('non_jan')['losers'].describe()[['mean', 'std']]
monthly_rets_grouped_winners = ewretdat4.groupby('non_jan')['winners'].describe()[['mean', 'std']]

jans = ewretdat4[ewretdat4.non_jan == 'Jan.']
non_jans = ewretdat4[ewretdat4.non_jan != 'Jan.']

t_losers_jan = pd.Series(stats.ttest_1samp(jans['losers'],0.0)).to_frame().T
t_winners_jan = pd.Series(stats.ttest_1samp(jans['winners'],0.0)).to_frame().T
t_long_short_jan = pd.Series(stats.ttest_1samp(jans['long_short'],0.0)).to_frame().T

t_losers_nonjan = pd.Series(stats.ttest_1samp(non_jans['losers'],0.0)).to_frame().T
t_winners_nonjan = pd.Series(stats.ttest_1samp(non_jans['winners'],0.0)).to_frame().T
t_long_short_nonjan = pd.Series(stats.ttest_1samp(non_jans['long_short'],0.0)).to_frame().T

print('Losers Jan: ', t_losers_jan[0][0])
print('Winners Jan: ', t_winners_jan[0][0])
print('Long Short Jan: ', t_long_short_jan[0][0])

print('Losers Non Jan: ', t_losers_nonjan[0][0])
print('Winners Non Jan: ', t_winners_nonjan[0][0])
print('Long Short Non Jan: ', t_long_short_nonjan[0][0])

Losers Jan:  3.6539446700314224
Winners Jan:  1.0903973657066743
Long Short Jan:  3.927263865668229
Losers Non Jan:  4.903086401891032
Winners Non Jan:  0.7398548048574216
Long Short Non Jan:  5.502567697087983


In [37]:
monthly_rets

Unnamed: 0_level_0,count,mean,std
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,30.0,0.047859,0.066748
2,30.0,0.010366,0.093139
3,30.0,0.031158,0.081481
4,30.0,0.049776,0.082993
5,30.0,0.01207,0.06691
6,30.0,0.020173,0.062463
7,30.0,0.00744,0.047758
8,30.0,0.03568,0.055161
9,30.0,0.018374,0.045807
10,30.0,0.021219,0.052433


In [38]:
monthly_rets_grouped_all

Unnamed: 0_level_0,mean,std
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1
Feb.-Dec.,0.020519,0.067739
Jan.,0.047859,0.066748


In [39]:
monthly_rets_grouped_losers

Unnamed: 0_level_0,mean,std
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1
Feb.-Dec.,0.023201,0.085959
Jan.,0.058429,0.087585


In [40]:
monthly_rets_grouped_winners

Unnamed: 0_level_0,mean,std
non_jan,Unnamed: 1_level_1,Unnamed: 2_level_1
Feb.-Dec.,0.002682,0.065855
Jan.,0.01057,0.053094


## Size Effect

In [41]:
_tmp_mrktcp = lspd_m[['date', 'compNam', 'mrktCp']]
umd_filter = umd.copy(deep=True)
umd_filter = umd_filter[['date', 'compNam', 'P8']] #only including 
_tmp_mrktcp = pd.merge(_tmp_mrktcp, umd_filter, on=['date', 'compNam'], how='inner') 
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp.groupby('date')['mrktCp'].transform(lambda x: pd.qcut(x, 10, labels=False)).tolist()
_tmp_mrktcp['mrktRnk'] = _tmp_mrktcp['mrktRnk'] + 1 #indexed at zero so add 1 
_tmp_mrktcp = _tmp_mrktcp[['date', 'compNam', 'mrktCp', 'mrktRnk']]

#joining the two
port_a = port_all.merge(_tmp_mrktcp, left_on=['form_date', 'compNam'], right_on=['date', 'compNam'], how='inner')
port_a = port_a.rename(columns={'date_x' : 'date'})
port_a = port_a[(port_a['date'] > pd.to_datetime('1993-01-01')) & (port_a['date'] < pd.to_datetime('2023-01-01'))]
umd2 = port_a.sort_values(by=['date','momr','form_date','compNam']).drop_duplicates()
rnks = umd2.groupby(['date','momr','form_date'])['mrktRnk'].mean().reset_index()
rnks.groupby(['momr'])['mrktRnk'].mean().reset_index()


Unnamed: 0,momr,mrktRnk
0,1,3.902824
1,2,5.689041
2,3,4.523705


In [42]:
umd2 = port_a.sort_values(by=['date','momr','form_date','compNam']).drop_duplicates()
rnks = umd2.groupby(['date','momr','form_date'])['mrktCp'].mean().reset_index()
rnks.groupby(['momr'])['mrktCp'].mean().reset_index()

Unnamed: 0,momr,mrktCp
0,1,856.562215
1,2,2762.568591
2,3,1137.042591


In [43]:
lspd_m.mrktCp.mean()

1575.006110929179

## CAPM Alpha

In [45]:
#loading in European Fama-French Dataset 
EurFF3 = pd.read_csv('/users/henry/downloads/Europe_5_Factors.csv')
EurFF3['date'] = [str(i)[:4]+'-'+str(i)[-2:] for i in EurFF3['date'].tolist()]
EurFF3['date'] = pd.to_datetime(EurFF3.date) 
EurFF3['date'] = EurFF3['date'] + MonthEnd(0)

#joining the portfolio data with the Fama-French factors
ewexcretdat = ewretdat3_all.copy(deep=True).reset_index()
ewexcretdat['date'] = ewexcretdat['date'] + MonthEnd(0)
ewexcretdat = ewexcretdat.merge(EurFF3, how='left', left_on='date', right_on='date')
ewexcretdat['RF']= ewexcretdat['RF'].bfill()

#computing excess returns 
ewexcretdat['excess_ret'] = ewexcretdat['long_short']  
lseRets['date'] = lseRets['date'] + MonthEnd(0)
ewexcretdat = ewexcretdat.merge(lseRets, how='inner', on=['date'])
ewexcretdat['mrkt_excess'] = ewexcretdat['mrktRet'] - ewexcretdat['RF']
ewexcretdat['excess_losers'] = ewexcretdat['losers'] - ewexcretdat['RF']
ewexcretdat['excess_winners'] = ewexcretdat['winners'] - ewexcretdat['RF']

#computing market varying beta 
ewexcretdat['d'] = ewexcretdat.mrkt_excess.apply(lambda x: 1 if x > 0 else 0) #d == 1 in a down market 
ewexcretdat['excess_d'] = ewexcretdat['mrkt_excess'] * ewexcretdat['d'] 
ewexcretdat['excess_d_minus_1'] = ewexcretdat['mrkt_excess'] * (1-ewexcretdat['d'])


CAPM_data = ewexcretdat[['date', 'excess_ret', 'mrkt_excess', 'excess_losers', 'excess_winners', 'excess_d', 'Mkt-RF','excess_d_minus_1', 'SMB', 'HML']]

In [94]:
x = np.array(CAPM_data[['mrkt_excess']])
x = sm.add_constant(x) 
y = np.array(CAPM_data['excess_ret'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.1189
Date:                Mon, 29 Apr 2024   Prob (F-statistic):              0.730
Time:                        16:46:39   Log-Likelihood:                 457.59
No. Observations:                 360   AIC:                            -911.2
Df Residuals:                     358   BIC:                            -903.4
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0214      0.005      4.171      0.0

In [95]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 6.731137030306074, 'Test Statistic p-value': 0.034542372672448834, 'F-Statistic': 3.4011148047675848, 'F-Test p-value': 0.03441996819329876}


In [97]:
x = np.array(CAPM_data[['mrkt_excess']])
x = sm.add_constant(x) 
y = np.array(CAPM_data['excess_losers'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.926
Model:                            OLS   Adj. R-squared:                  0.925
Method:                 Least Squares   F-statistic:                     3192.
Date:                Mon, 29 Apr 2024   Prob (F-statistic):          2.01e-180
Time:                        16:47:55   Log-Likelihood:                 545.02
No. Observations:                 360   AIC:                            -1086.
Df Residuals:                     358   BIC:                            -1078.
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0194      0.005      4.286      0.0

In [98]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 24.438786087754636, 'Test Statistic p-value': 4.933839627770679e-06, 'F-Statistic': 13.000082058962331, 'F-Test p-value': 3.5502767156534678e-06}


In [99]:
y = np.array(CAPM_data['excess_winners'])
CAPM_model = sm.OLS(y, x)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.960
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     8239.
Date:                Mon, 29 Apr 2024   Prob (F-statistic):          3.42e-249
Time:                        16:48:16   Log-Likelihood:                 662.87
No. Observations:                 360   AIC:                            -1322.
Df Residuals:                     358   BIC:                            -1314.
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0020      0.002     -0.881      0.3

In [100]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 1.9242615629199777, 'Test Statistic p-value': 0.38207789350629034, 'F-Statistic': 0.9592403285417476, 'F-Test p-value': 0.38416923905551514}


In [103]:
x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_ret'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.059
Model:                            OLS   Adj. R-squared:                  0.054
Method:                 Least Squares   F-statistic:                     6.515
Date:                Mon, 29 Apr 2024   Prob (F-statistic):            0.00166
Time:                        16:54:18   Log-Likelihood:                 468.42
No. Observations:                 360   AIC:                            -930.8
Df Residuals:                     357   BIC:                            -919.2
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0069      0.005      1.301      0.1

In [104]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 5.086793729691479, 'Test Statistic p-value': 0.27850729349969494, 'F-Statistic': 1.2720094252178469, 'F-Test p-value': 0.28058079716505335}


In [106]:
x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_losers'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.934
Method:                 Least Squares   F-statistic:                     2186.
Date:                Mon, 29 Apr 2024   Prob (F-statistic):          5.26e-201
Time:                        16:55:07   Log-Likelihood:                 567.60
No. Observations:                 360   AIC:                            -1129.
Df Residuals:                     357   BIC:                            -1118.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0032      0.004      0.755      0.4

In [107]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 2.8780374988203983, 'Test Statistic p-value': 0.5784385604755522, 'F-Statistic': 0.7152341632292312, 'F-Test p-value': 0.5819519253998915}


In [108]:
x1 = np.array(CAPM_data[['excess_d', 'excess_d_minus_1']])
x1 = sm.add_constant(x1)
y1 = np.array(CAPM_data['excess_winners'])
CAPM_model = sm.OLS(y1, x1)
results = CAPM_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     4121.
Date:                Mon, 29 Apr 2024   Prob (F-statistic):          2.23e-247
Time:                        16:56:47   Log-Likelihood:                 663.31
No. Observations:                 360   AIC:                            -1321.
Df Residuals:                     357   BIC:                            -1309.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0037      0.003     -1.316      0.1

In [109]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 2.7837296378788823, 'Test Statistic p-value': 0.5946445194570532, 'F-Statistic': 0.691614648770906, 'F-Test p-value': 0.5981527167706878}


In [58]:
std = pd.read_csv('/users/henry/desktop/Uni Work/Final Year/Dissertation/Data/all_share_index.csv')
std['date'] = pd.to_datetime(std['DATE'])
std['month'] = [i.month for i in std['date']]
std['year'] = [i.year for i in std['date']]
std['pct_change'] = std['FTARI'].pct_change()
std = std.dropna(subset='pct_change')
std = std.groupby(['year', 'month'])['pct_change'].std().reset_index()
std = std.rename(columns={'pct_change' : 'mrktStd'})
std['mrktStd'] = std['mrktStd'] 
std = std[['month', 'year', 'mrktStd']]


lseRets['year'] = [i.year for i in lseRets['date'].tolist()]
lseRets['month'] = [i.month for i in lseRets['date'].tolist()]
rets = lseRets.copy(deep=True)
rets = lseRets[['year', 'month', 'mrktRet']]


monthly_rets = ewretdat3_all.copy(deep=True).reset_index()
monthly_rets['year'] = [i.year for i in monthly_rets['date'].to_list()] 
monthly_rets['month'] = [i.month for i in monthly_rets['date'].to_list()]

monthly_rets2 = monthly_rets.merge(std, how='left', on=['year', 'month'])
monthly_rets2 = monthly_rets2.merge(rets, how='left', on=['year','month'])


In [59]:
x = np.array(monthly_rets2[['mrktStd', 'mrktRet']]) #to counter negative correlation you need to include both market std and return
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
#results = vol_model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.110
Model:                            OLS   Adj. R-squared:                  0.105
Method:                 Least Squares   F-statistic:                     13.49
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           2.24e-06
Time:                        12:42:20   Log-Likelihood:                 478.46
No. Observations:                 360   AIC:                            -950.9
Df Residuals:                     357   BIC:                            -939.3
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.009     -0.026      0.9

In [60]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 12.179409204689025, 'Test Statistic p-value': 0.03241046184610721, 'F-Statistic': 2.479157917937757, 'F-Test p-value': 0.03170467693404771}


In [61]:
x = np.array(monthly_rets2[['mrktStd', 'mrktRet']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.728
Model:                            OLS   Adj. R-squared:                  0.727
Method:                 Least Squares   F-statistic:                     432.5
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           4.02e-96
Time:                        12:42:27   Log-Likelihood:                 605.10
No. Observations:                 360   AIC:                            -1204.
Df Residuals:                     357   BIC:                            -1193.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0123      0.006     -1.999      0.0

In [62]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 18.6507185048901, 'Test Statistic p-value': 0.0022322963087943147, 'F-Statistic': 3.8683862592666265, 'F-Test p-value': 0.0019966144005494173}


In [63]:
x = np.array(monthly_rets2[['mrktStd', 'mrktRet']])
y = np.array(monthly_rets2['winners'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.653
Model:                            OLS   Adj. R-squared:                  0.651
Method:                 Least Squares   F-statistic:                     204.6
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           6.26e-60
Time:                        12:42:28   Log-Likelihood:                 665.03
No. Observations:                 360   AIC:                            -1324.
Df Residuals:                     357   BIC:                            -1312.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0120      0.004     -2.722      0.0

## Size Again

In [64]:
monthly_rets2['month'] = [i.month for i in monthly_rets2['date']]

lrgRet = umd[umd.mrktRnk == 3]
smlRet = umd[umd.mrktRnk == 1]
medRet = umd[umd.mrktRnk == 2]
lrgRet = lrgRet.groupby('date')['rawRet'].mean().reset_index()
smlRet = smlRet.groupby('date')['rawRet'].mean().reset_index()
medRet = medRet.groupby('date')['rawRet'].mean().reset_index()
lrgRet = lrgRet.rename(columns={'rawRet' : 'lrgRet'})
smlRet = smlRet.rename(columns={'rawRet' : 'smlRet'})
medRet = medRet.rename(columns={'rawRet' : 'medret'})
monthly_rets2 = monthly_rets2.merge(lrgRet, how='left', on='date')
monthly_rets2 = monthly_rets2.merge(smlRet, how='left', on='date')
monthly_rets2 = monthly_rets2.merge(medRet, how='left', on='date')
monthly_rets2['jan'] = monthly_rets2['month'].apply(lambda x: 1 if x == 1 else 0)

In [110]:
x = np.array(monthly_rets2[['mrktStd','smlRet']])
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.138
Model:                            OLS   Adj. R-squared:                  0.133
Method:                 Least Squares   F-statistic:                     18.25
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           2.84e-08
Time:                        16:57:58   Log-Likelihood:                 484.14
No. Observations:                 360   AIC:                            -962.3
Df Residuals:                     357   BIC:                            -950.6
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.008     -0.022      0.9

In [None]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

In [111]:
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.114
Model:                            OLS   Adj. R-squared:                  0.106
Method:                 Least Squares   F-statistic:                     10.88
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           7.40e-07
Time:                        16:58:07   Log-Likelihood:                 479.26
No. Observations:                 360   AIC:                            -950.5
Df Residuals:                     356   BIC:                            -935.0
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0224      0.004      6.042      0.0

In [112]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 61.60883602677794, 'Test Statistic p-value': 6.558541105608147e-10, 'F-Statistic': 8.02939050512284, 'F-Test p-value': 7.912744052002447e-11}


In [113]:
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.729
Model:                            OLS   Adj. R-squared:                  0.727
Method:                 Least Squares   F-statistic:                     212.5
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           5.65e-79
Time:                        16:58:49   Log-Likelihood:                 605.69
No. Observations:                 360   AIC:                            -1203.
Df Residuals:                     356   BIC:                            -1188.
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0157      0.003      5.773      0.0

In [114]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 12.164385290098657, 'Test Statistic p-value': 0.20419992187100536, 'F-Statistic': 1.3600086015999757, 'F-Test p-value': 0.20495368900795113}


In [115]:
x = np.array(monthly_rets2[['smlRet', 'medret','lrgRet']])
y = np.array(monthly_rets2['winners'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.650
Model:                            OLS   Adj. R-squared:                  0.647
Method:                 Least Squares   F-statistic:                     128.3
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           2.39e-56
Time:                        16:59:37   Log-Likelihood:                 663.62
No. Observations:                 360   AIC:                            -1319.
Df Residuals:                     356   BIC:                            -1304.
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0068      0.002     -3.465      0.0

In [116]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 75.32350060158056, 'Test Statistic p-value': 1.3641221377803907e-12, 'F-Statistic': 10.289740290495105, 'F-Test p-value': 4.1526002411844407e-14}


In [68]:
x = np.array(monthly_rets2[['mrktStd']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
#results = vol_model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.060
Model:                            OLS   Adj. R-squared:                  0.058
Method:                 Least Squares   F-statistic:                     7.084
Date:                Mon, 29 Apr 2024   Prob (F-statistic):            0.00813
Time:                        12:42:49   Log-Likelihood:                 381.92
No. Observations:                 360   AIC:                            -759.8
Df Residuals:                     358   BIC:                            -752.1
Df Model:                           1                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0632      0.012      5.161      0.0

In [118]:
x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['long_short'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.138
Model:                            OLS   Adj. R-squared:                  0.133
Method:                 Least Squares   F-statistic:                     18.25
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           2.84e-08
Time:                        17:02:59   Log-Likelihood:                 484.14
No. Observations:                 360   AIC:                            -962.3
Df Residuals:                     357   BIC:                            -950.6
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.008     -0.022      0.9

In [119]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 10.219599864451713, 'Test Statistic p-value': 0.06924679324942007, 'F-Statistic': 2.068576941769144, 'F-Test p-value': 0.06877374370808499}


In [122]:
x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['losers'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.727
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                     552.0
Date:                Mon, 29 Apr 2024   Prob (F-statistic):          5.75e-110
Time:                        17:04:34   Log-Likelihood:                 604.48
No. Observations:                 360   AIC:                            -1203.
Df Residuals:                     357   BIC:                            -1191.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0041      0.005     -0.759      0.4

In [123]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 31.868839316270424, 'Test Statistic p-value': 6.30733785616773e-06, 'F-Statistic': 6.876255881612846, 'F-Test p-value': 3.850074225342466e-06}


In [125]:
x = np.array(monthly_rets2[['mrktStd', 'smlRet']])
y = np.array(monthly_rets2['winners'])
x = sm.add_constant(x) 
vol_model = sm.OLS(y, x)
results = vol_model.fit(cov_type='HC3')
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.592
Model:                            OLS   Adj. R-squared:                  0.590
Method:                 Least Squares   F-statistic:                     155.1
Date:                Mon, 29 Apr 2024   Prob (F-statistic):           3.28e-49
Time:                        17:05:29   Log-Likelihood:                 636.00
No. Observations:                 360   AIC:                            -1266.
Df Residuals:                     357   BIC:                            -1254.
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0039      0.006     -0.649      0.5

In [126]:
white_test = het_white(results.resid,  results.model.exog)

#define labels to use for output of White's test
labels = ['Test Statistic', 'Test Statistic p-value', 'F-Statistic', 'F-Test p-value']
#print results of White's test

print(dict(zip(labels, white_test)))

{'Test Statistic': 7.204784088680296, 'Test Statistic p-value': 0.20585031841208906, 'F-Statistic': 1.4458776379971845, 'F-Test p-value': 0.20711556877447174}


## Rolling Periods

In [73]:
top50Rets_93_to_02, mom_output_93_to_02, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '1994-01-01', '2004-01-01')

  umd = umd.groupby('date').apply(assign_momr)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return
100%|███████████████████████████████████████████| 43/43 [00:36<00:00,  1.16it/s]


In [109]:
mom_output_93_to_02

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,-0.000247,-0.034044,0.9728994
1,losers,0.041688,5.098876,1.303903e-06
2,long_short,0.041935,5.691216,9.22669e-08


In [145]:
top50Rets_02_to_13, mom_output_02_to_13, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '2004-01-01', '2014-01-01')

  umd = umd.groupby('date').apply(assign_momr)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return
100%|███████████████████████████████████████████| 43/43 [00:35<00:00,  1.20it/s]


In [146]:
mom_output_02_to_13

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.007082,1.287556,0.200399
1,losers,0.018753,2.370849,0.019354
2,long_short,0.011671,2.2416,0.026841


In [143]:
top50Rets_12_to_22, mom_output_12_to_22, ewretdat2, port, ewretdat, ewretdat3 = top_bottom_50(umd, lspd_m, '2014-01-01', '2024-01-01')

KeyboardInterrupt: 

In [144]:
mom_output_12_to_22

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.003184,0.671538,0.503179
1,losers,0.017969,2.406719,0.017634
2,long_short,0.014785,2.692806,0.008108


## Testing Function

In [2]:
def top_bottom_50(umd, data, filter_1, filter_2, K=1):
    
    umd = umd.groupby('date').apply(assign_momr)
    umd.reset_index(inplace=True, drop=True)
    umd['momr']=umd.momr.astype(int)
    umd['momr'] = umd['momr']+1 #indexed at 0 so increase by 1 
    umd['form_date'] = umd['date']
    umd['medate'] = umd['date']+MonthEnd(0)
    umd['hdate1']= umd['medate']+MonthBegin(1)
    umd['hdate2']= umd['medate']+MonthEnd(K)
    umd = umd[['compNam', 'form_date','momr','hdate1','hdate2']]
    
    _tmp_ret = data[['compNam','date','logret']]
    _tmp_ret['ret'] = np.exp(_tmp_ret.logret)-1 #unlogging the return 
   
    #joining the ranking and return dataframes
    #chunking the join to reduce RAM load
    
    chunk_size = 10000  
    chunks = [umd[i:i+chunk_size] for i in range(0, umd.shape[0], chunk_size)]

    port = pd.DataFrame()
    
    for chunk in tqdm(chunks):
        
        merged_chunk = pd.merge(_tmp_ret, chunk, on=['compNam'], how='inner', sort=False)
        port = pd.concat([port, merged_chunk], ignore_index=True)
        port = port[(port['hdate1']<=port['date']) & (port['date']<=port['hdate2'])]
        
    
    umd2 = port.sort_values(by=['date','momr','form_date','compNam']).drop_duplicates()
    umd3 = umd2.groupby(['date','momr','form_date'])['ret'].mean().reset_index()

    # reduce sample based on size 
    umd3 = umd3[umd3['date'] < pd.to_datetime(filter_2)]
    umd3 = umd3[umd3['date'] >= pd.to_datetime(filter_1)]
    umd3 = umd3.sort_values(by=['date','momr'])
    
    # Create one return series per MOM group every month
    ewret = umd3.groupby(['date','momr'])['ret'].mean().reset_index()
    ewstd = umd3.groupby(['date','momr'])['ret'].std().reset_index()
    ewret = ewret.rename(columns={'ret':'ewret'})
    ewstd = ewstd.rename(columns={'ret':'ewretstd'})
    ewretdat = pd.merge(ewret, ewstd, on=['date','momr'], how='inner')
    ewretdat = ewretdat.sort_values(by=['momr'])
    
    #summarising the portfolio returns
    decileRets = ewretdat.groupby(['momr'])['ewret'].describe()[['count','mean', 'std']]
    
    # Transpose portfolio layout to have columns as portfolio returns
    ewretdat2 = ewretdat.pivot(index='date', columns='momr', values='ewret')
   
    # Add prefix port in front of each column
    ewretdat2 = ewretdat2.add_prefix('port')
    ewretdat2 = ewretdat2.rename(columns={'port3' :'winners', 'port1':'losers', 'port2' : 'middle'})
    ewretdat2['long_short'] = ewretdat2['losers'] - ewretdat2['winners']

    # Compute Long-Short Portfolio Cumulative Returns
    ewretdat3 = ewretdat2
    ewretdat3['1+losers']=1+ewretdat3['losers']
    ewretdat3['1+winners']=1+ewretdat3['winners']
    ewretdat3['1+ls'] = 1+ewretdat3['long_short']

    ewretdat3['cumret_winners']=ewretdat3['1+winners'].cumprod()-1
    ewretdat3['cumret_losers']=ewretdat3['1+losers'].cumprod()-1
    ewretdat3['cumret_long_short']=ewretdat3['1+ls'].cumprod()-1

    #################################
    # Portfolio Summary Statistics  #
    ################################# 

    # Mean 
    mom_mean = ewretdat3[['winners', 'losers', 'long_short']].mean().to_frame()
    mom_mean = mom_mean.rename(columns={0:'mean'}).reset_index()

    # T-Value and P-Value
    t_losers = pd.Series(stats.ttest_1samp(ewretdat3['losers'],0.0)).to_frame().T
    t_winners = pd.Series(stats.ttest_1samp(ewretdat3['winners'],0.0)).to_frame().T
    t_long_short = pd.Series(stats.ttest_1samp(ewretdat3['long_short'],0.0)).to_frame().T

    t_losers['momr']='losers'
    t_winners['momr']='winners'
    t_long_short['momr']='long_short'

    t_output =pd.concat([t_winners, t_losers, t_long_short])\
        .rename(columns={0:'t-stat', 1:'p-value'})

    # Combine mean, t and p
    mom_output1 = pd.merge(mom_mean, t_output, on=['momr'], how='inner')
    
    return decileRets, mom_output1, ewretdat2, port, ewretdat, ewretdat3

def assign_momr(group):
    
    mean_ret = group['rawRet'].mean()
    
    group['rawRet'] = [i if i > -1 else 0 for i in group['rawRet'].tolist()] #this line stops the stocks that were delisted in the previous period from being included in the 'bottom 50' portfolio 
    
    # Sort the group by 'ret' to identify top and bottom values
    sorted_group = group.sort_values(by='rawRet')

    # Get the top and bottom 50 values
    top_50_values = sorted_group['rawRet'].tail(50)
    bottom_50_values = sorted_group['rawRet'].head(50)

    # Function to assign momr value
    def get_momr_value(row):
        if row['rawRet'] in bottom_50_values.values:
            return 0
        elif row['rawRet'] in top_50_values.values:
            return 2
        else:
            return 1

    # Apply the function to assign 'momr'
    group['momr'] = group.apply(get_momr_value, axis=1)

    return group