In [88]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import matplotlib.pyplot as plt
import seaborn as sns

In [89]:
# Load the cleaned CRSP monthly panel from parquet into `df`

df = pd.read_parquet(path="~/Data/crsp_monthly_clean.parquet")

# Preview the first rows
df.head()

Unnamed: 0,permno,caldt,shrcd,exchcd,ticker,prc,ret,shrout
0,10000,1986-02-28,10.0,3.0,OMFGA,3.25,-0.257143,3680.0
1,10000,1986-03-31,10.0,3.0,OMFGA,4.4375,0.365385,3680.0
2,10000,1986-04-30,10.0,3.0,OMFGA,4.0,-0.098592,3793.0
3,10000,1986-05-30,10.0,3.0,OMFGA,3.10938,-0.222656,3793.0
4,10000,1986-06-30,10.0,3.0,OMFGA,3.09375,-0.005025,3793.0


In [90]:
# Cleaning: keep only the identifier, date, price and return columns

df = df.loc[:, ['permno', 'ticker', 'caldt', 'prc', 'ret']]

# Preview the trimmed frame
df.head()

Unnamed: 0,permno,ticker,caldt,prc,ret
0,10000,OMFGA,1986-02-28,3.25,-0.257143
1,10000,OMFGA,1986-03-31,4.4375,0.365385
2,10000,OMFGA,1986-04-30,4.0,-0.098592
3,10000,OMFGA,1986-05-30,3.10938,-0.222656
4,10000,OMFGA,1986-06-30,3.09375,-0.005025


In [91]:
# Calculate momentum feature
window = 11  # number of monthly log returns summed into the momentum signal
skip = 1  # number of most recent months left out of the signal
holding_period = 6  # number of forward monthly log returns summed into the target
holding_period_var = f'ret_{holding_period}'

# rolling() and shift() below operate on row order, so each permno's rows must
# be in date order. Sorting leaves the order unchanged if it already is.
# NOTE(review): shift() counts rows, not calendar months; this assumes every
# permno has one row per month with no gaps -- TODO confirm against the data.
df = df.sort_values(['permno', 'caldt'])

# Log Returns
df['logret'] = np.log1p(df['ret'])

# Momentum at row t: sum of log returns over t-(window+skip) .. t-(skip+1),
# i.e. t-12 .. t-2 with the settings above.
# groupby().rolling() returns a (permno, original index) MultiIndex. Dropping
# only the permno level keeps the original row labels, so the assignment aligns
# on labels rather than relying on the frame's physical order.
df['mom'] = (
    df.groupby('permno')['logret']
    .rolling(window, window)
    .sum()
    .reset_index(level=0, drop=True)
)
df['mom'] = df.groupby('permno')['mom'].shift(skip + 1) # Once for skip and once for lag

# Holding period returns at row t: sum of log returns over t .. t+holding_period-1.
# The rolling sum is backward-looking, so it is shifted up by holding_period-1 rows.
df[holding_period_var] = (
    df.groupby('permno')['logret']
    .rolling(holding_period, holding_period)
    .sum()
    .reset_index(level=0, drop=True)
)
df[holding_period_var] = df.groupby('permno')[holding_period_var].shift(-(holding_period-1))

df

Unnamed: 0,permno,ticker,caldt,prc,ret,logret,mom,ret_6
0,10000,OMFGA,1986-02-28,3.25000,-0.257143,-0.297252,,-0.430783
1,10000,OMFGA,1986-03-31,4.43750,0.365385,0.311436,,-1.089044
2,10000,OMFGA,1986-04-30,4.00000,-0.098592,-0.103797,,-1.459321
3,10000,OMFGA,1986-05-30,3.10938,-0.222656,-0.251872,,-1.633155
4,10000,OMFGA,1986-06-30,3.09375,-0.005025,-0.005038,,-1.323014
...,...,...,...,...,...,...,...,...
3755649,93436,TSLA,2023-08-31,258.07999,-0.034962,-0.035588,-0.126770,
3755650,93436,TSLA,2023-09-29,250.22000,-0.030456,-0.030929,-0.030128,
3755651,93436,TSLA,2023-10-31,200.84000,-0.197346,-0.219832,-0.027402,
3755652,93436,TSLA,2023-11-30,240.08000,0.195379,0.178463,0.095016,


In [92]:
# Keep rows where momentum is known and the previous row's price is at least 5

# Previous row's price within each permno
df['prclag'] = df.groupby('permno')['prc'].shift(1)

# notna() is the explicit form of the `mom == mom` NaN test; rows whose prclag
# is NaN fail the >= comparison and are dropped as well.
df = df.loc[df['mom'].notna() & df['prclag'].ge(5)].reset_index(drop=True)

df

Unnamed: 0,permno,ticker,caldt,prc,ret,logret,mom,ret_6,prclag
0,10001,GFGC,1987-02-27,6.25000,-0.074074,-0.076961,0.196692,-0.083732,6.75000
1,10001,GFGC,1987-03-31,6.37500,0.036800,0.036139,0.140122,0.073271,6.25000
2,10001,GFGC,1987-04-30,6.12500,-0.039216,-0.040006,0.038273,0.014572,6.37500
3,10001,GFGC,1987-05-29,5.68750,-0.071429,-0.074108,0.064560,0.074380,6.12500
4,10001,GFGC,1987-06-30,5.87500,0.051429,0.050150,0.034407,0.118635,5.68750
...,...,...,...,...,...,...,...,...,...
2601186,93436,TSLA,2023-08-31,258.07999,-0.034962,-0.035588,-0.126770,,267.42999
2601187,93436,TSLA,2023-09-29,250.22000,-0.030456,-0.030929,-0.030128,,258.07999
2601188,93436,TSLA,2023-10-31,200.84000,-0.197346,-0.219832,-0.027402,,250.22000
2601189,93436,TSLA,2023-11-30,240.08000,0.195379,0.178463,0.095016,,200.84000


In [93]:
# Assign each row to a momentum decile (0-9) within its calendar date

# transform() forwards q and labels to pd.qcut for every caldt group;
# labels=False makes qcut return integer bin positions instead of intervals.
df['mombins'] = df.groupby("caldt")['mom'].transform(pd.qcut, q=10, labels=False)

df

Unnamed: 0,permno,ticker,caldt,prc,ret,logret,mom,ret_6,prclag,mombins
0,10001,GFGC,1987-02-27,6.25000,-0.074074,-0.076961,0.196692,-0.083732,6.75000,6
1,10001,GFGC,1987-03-31,6.37500,0.036800,0.036139,0.140122,0.073271,6.25000,5
2,10001,GFGC,1987-04-30,6.12500,-0.039216,-0.040006,0.038273,0.014572,6.37500,3
3,10001,GFGC,1987-05-29,5.68750,-0.071429,-0.074108,0.064560,0.074380,6.12500,4
4,10001,GFGC,1987-06-30,5.87500,0.051429,0.050150,0.034407,0.118635,5.68750,4
...,...,...,...,...,...,...,...,...,...,...
2601186,93436,TSLA,2023-08-31,258.07999,-0.034962,-0.035588,-0.126770,,267.42999,3
2601187,93436,TSLA,2023-09-29,250.22000,-0.030456,-0.030929,-0.030128,,258.07999,3
2601188,93436,TSLA,2023-10-31,200.84000,-0.197346,-0.219832,-0.027402,,250.22000,3
2601189,93436,TSLA,2023-11-30,240.08000,0.195379,0.178463,0.095016,,200.84000,6


In [94]:
# Form portfolios on momentum bins

# Portfolio df for summary stats: one row per caldt, one column per momentum bin,
# holding the mean holding-period value of the bin's members scaled by 100.
# The column is looked up through holding_period_var so this cell follows the
# holding_period setting instead of a hard-coded 'ret_6'.
# NOTE(review): the holding-period column is a sum of log returns, so this is a
# cross-sectional average of log returns, not a compounded portfolio return --
# confirm that is the intended measure.
port = df.groupby(['caldt', 'mombins'])[holding_period_var].mean().unstack(level=['mombins'])*100

# Drop rows from port where we don't know the holding return
# (dropna removes any date with a missing value in at least one bin)
port = port.dropna()

# Spread portfolios: top bin (9) minus bottom bin (0)
port['spread'] = port[9] - port[0]

port.head()

mombins,0,1,2,3,4,5,6,7,8,9,spread
caldt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1927-01-31,-5.197556,-2.448131,11.643533,7.621118,9.571692,9.629253,8.758233,12.608271,4.248059,11.160383,16.357939
1927-02-28,4.796626,6.306724,12.770708,14.709732,7.759037,11.383104,14.379886,13.885929,15.132996,18.785181,13.988556
1927-03-31,0.992669,0.054121,4.02228,1.124624,4.647746,7.662631,13.231114,3.950478,10.189064,16.929125,15.936456
1927-04-30,9.624278,3.136658,10.154872,6.75645,10.302408,11.572056,18.326651,12.597155,13.19139,22.028354,12.404076
1927-05-31,-0.633419,-4.947915,2.559859,6.167719,9.59134,14.768194,9.544738,14.170407,14.90445,7.388392,8.021811


In [95]:
# Summary statistics for each decile portfolio and the spread, rounded to 3 decimals
# NOTE(review): the holding-period windows of consecutive monthly rows overlap,
# so tstat may be overstated unless summary() adjusts for that -- confirm.

(
    summary(port)
    .loc[['count','mean','std','tstat']]
    .round(decimals=3)
)

mombins,0,1,2,3,4,5,6,7,8,9,spread
count,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0,1159.0
mean,-4.241,-0.026,1.619,2.335,3.091,3.412,3.748,3.798,3.643,2.201,6.442
std,23.622,19.812,17.993,16.929,16.186,15.709,15.678,16.022,17.102,20.325,12.974
tstat,-6.112,-0.045,3.064,4.696,6.501,7.394,8.14,8.069,7.252,3.687,16.904
