In [253]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import statsmodels.formula.api as smf
from finance_byu.regtables import Regtable

In [254]:
# Load the daily CRSP extract; the first CSV column is used as the row index.
crsp_daily_path = "../crsp_daily_cut.csv"
df = pd.read_csv(crsp_daily_path, index_col=0)
df.head(5)

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr
109545,10026,2023-01-05,11,3,2052,149.6,-0.010713,60472.0,19229.0
109546,10026,2023-01-06,11,3,2052,152.64,0.020321,57034.0,19229.0
109547,10026,2023-01-09,11,3,2052,149.61,-0.019851,63214.0,19229.0
109548,10026,2023-01-10,11,3,2052,149.87,0.001738,112759.0,19229.0
109549,10026,2023-01-11,11,3,2052,152.39,0.016815,57350.0,19229.0


##### Lag price and return

In [255]:
# Lag price and return by one row *within each security*.
# The frame stacks several permnos on top of each other, so a plain
# Series.shift(1) would hand the last row of one permno to the first row of
# the next one. Shifting inside groupby('permno') leaves the first row of
# every permno as NaN instead.
# NOTE(review): this assumes rows are already ordered by caldt within each
# permno (as the displayed sample suggests) -- confirm, or sort beforehand.
df['prclag'] = df.groupby('permno')['prc'].shift(1)
df['retlag'] = df.groupby('permno')['ret'].shift(1)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag
109545,10026,2023-01-05,11,3,2052,149.6,-0.010713,60472.0,19229.0,,
109546,10026,2023-01-06,11,3,2052,152.64,0.020321,57034.0,19229.0,149.6,-0.010713
109547,10026,2023-01-09,11,3,2052,149.61,-0.019851,63214.0,19229.0,152.64,0.020321
109548,10026,2023-01-10,11,3,2052,149.87,0.001738,112759.0,19229.0,149.61,-0.019851
109549,10026,2023-01-11,11,3,2052,152.39,0.016815,57350.0,19229.0,149.87,0.001738


##### Up and down days

In [256]:
# Flag each row by the sign of the lagged return, stored as 0/1 integers.
# A missing or exactly-zero lagged return compares False both ways, so such
# rows get up == 0 and down == 0.
lagged_ret = df['retlag']
df['up'] = (lagged_ret > 0).astype(int)
df['down'] = (lagged_ret < 0).astype(int)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down
109545,10026,2023-01-05,11,3,2052,149.6,-0.010713,60472.0,19229.0,,,0,0
109546,10026,2023-01-06,11,3,2052,152.64,0.020321,57034.0,19229.0,149.6,-0.010713,0,1
109547,10026,2023-01-09,11,3,2052,149.61,-0.019851,63214.0,19229.0,152.64,0.020321,1,0
109548,10026,2023-01-10,11,3,2052,149.87,0.001738,112759.0,19229.0,149.61,-0.019851,0,1
109549,10026,2023-01-11,11,3,2052,152.39,0.016815,57350.0,19229.0,149.87,0.001738,1,0


##### Calculate percent down minus percent up (`n-p`)

In [257]:
# Per permno, count how many of the trailing 231 rows were flagged up / down.
# The positional arguments to rolling() are (window, min_periods), so no value
# is produced until a permno has 231 rows available.
up_counts = df.groupby('permno')['up'].rolling(231, 231).sum()
down_counts = df.groupby('permno')['down'].rolling(231, 231).sum()

# groupby(...).rolling(...) returns a (permno, original row label) MultiIndex.
# Drop only the permno level so the original row labels survive and the
# assignment aligns with df's index. Dropping every level would instead give a
# fresh 0..N-1 index that does not match df's labels (they come from the CSV's
# first column), leaving the new columns misaligned or NaN.
# NOTE(review): alignment relies on df's row labels being unique -- confirm.
df['up_rolling'] = up_counts.reset_index(level=0, drop=True)
df['down_rolling'] = down_counts.reset_index(level=0, drop=True)

# Rows flagged neither up nor down (zero or missing lagged return) are excluded
# from the denominator.
total_days = df['up_rolling'] + df['down_rolling']

# Share of down days minus share of up days over the window.
# NOTE(review): the displayed sample appears to span 2023-01 to 2023-11, which
# may contain fewer than 231 rows per permno; if so this column stays NaN
# regardless of alignment -- confirm the window length against the data.
df['n-p'] = (df['down_rolling'] - df['up_rolling']) / total_days
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down,up_rolling,down_rolling,n-p
109545,10026,2023-01-05,11,3,2052,149.6,-0.010713,60472.0,19229.0,,,0,0,,,
109546,10026,2023-01-06,11,3,2052,152.64,0.020321,57034.0,19229.0,149.6,-0.010713,0,1,,,
109547,10026,2023-01-09,11,3,2052,149.61,-0.019851,63214.0,19229.0,152.64,0.020321,1,0,,,
109548,10026,2023-01-10,11,3,2052,149.87,0.001738,112759.0,19229.0,149.61,-0.019851,0,1,,,
109549,10026,2023-01-11,11,3,2052,152.39,0.016815,57350.0,19229.0,149.87,0.001738,1,0,,,


##### Convert to monthly data

In [258]:
# Collapse the daily panel to one row per (permno, calendar month).
df['caldt'] = pd.to_datetime(df['caldt'])
df['mdt'] = df['caldt'].dt.to_period('M')

# Keep the last row of every (permno, month) group, in whatever order the rows
# currently have inside the group.
# NOTE(review): this is the month-end observation only if rows are ordered by
# caldt within each permno -- confirm.
month_groups = df.groupby(['permno', 'mdt'])
last_rows = month_groups.apply(lambda month_rows: month_rows.iloc[-1])

# Discard the group index and the daily helper columns that are no longer needed.
daily_helper_cols = ['up', 'down', 'up_rolling', 'down_rolling']
df = last_rows.reset_index(drop=True).drop(columns=daily_helper_cols)
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt
0,10026,2023-01-31,11,3,2052,143.30,-0.039029,351006.0,19229.0,149.12,-0.003475,,2023-01
1,10026,2023-02-28,11,3,2052,141.21,-0.016095,80246.0,19229.0,143.52,-0.001600,,2023-02
2,10026,2023-03-31,11,3,2052,148.22,0.013124,117741.0,19252.0,146.30,0.003223,,2023-03
3,10026,2023-04-28,11,3,2052,153.20,0.001046,114493.0,19252.0,153.04,0.021970,,2023-04
4,10026,2023-05-31,11,3,2052,153.95,0.009641,103310.0,19252.0,152.48,-0.025376,,2023-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...
113743,93436,2023-08-31,11,3,9999,258.08,0.004593,108818000.0,3173990.0,256.90,-0.001089,,2023-08
113744,93436,2023-09-29,11,3,9999,250.22,0.015586,128429000.0,3179000.0,246.38,0.024449,,2023-09
113745,93436,2023-10-31,11,3,9999,200.84,0.017633,117996000.0,3178920.0,197.36,-0.047950,,2023-10
113746,93436,2023-11-30,11,3,9999,240.08,-0.016630,131686000.0,3178920.0,244.14,-0.010457,,2023-11


##### Calculate momentum

In [263]:
# Log-transform the lagged return carried on each monthly row.
# NOTE(review): `retlag` was built on the daily data, so on these month-end
# rows it looks like a single prior-day return rather than a monthly return --
# confirm this is the intended input for a momentum signal.
df['logret'] = np.log(1 + df['retlag'])

# Sum 11 consecutive log returns per permno (window=11, min_periods=11), then
# drop the permno level so the result aligns with df's row labels.
summed_logret = df.groupby('permno')['logret'].rolling(window=11, min_periods=11).sum()
df['mom'] = summed_logret.reset_index(level=0, drop=True)

# Push the signal two rows later within each permno.
df['mom'] = df.groupby('permno')['mom'].shift(2)
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom
0,10026,2023-01-31,11,3,2052,143.30,-0.039029,351006.0,19229.0,149.12,-0.003475,,2023-01,-0.003481,
1,10026,2023-02-28,11,3,2052,141.21,-0.016095,80246.0,19229.0,143.52,-0.001600,,2023-02,-0.001601,
2,10026,2023-03-31,11,3,2052,148.22,0.013124,117741.0,19252.0,146.30,0.003223,,2023-03,0.003218,
3,10026,2023-04-28,11,3,2052,153.20,0.001046,114493.0,19252.0,153.04,0.021970,,2023-04,0.021732,
4,10026,2023-05-31,11,3,2052,153.95,0.009641,103310.0,19252.0,152.48,-0.025376,,2023-05,-0.025704,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113743,93436,2023-08-31,11,3,9999,258.08,0.004593,108818000.0,3173990.0,256.90,-0.001089,,2023-08,-0.001090,
113744,93436,2023-09-29,11,3,9999,250.22,0.015586,128429000.0,3179000.0,246.38,0.024449,,2023-09,0.024155,
113745,93436,2023-10-31,11,3,9999,200.84,0.017633,117996000.0,3178920.0,197.36,-0.047950,,2023-10,-0.049138,
113746,93436,2023-11-30,11,3,9999,240.08,-0.016630,131686000.0,3178920.0,244.14,-0.010457,,2023-11,-0.010512,
