In [390]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import statsmodels.formula.api as smf
from finance_byu.regtables import Regtable

In [391]:
# Load the daily CRSP extract; the CSV's first column becomes the row index.
df = pd.read_csv(filepath_or_buffer="../crsp_daily_cut_2.csv", index_col=0)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0


##### Lag price and return

In [392]:
# Lag price and return by one row *within each security* (permno).
# A frame-wide `.shift(1)` would give the first row of every permno the last
# price/return of whichever permno is stacked directly above it.
# Rows are lagged in their stored order, so each permno's rows must already be
# in date order (as they are in the preview below).
df['prclag'] = df.groupby('permno')['prc'].shift(1)
df['retlag'] = df.groupby('permno')['ret'].shift(1)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0,,
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0,144.5,
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0,147.5,0.020761
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0,145.75,-0.011864
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0,143.0,-0.018868


##### Up and down days

In [393]:
# 0/1 indicators for the sign of the lagged return.
# A missing `retlag` compares False on both sides, so it is flagged 0 in both
# columns; an exactly-zero `retlag` is likewise neither up nor down.
df['up'] = df['retlag'].gt(0).astype(int)
df['down'] = df['retlag'].lt(0).astype(int)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0,,,0,0
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0,144.5,,0,0
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0,147.5,0.020761,1,0
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0,145.75,-0.011864,0,1
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0,143.0,-0.018868,0,1


##### Calculate percent down - percent up

In [394]:
# Net share of down days over a trailing window, computed separately per permno:
#     n-p = (#down - #up) / (#down + #up)
# A value is only produced once a full window of rows is available
# (window == min_periods).
ROLL_WINDOW = 231  # NOTE(review): presumably ~11 months of trading days — confirm


def _rolling_count(flags):
    """Trailing ROLL_WINDOW-row sum of a 0/1 flag series for a single permno."""
    return flags.rolling(ROLL_WINDOW, min_periods=ROLL_WINDOW).sum()


# `transform` hands the results back on df's own row index, so the column
# assignment below lines up row-by-row regardless of how the permnos are
# ordered in the frame (no positional write-back through a re-numbered index).
up_rolling = df.groupby('permno')['up'].transform(_rolling_count)
down_rolling = df.groupby('permno')['down'].transform(_rolling_count)
total = down_rolling + up_rolling
# Leave the ratio undefined (NaN) when the window has neither up nor down days.
mask = total != 0
df['n-p'] = ((down_rolling - up_rolling) / total).where(mask)
# NOTE(review): this drops rows with a NaN in *any* column, not only the
# warm-up rows where 'n-p' is undefined (the original cell flagged this line
# with "##?") — confirm that is the intended filter.
df = df.dropna()
df

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down,n-p
8248545,12490,1926-10-07,11,1,3570,49.000,0.012920,1100.0,579.0,48.375,-0.030075,0,1,-0.004926
8248546,12490,1926-10-08,11,1,3570,48.000,-0.020408,2600.0,579.0,49.000,0.012920,1,0,-0.009804
8248547,12490,1926-10-09,11,1,3570,47.625,-0.007813,300.0,579.0,48.000,-0.020408,0,1,-0.004878
8248548,12490,1926-10-11,11,1,3570,48.000,0.007874,1900.0,579.0,47.625,-0.007813,0,1,0.004878
8248549,12490,1926-10-13,11,1,3570,47.875,-0.002604,800.0,579.0,48.000,0.007874,1,0,-0.004878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13655785,14593,2023-12-22,11,3,3571,193.600,-0.005548,36702500.0,15552800.0,194.680,-0.000770,0,1,-0.113043
13655786,14593,2023-12-26,11,3,3571,193.050,-0.002841,28541200.0,15552800.0,193.600,-0.005548,0,1,-0.104348
13655787,14593,2023-12-27,11,3,3571,193.150,0.000518,47538700.0,15552800.0,193.050,-0.002841,0,1,-0.104348
13655788,14593,2023-12-28,11,3,3571,193.580,0.002226,33691700.0,15552800.0,193.150,0.000518,1,0,-0.104348


##### Convert to monthly data

In [395]:
# Collapse the daily panel to one row per (permno, calendar month): the row
# with the latest date in that month.
df['caldt'] = pd.to_datetime(df['caldt'])
df['mdt'] = df['caldt'].dt.to_period('M')
# Sort explicitly so that "last row of the month" never depends on the order
# the rows happened to arrive in, then pick that row directly with `tail(1)`
# instead of rebuilding the frame through a Python-level `apply`.
# NOTE(review): `ret` / `retlag` on the surviving rows are simply the values
# from that last row; they are not aggregated over the month — confirm the
# downstream steps expect that.
df = (
    df.sort_values(['permno', 'caldt'])
      .groupby(['permno', 'mdt'])
      .tail(1)
      .drop(columns=['up', 'down'])
      .reset_index(drop=True)
)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt
0,12490,1926-10-30,11,1,3570,52.0,0.019608,700.0,579.0,51.0,-0.004878,-0.019608,1926-10
1,12490,1926-11-30,11,1,3570,53.625,0.014184,1100.0,579.0,52.875,-0.016279,-0.014925,1926-11
2,12490,1926-12-31,11,1,3570,54.25,0.014019,100.0,579.0,53.5,-0.006961,-0.059406,1926-12
3,12490,1927-01-31,11,1,3570,55.5,0.023042,500.0,579.0,54.25,-0.004587,-0.049505,1927-01
4,12490,1927-02-28,11,1,3570,60.0,-0.004149,2000.0,579.0,60.25,-0.004132,-0.083744,1927-02


##### Calculate momentum

In [396]:
# Log return built from the lagged return carried on each row.
df['logret'] = np.log(df['retlag'] + 1)

# Per permno: sum of the 11 most recent `logret` rows (all 11 must be present),
# with the group level dropped so the result sits on df's own row index.
trailing_logret = (
    df.groupby('permno')['logret']
    .rolling(window=11, min_periods=11)
    .sum()
    .droplevel(0)
)
df['mom'] = trailing_logret

# Push the signal back two rows within each permno.
df['mom'] = df.groupby('permno')['mom'].shift(periods=2)
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom
1655,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771
1656,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613
1657,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997
1658,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866
1659,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265


##### Calculate information discreteness

In [397]:
# Information discreteness: `n-p` signed by the direction (-1, 0, +1) of `mom`.
mom_sign = np.sign(df['mom'])
df['id'] = df['n-p'] * mom_sign
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom,id
1655,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771,-0.06087
1656,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613,-0.06087
1657,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997,-0.078261
1658,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866,-0.13913
1659,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265,-0.104348


##### Bin on momentum and information discreteness

In [398]:
# Two fixed momentum buckets: 0 for mom in (-100, 0], 1 for mom in (0, 100].
# Values outside those edges, and missing `mom`, are left as NaN.
# The edges are constants, so the bucket of a row does not depend on its
# permno; cutting the whole column once replaces the per-permno
# groupby/transform loop.
df['mombins'] = pd.cut(df['mom'], bins=[-100, 0, 100], labels=False)
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom,id,mombins
1655,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771,-0.06087,1.0
1656,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613,-0.06087,1.0
1657,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997,-0.078261,1.0
1658,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866,-0.13913,1.0
1659,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265,-0.104348,1.0


In [399]:
# Bucket `id` into 5 equal-width bins (integer codes 0-4), separately for each
# permno: the bin edges span that permno's own min-max range of `id`.
# NOTE(review): the edges are taken over all of a permno's rows, including
# later dates — confirm this is intended rather than a per-date sort.
df['idbins'] = df.groupby("permno")['id'].transform(
    lambda id_values: pd.cut(id_values, 5, labels=False)
)
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom,id,mombins,idbins
1655,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771,-0.06087,1.0,2.0
1656,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613,-0.06087,1.0,2.0
1657,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997,-0.078261,1.0,1.0
1658,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866,-0.13913,1.0,1.0
1659,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265,-0.104348,1.0,1.0


##### Portfolio level

In [400]:
# Equal-weighted mean of `retlag`, in percent, for every
# (date, momentum bin, id bin) cell.
# NOTE(review): the averaged column is the lagged return `retlag`, not `ret` —
# confirm that is the return the portfolios are meant to earn.
sort_keys = ['caldt', 'mombins', 'idbins']
port = df.groupby(sort_keys)['retlag'].mean().mul(100)
port

caldt       mombins  idbins
1927-10-31  1.0      1.0      -1.22950
1927-11-30  1.0      1.0       1.01780
1927-12-31  1.0      1.0       3.15220
1928-01-31  1.0      1.0      -0.60300
1928-02-29  1.0      1.0       1.39720
                                ...   
2023-09-29  1.0      1.0      -1.11060
                     2.0       0.15260
2023-10-31  1.0      1.0       0.65385
2023-11-30  1.0      1.0      -0.02635
2023-12-29  1.0      1.0       0.20000
Name: retlag, Length: 1521, dtype: float64

In [401]:
# Pivot the id bins into columns: one row per (date, momentum bin).
port.unstack('idbins')

Unnamed: 0_level_0,idbins,0.0,1.0,2.0,3.0,4.0
caldt,mombins,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1927-10-31,1.0,,-1.22950,,,
1927-11-30,1.0,,1.01780,,,
1927-12-31,1.0,,3.15220,,,
1928-01-31,1.0,,-0.60300,,,
1928-02-29,1.0,,1.39720,,,
...,...,...,...,...,...,...
2023-08-31,1.0,,0.28000,1.9172,,
2023-09-29,1.0,,-1.11060,0.1526,,
2023-10-31,1.0,,0.65385,,,
2023-11-30,1.0,,-0.02635,,,
