In [348]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import statsmodels.formula.api as smf
from finance_byu.regtables import Regtable

In [349]:
# Read the daily CRSP extract; the first CSV column becomes the row index.
df = pd.read_csv(filepath_or_buffer="../crsp_daily_cut_2.csv", index_col=0)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0


##### Lag price and return

In [350]:
# Previous-row price and return, computed separately for each security.
# Shifting inside each permno group makes the first observation of every
# security NaN instead of inheriting the last price/return of whichever
# permno precedes it in the file.
# Assumes rows are in chronological order within each permno — TODO confirm.
lagged = df.groupby('permno')[['prc', 'ret']].shift(1)
df['prclag'] = lagged['prc']
df['retlag'] = lagged['ret']
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0,,
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0,144.5,
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0,147.5,0.020761
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0,145.75,-0.011864
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0,143.0,-0.018868


##### Up and down days

In [351]:
# 0/1 indicators for the sign of the lagged return.
# A missing or exactly-zero `retlag` is neither up nor down, so both flags are 0.
df['up'] = df['retlag'].gt(0).astype(int)
df['down'] = df['retlag'].lt(0).astype(int)
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down
8248315,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0,,,0,0
8248316,12490,1926-01-02,11,1,3570,147.5,0.020761,200.0,193.0,144.5,,0,0
8248317,12490,1926-01-04,11,1,3570,145.75,-0.011864,100.0,193.0,147.5,0.020761,1,0
8248318,12490,1926-01-05,11,1,3570,143.0,-0.018868,800.0,193.0,145.75,-0.011864,0,1
8248319,12490,1926-01-06,11,1,3570,141.0,-0.013986,1100.0,193.0,143.0,-0.018868,0,1


##### Calculate percent down minus percent up (rolling 231-row window per stock, as a share of non-zero-return days)

In [352]:
# Rolling (down - up) / (down + up) per security.
# `window` is the look-back length in rows (daily observations); a value is
# produced only once a full window is available, otherwise NaN.
window = 231
by_permno = df.groupby('permno')
# Drop only the group level so the results keep df's own row labels. The
# assignment below then aligns by index rather than by position, and no longer
# depends on df already being ordered the way groupby orders its groups.
# Assumes df's row labels are unique — TODO confirm.
up_rolling = by_permno['up'].rolling(window, min_periods=window).sum().reset_index(level=0, drop=True)
down_rolling = by_permno['down'].rolling(window, min_periods=window).sum().reset_index(level=0, drop=True)
nonzero_days = down_rolling + up_rolling
# Windows with no up or down days at all have a zero denominator -> NaN.
df['n-p'] = ((down_rolling - up_rolling) / nonzero_days).where(nonzero_days != 0)
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,up,down,n-p
13655785,14593,2023-12-22,11,3,3571,193.6,-0.005548,36702500.0,15552800.0,194.68,-0.00077,0,1,-0.113043
13655786,14593,2023-12-26,11,3,3571,193.05,-0.002841,28541200.0,15552800.0,193.6,-0.005548,0,1,-0.104348
13655787,14593,2023-12-27,11,3,3571,193.15,0.000518,47538700.0,15552800.0,193.05,-0.002841,0,1,-0.104348
13655788,14593,2023-12-28,11,3,3571,193.58,0.002226,33691700.0,15552800.0,193.15,0.000518,1,0,-0.104348
13655789,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,1,0,-0.104348


##### Convert to monthly data

In [353]:
# Collapse the daily panel to one row per (permno, calendar month): the last
# row of each group in the frame's current order.
df['caldt'] = pd.to_datetime(df['caldt'])
df['mdt'] = df['caldt'].dt.to_period('M')
# tail(1) selects the rows directly, so the grouping columns `permno` and `mdt`
# always remain in the result and no Python function is called per group
# (groupby.apply operating on the grouping columns is deprecated in pandas 2.2).
# The explicit sort reproduces the (permno, mdt) ordering that apply returned.
df = (
    df.groupby(['permno', 'mdt'])
      .tail(1)
      .sort_values(['permno', 'mdt'])
      .reset_index(drop=True)
)
# The daily sign flags are no longer needed once `n-p` has been computed.
df = df.drop(columns=['up', 'down'])
df.head()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt
0,12490,1925-12-31,11,1,3570,144.5,,300.0,193.0,,,,1925-12
1,12490,1926-01-30,11,1,3570,143.0,-0.015491,800.0,193.0,145.25,0.013962,,1926-01
2,12490,1926-02-27,11,1,3570,139.5,-0.007117,500.0,193.0,140.5,-0.017483,,1926-02
3,12490,1926-03-31,11,1,3570,40.375,0.059016,1700.0,579.0,38.125,-0.022436,,1926-03
4,12490,1926-04-30,11,1,3570,44.0,-0.002833,100.0,579.0,44.125,0.014368,,1926-04


##### Calculate momentum

In [354]:
# Momentum: per-permno sum of 11 consecutive rows of log(1 + retlag),
# then lagged by two rows within each permno.
# NOTE(review): on this monthly frame `retlag` is the previous trading day's
# return carried on each month's last row, so the sum adds 11 single-day
# returns rather than compounded monthly returns — confirm this is intended.
# NOTE(review): windows and lags count rows, so they assume every permno has
# one row for every consecutive month — verify there are no gaps.
df['logret'] = np.log(df['retlag'] + 1)
df['mom'] = (
    df.groupby('permno')['logret']
      .rolling(window=11, min_periods=11)
      .sum()
      .droplevel(0)  # discard the permno level so values align with df's rows
)
df['mom'] = df.groupby('permno')['mom'].shift(periods=2)
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom
1689,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771
1690,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613
1691,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997
1692,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866
1693,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265


##### Calculate information discreteness

In [355]:
# Information discreteness: sign of momentum times (percent down - percent up).
# A momentum of exactly zero yields 0; a missing input yields NaN.
# NOTE(review): `mom` is lagged two rows while `n-p` is not lagged — confirm
# the two measures are meant to cover different windows.
df['id'] = df['n-p'].mul(np.sign(df['mom']))
df.tail()

Unnamed: 0,permno,caldt,shrcd,excd,siccd,prc,ret,vol,shr,prclag,retlag,n-p,mdt,logret,mom,id
1689,14593,2023-08-31,11,3,3571,187.87,0.001172,60621800.0,15634200.0,187.65,0.019172,-0.06087,2023-08,0.018991,0.051771,-0.06087
1690,14593,2023-09-29,11,3,3571,171.21,0.003046,51786600.0,15550100.0,170.69,0.001526,-0.06087,2023-09,0.001525,0.080613,-0.06087
1691,14593,2023-10-31,11,3,3571,170.77,0.002819,44809800.0,15552800.0,170.29,0.012305,-0.078261,2023-10,0.01223,0.14997,-0.078261
1692,14593,2023-11-30,11,3,3571,189.95,0.003063,48408200.0,15552800.0,189.37,-0.00541,-0.13913,2023-11,-0.005425,0.07866,-0.13913
1693,14593,2023-12-29,11,3,3571,192.53,-0.005424,42120700.0,15552800.0,193.58,0.002226,-0.104348,2023-12,0.002224,0.112265,-0.104348
