In [1]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import statsmodels.formula.api as smf
from finance_byu.regtables import Regtable

Read in parsed monthly data

In [2]:
df = pd.read_csv("parsed_monthly.csv",index_col=0)
df.tail()

Unnamed: 0,permno,mdt,prc,up,down,total
4918548,93436,2023-08,258.08,9,14,23
4918549,93436,2023-09,250.22,8,12,20
4918550,93436,2023-10,200.84,10,12,22
4918551,93436,2023-11,240.08,15,6,21
4918552,93436,2023-12,248.48,11,9,20


Calculate %neg - %pos

In [3]:
df['%neg-%pos'] = (df['down'] - df['up']) / df['total']
df.head()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos
0,10000,1986-01,4.375,8,2,18,-0.333333
1,10000,1986-02,3.25,2,10,19,0.421053
2,10000,1986-03,4.4375,6,2,20,-0.2
3,10000,1986-04,4.0,3,8,22,0.227273
4,10000,1986-05,3.10938,4,13,21,0.428571


Lag return for momentum calculation

In [4]:
df['ret'] = np.log(df['prc'] / df['prc'].shift(1))
df['ret_lag'] = df.groupby('permno')['ret'].shift(1)
df.head()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos,ret,ret_lag
0,10000,1986-01,4.375,8,2,18,-0.333333,,
1,10000,1986-02,3.25,2,10,19,0.421053,-0.297252,
2,10000,1986-03,4.4375,6,2,20,-0.2,0.311436,-0.297252
3,10000,1986-04,4.0,3,8,22,0.227273,-0.103797,0.311436
4,10000,1986-05,3.10938,4,13,21,0.428571,-0.251871,-0.103797


Calculate t-2 to t+11 momentum

In [5]:
df['mom'] = df.groupby('permno')['ret_lag'].rolling(11,11).sum().reset_index(drop=True, level=0)
df['mom'] = df.groupby('permno')['mom'].shift(2)
df.dropna(inplace=True)
df.tail()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos,ret,ret_lag,mom
4918548,93436,2023-08,258.08,9,14,23,0.217391,-0.035588,0.021392,-1.194592
4918549,93436,2023-09,250.22,8,12,20,0.2,-0.030929,-0.035588,-1.225383
4918550,93436,2023-10,200.84,10,12,22,0.090909,-0.219832,-0.030929,-0.030129
4918551,93436,2023-11,240.08,15,6,21,-0.428571,0.178464,-0.219832,-0.027403
4918552,93436,2023-12,248.48,11,9,20,-0.1,0.03439,0.178464,0.095014


Calculate ID

In [6]:
df['id'] = np.sign(df['mom']) * df['%neg-%pos']
df.tail()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos,ret,ret_lag,mom,id
4918548,93436,2023-08,258.08,9,14,23,0.217391,-0.035588,0.021392,-1.194592,-0.217391
4918549,93436,2023-09,250.22,8,12,20,0.2,-0.030929,-0.035588,-1.225383,-0.2
4918550,93436,2023-10,200.84,10,12,22,0.090909,-0.219832,-0.030929,-0.030129,-0.090909
4918551,93436,2023-11,240.08,15,6,21,-0.428571,0.178464,-0.219832,-0.027403,0.428571
4918552,93436,2023-12,248.48,11,9,20,-0.1,0.03439,0.178464,0.095014,-0.1


Bin on momentum. Losers = 0. Winners = 1.

In [7]:
df['mombins'] = df.groupby("mdt")['mom'].transform(pd.cut,[-100,0,100], labels=False)
df.tail()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos,ret,ret_lag,mom,id,mombins
4918548,93436,2023-08,258.08,9,14,23,0.217391,-0.035588,0.021392,-1.194592,-0.217391,0
4918549,93436,2023-09,250.22,8,12,20,0.2,-0.030929,-0.035588,-1.225383,-0.2,0
4918550,93436,2023-10,200.84,10,12,22,0.090909,-0.219832,-0.030929,-0.030129,-0.090909,0
4918551,93436,2023-11,240.08,15,6,21,-0.428571,0.178464,-0.219832,-0.027403,0.428571,0
4918552,93436,2023-12,248.48,11,9,20,-0.1,0.03439,0.178464,0.095014,-0.1,1


Bin on ID. Continuous (high quality) = 0. Discrete (low quality) = 4.

In [8]:
df['idbins'] = df.groupby(["mdt", "mombins"])['id'].transform(pd.cut,5, labels=False)
df.tail()

Unnamed: 0,permno,mdt,prc,up,down,total,%neg-%pos,ret,ret_lag,mom,id,mombins,idbins
4918548,93436,2023-08,258.08,9,14,23,0.217391,-0.035588,0.021392,-1.194592,-0.217391,0,1
4918549,93436,2023-09,250.22,8,12,20,0.2,-0.030929,-0.035588,-1.225383,-0.2,0,1
4918550,93436,2023-10,200.84,10,12,22,0.090909,-0.219832,-0.030929,-0.030129,-0.090909,0,2
4918551,93436,2023-11,240.08,15,6,21,-0.428571,0.178464,-0.219832,-0.027403,0.428571,0,3
4918552,93436,2023-12,248.48,11,9,20,-0.1,0.03439,0.178464,0.095014,-0.1,1,2


Group by month, id, and momentum. Calculate spread portfolio

In [9]:
port = df.groupby(['mdt','idbins','mombins'])['ret'].mean()*100
port = port.unstack(level=['mombins'])
port['spread'] = port[1] - port[0]
port

Unnamed: 0_level_0,mombins,0,1,spread
mdt,idbins,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1927-02,0,-5.778231,20.186325,25.964557
1927-02,1,2.597685,7.875589,5.277904
1927-02,2,6.246135,6.066611,-0.179524
1927-02,3,8.214620,2.119098,-6.095521
1927-02,4,20.842783,-1.065414,-21.908197
...,...,...,...,...
2023-12,0,13.250947,7.917390,-5.333557
2023-12,1,0.960784,8.254292,7.293508
2023-12,2,8.227642,5.102848,-3.124795
2023-12,3,14.132753,-7.545131,-21.677884


Results

In [10]:
port = port.unstack(level=['idbins'])
port.stack('idbins').groupby("idbins").mean()

mombins,0,1,spread
idbins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,-14.735485,10.128565,24.86405
1,-8.558764,7.531275,16.081586
2,0.197527,0.511464,0.323469
3,8.667292,-7.100685,-15.79775
4,11.977296,-12.741692,-24.718988


In [11]:
summary(port).loc[['count','mean','std','tstat']].round(3) #Negative ID is continuous. Bin 0 is the most continuous

mombins,0,0,0,0,0,1,1,1,1,1,spread,spread,spread,spread,spread
idbins,0,1,2,3,4,0,1,2,3,4,0,1,2,3,4
count,1163.0,1162.0,1163.0,1160.0,1163.0,1163.0,1157.0,1161.0,1159.0,1163.0,1163.0,1156.0,1161.0,1156.0,1163.0
mean,-14.735,-8.559,0.198,8.667,11.977,10.129,7.531,0.511,-7.101,-12.742,24.864,16.082,0.323,-15.798,-24.719
std,19.21,8.911,8.155,9.043,14.439,8.653,7.746,6.76,7.631,14.784,20.221,9.182,5.809,9.95,19.126
tstat,-26.159,-32.74,0.826,32.642,28.289,39.918,33.071,2.578,-31.678,-29.392,41.934,59.551,1.897,-53.982,-44.074
