In [94]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from finance_byu.summarize import summary

In [95]:
# Read in the data
df = pd.read_csv('crsp_daily_cut.csv')
# Drop unnamed column
df = df.drop(df.columns[0], axis=1)
# drop shrcd excd siccd vol shr 
df = df.drop(['shrcd', 'excd', 'siccd', 'vol', 'shr'], axis=1)
df.head()

Unnamed: 0,permno,caldt,prc,ret
0,10026,2023-01-05,149.6,-0.010713
1,10026,2023-01-06,152.64,0.020321
2,10026,2023-01-09,149.61,-0.019851
3,10026,2023-01-10,149.87,0.001738
4,10026,2023-01-11,152.39,0.016815


In [96]:
# Calculate the return type (positive or negative)
df['return_type'] = pd.cut(df['ret'], bins=[-float('inf'), 0, float('inf')], labels=['Negative', 'Positive'])

# Group by 'permno' and calculate cumulative sums of positive and negative returns
df['cumulative_pos'] = df.groupby('permno')['return_type'].transform(lambda x: (x == 'Positive').cumsum())
df['cumulative_neg'] = df.groupby('permno')['return_type'].transform(lambda x: (x == 'Negative').cumsum())

# Calculate the ratio of cumulative positive return days to cumulative negative return days for each permno on each day
df['Ratio'] = df['cumulative_pos'] / df['cumulative_neg'].replace(0, 1)  # Replace 0 denominator with 1 to avoid division by zero

# drop cumulatives
df = df.drop(['cumulative_pos', 'cumulative_neg', 'return_type'], axis=1)

df.head()

Unnamed: 0,permno,caldt,prc,ret,Ratio
0,10026,2023-01-05,149.6,-0.010713,0.0
1,10026,2023-01-06,152.64,0.020321,1.0
2,10026,2023-01-09,149.61,-0.019851,0.5
3,10026,2023-01-10,149.87,0.001738,1.0
4,10026,2023-01-11,152.39,0.016815,1.5


In [97]:
# Create a copy of the DataFrame
df1 = df.copy()

# Calculate the return type (positive or negative)
df1['return_type'] = pd.cut(df1['ret'], bins=[-float('inf'), 0, float('inf')], labels=['Negative', 'Positive'])

# Group by 'permno' and calculate cumulative sums of positive and negative returns
df1['cumulative_pos'] = df1.groupby('permno')['return_type'].transform(lambda x: (x == 'Positive').cumsum())
df1['cumulative_neg'] = df1.groupby('permno')['return_type'].transform(lambda x: (x == 'Negative').cumsum())

# Calculate the cumulative average of positive and negative returns for each company up to each day
df1['cumulative_pos_avg'] = df1.groupby('permno')['ret'].transform(lambda x: (x * (x >= 0)).expanding().mean())
df1['cumulative_neg_avg'] = df1.groupby('permno')['ret'].transform(lambda x: (x * (x < 0)).expanding().mean())

# Calculate the ratio of cumulative positive return days to cumulative negative return days for each permno on each day
df1['Ratio'] = df1['cumulative_pos']*df1['cumulative_pos_avg'] / abs(df1['cumulative_neg_avg'])*df1['cumulative_neg'].replace(0, 1)  # Replace 0 denominator with 1 to avoid division by zero

# Drop intermediate columns
df1 = df1.drop(['cumulative_pos', 'cumulative_neg', 'return_type', 'cumulative_pos_avg', 'cumulative_neg_avg'], axis=1)

df1.head()

Unnamed: 0,permno,caldt,prc,ret,Ratio
0,10026,2023-01-05,149.6,-0.010713,-0.0
1,10026,2023-01-06,152.64,0.020321,1.896854
2,10026,2023-01-09,149.61,-0.019851,1.329734
3,10026,2023-01-10,149.87,0.001738,2.886926
4,10026,2023-01-11,152.39,0.016815,7.631331


In [98]:
df['logret'] = np.log(1 + df['ret'])
df['mom'] = df.groupby('permno')['logret'].rolling(11,11).sum().reset_index(drop=True)
df['mom'] = df.groupby('permno')['mom'].shift(2)
df['prclag'] = df.groupby('permno')['prc'].shift()
df = df.query("mom == mom and prclag >= 5").reset_index(drop=True)

df['mom_bin'] = df.groupby('caldt')['mom'].transform(pd.qcut,3,labels=False)
df['ratio_bin'] = df.groupby('caldt')['Ratio'].transform(lambda x: pd.qcut(x, 3, labels=False, duplicates='drop'))

In [99]:
df1['logret'] = np.log(1 + df1['ret'])
df1['mom'] = df1.groupby('permno')['logret'].rolling(11,11).sum().reset_index(drop=True)
df1['mom'] = df1.groupby('permno')['mom'].shift(2)
df1['prclag'] = df1.groupby('permno')['prc'].shift()
df1 = df1.query("mom == mom and prclag >= 5").reset_index(drop=True)

df1['mom_bin'] = df1.groupby('caldt')['mom'].transform(pd.qcut,3,labels=False)
df1['ratio_bin'] = df1.groupby('caldt')['Ratio'].transform(lambda x: pd.qcut(x, 3, labels=False, duplicates='drop'))

  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


In [100]:
port = df.groupby(['caldt','mom_bin','ratio_bin'])['ret'].mean()*100
port = port.unstack(level=['mom_bin','ratio_bin'])
port.head()

mom_bin,0,0,0,1,1,1,2,2,2
ratio_bin,0,1,2,0,1,2,0,1,2
caldt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2023-01-24,-0.002701,0.281792,0.199749,-0.134059,-0.054423,0.094659,-1.12077,-0.555797,-0.212369
2023-01-25,0.120575,0.571927,0.370524,-0.012306,0.277225,0.368189,-0.261961,0.201115,0.647292
2023-01-26,-0.049151,0.53785,0.470857,0.344135,0.682268,0.684754,0.279968,0.999053,1.343204
2023-01-27,-0.080122,0.326716,0.453945,-0.031365,0.199427,0.214736,0.368772,0.879932,1.026651
2023-01-30,-0.459219,-0.351711,-0.054752,-1.025112,-0.890902,-0.637692,-1.967299,-1.869798,-1.442674


In [101]:
port1 = df1.groupby(['caldt','mom_bin','ratio_bin'])['ret'].mean()*100
port1 = port1.unstack(level=['mom_bin','ratio_bin'])
port1.head()

mom_bin,0,0,0,1,1,1,2,2,2
ratio_bin,0,1,2,0,1,2,0,1,2
caldt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2023-01-24,-0.009516,0.315158,0.173209,-0.595141,0.058897,0.165606,-3.00021,-0.711715,-0.031414
2023-01-25,0.124013,0.3798,0.23106,-0.21957,0.258852,0.308802,-1.070762,0.067255,0.613645
2023-01-26,-0.018146,0.537755,0.30276,0.125415,0.703402,0.706768,-0.158372,0.920829,1.311192
2023-01-27,-0.091397,0.227184,0.180698,-0.222378,0.273355,0.147962,-0.11166,0.935037,0.976272
2023-01-30,-0.563685,-0.272659,0.007476,-1.440506,-0.941582,-0.473443,-2.724828,-2.226729,-1.225075


In [102]:
summary(port).loc[['mean','std','tstat']].round(3)

mom_bin,0,0,0,1,1,1,2,2,2
ratio_bin,0,1,2,0,1,2,0,1,2
mean,-0.086,0.094,0.206,-0.027,0.045,0.102,-0.114,0.036,0.151
std,0.975,0.989,0.946,0.667,0.738,0.719,1.002,0.932,0.869
tstat,-1.359,1.456,3.345,-0.611,0.933,2.176,-1.752,0.595,2.68


In [103]:
summary(port1).loc[['mean','std','tstat']].round(3)

mom_bin,0,0,0,1,1,1,2,2,2
ratio_bin,0,1,2,0,1,2,0,1,2
mean,-0.151,0.113,0.346,-0.099,0.041,0.151,-0.276,-0.019,0.21
std,1.022,0.933,0.991,0.767,0.715,0.664,1.073,0.94,0.87
tstat,-2.276,1.861,5.358,-1.988,0.871,3.501,-3.957,-0.306,3.703
