In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from finance_byu.summarize import summary



In [2]:
crsp_daily1 = pd.read_feather('C:/Users/benja/desktop/ACME_Senior/Fin585/Final Project/crsp_daily.ftr')

crsp_monthly1 = pd.read_feather('C:/Users/benja/desktop/ACME_Senior/Fin585/Final Project/crsp_monthly.ftr')

In [3]:
# for both datasets keep only the data post jan 1 2000 from caldt
crsp_daily2 = crsp_daily1[crsp_daily1['caldt'] >= '2010-01-01']
crsp_monthly2 = crsp_monthly1[crsp_monthly1['caldt'] >= '2010-01-01']

In [4]:
crsp_daily = crsp_daily2.copy()
crsp_monthly = crsp_monthly2.copy()

In [5]:
# Cleaning the daily dataset
crsp_daily['prc'] = abs(crsp_daily['prc'])
crsp_daily['prc_lag'] = crsp_daily.groupby('permno')['prc'].shift(1)
crsp_daily = crsp_daily[crsp_daily['ret'] > -1]
crsp_daily = crsp_daily[crsp_daily['prc_lag'] > 5]
crsp_daily.sort_values(by = ['permno', 'caldt'], inplace = True)
crsp_daily.drop(columns = ['shrcd', 'excd', 'siccd', 'vol', 'shr', 'prc_lag'], inplace = True)

# Adding column for positive and negative returns
crsp_daily['ret_class'] = np.where(crsp_daily['ret'].shift(1) >= 0, '1', '0')

# Calculate rolling yearly number of positive and negative days for each stock
n = 252
crsp_daily['pos_days'] = crsp_daily.groupby('permno').rolling(window = n, min_periods = n)['ret_class'].sum().reset_index(level=0, drop=True)
crsp_daily['neg_days'] = n - crsp_daily['pos_days']
crsp_daily['%pos'] = crsp_daily['pos_days'] / n
crsp_daily['%neg'] = crsp_daily['neg_days'] / n
crsp_daily['%neg - %pos'] = crsp_daily['%neg'] - crsp_daily['%pos']
crsp_daily.drop(columns = ['ret_class', 'pos_days', 'neg_days', '%pos', '%neg'], inplace = True)
crsp_daily.rename(columns = {'ret': 'daily_ret'}, inplace = True)
crsp_daily.dropna(inplace = True)

In [6]:
crsp_daily.head()

Unnamed: 0,permno,caldt,prc,daily_ret,%neg - %pos
6663,10001,2011-01-03,10.45,-0.006654,-0.119048
6664,10001,2011-01-04,10.44,-0.000957,-0.119048
6665,10001,2011-01-05,10.4,-0.003831,-0.119048
6666,10001,2011-01-06,10.43,0.002885,-0.111111
6667,10001,2011-01-07,10.49,0.005753,-0.119048


In [7]:
crsp_monthly.drop(columns = ['cusip', 'ticker', 'shrcd', 'excd', 'siccd', 'vol', 'shr', 'cumfacshr'], inplace = True)

# keep ret >-1
crsp_monthly = crsp_monthly[crsp_monthly['ret'] > -1]
crsp_monthly.rename(columns = {'ret': 'monthly_ret'}, inplace = True)
crsp_monthly.head()

Unnamed: 0,permno,caldt,prc,monthly_ret
305,10001,2010-01-29,10.06,-0.018932
306,10001,2010-02-26,10.0084,-0.000656
307,10001,2010-03-31,10.17,0.020643
308,10001,2010-04-30,11.39,0.124385
309,10001,2010-05-28,11.4,0.004829


In [8]:
crsp_monthly['logret'] = np.log(1 + crsp_monthly['monthly_ret'])
crsp_monthly['mom'] = crsp_monthly.groupby('permno')['logret'].rolling(11,11).sum().reset_index(drop=True, level=0)
crsp_monthly['mom'] = crsp_monthly.groupby('permno')['mom'].shift(2)
crsp_monthly.dropna(inplace=True)
crsp_monthly.tail()

Unnamed: 0,permno,caldt,prc,monthly_ret,logret,mom
4889699,93436,2023-08-31,258.07999,-0.034962,-0.035588,-0.12677
4889700,93436,2023-09-29,250.22,-0.030456,-0.030929,-0.030128
4889701,93436,2023-10-31,200.84,-0.197346,-0.219832,-0.027402
4889702,93436,2023-11-30,240.08,0.195379,0.178463,0.095016
4889703,93436,2023-12-29,248.48,0.034988,0.03439,0.03105


In [14]:
# I want to merge the two datasets on permno and caldt and keep everymonthly observation
crsp = pd.merge(crsp_daily, crsp_monthly, on = ['permno', 'caldt'])

# crsp = pd.merge(crsp_daily, crsp_monthly, on = ['permno', 'caldt'], how = 'left')

#fill any nans with the data that follows
# crsp.fillna(method = 'bfill', inplace = True)

crsp.drop(columns = ['logret', 'prc_x', 'prc_y'], inplace = True)

crsp

Unnamed: 0,permno,caldt,daily_ret,%neg - %pos,monthly_ret,mom
0,10001,2011-01-31,0.003724,-0.150794,0.028992,0.020414
1,10001,2011-02-28,-0.000910,-0.166667,0.022727,0.090498
2,10001,2011-03-31,0.033480,-0.166667,0.072404,0.119734
3,10001,2011-04-29,-0.004433,-0.111111,-0.038789,0.121773
4,10001,2011-05-31,0.012324,-0.111111,0.028050,0.074440
...,...,...,...,...,...,...
890976,93436,2023-08-31,0.004593,-0.047619,-0.034962,-0.126770
890977,93436,2023-09-29,0.015586,-0.023810,-0.030456,-0.030128
890978,93436,2023-10-31,0.017633,-0.015873,-0.197346,-0.027402
890979,93436,2023-11-30,-0.016630,-0.063492,0.195379,0.095016


In [15]:
crsp['id'] = np.sign(crsp['mom']) * crsp['%neg - %pos']
crsp.head()

Unnamed: 0,permno,caldt,daily_ret,%neg - %pos,monthly_ret,mom,id
0,10001,2011-01-31,0.003724,-0.150794,0.028992,0.020414,-0.150794
1,10001,2011-02-28,-0.00091,-0.166667,0.022727,0.090498,-0.166667
2,10001,2011-03-31,0.03348,-0.166667,0.072404,0.119734,-0.166667
3,10001,2011-04-29,-0.004433,-0.111111,-0.038789,0.121773,-0.111111
4,10001,2011-05-31,0.012324,-0.111111,0.02805,0.07444,-0.111111


In [16]:
crsp['mom_bins'] = crsp.groupby('caldt')['mom'].transform(pd.qcut,2,labels=False)
crsp.tail()

Unnamed: 0,permno,caldt,daily_ret,%neg - %pos,monthly_ret,mom,id,mom_bins
890976,93436,2023-08-31,0.004593,-0.047619,-0.034962,-0.12677,0.047619,0
890977,93436,2023-09-29,0.015586,-0.02381,-0.030456,-0.030128,0.02381,0
890978,93436,2023-10-31,0.017633,-0.015873,-0.197346,-0.027402,0.015873,0
890979,93436,2023-11-30,-0.01663,-0.063492,0.195379,0.095016,-0.063492,1
890980,93436,2023-12-29,-0.018564,-0.111111,0.034988,0.03105,-0.111111,1


In [17]:
crsp['id_bins'] = crsp.groupby(["caldt", "mom_bins"])['id'].transform(pd.qcut,5, labels=False)
crsp.tail()

Unnamed: 0,permno,caldt,daily_ret,%neg - %pos,monthly_ret,mom,id,mom_bins,id_bins
890976,93436,2023-08-31,0.004593,-0.047619,-0.034962,-0.12677,0.047619,0,4
890977,93436,2023-09-29,0.015586,-0.02381,-0.030456,-0.030128,0.02381,0,3
890978,93436,2023-10-31,0.017633,-0.015873,-0.197346,-0.027402,0.015873,0,3
890979,93436,2023-11-30,-0.01663,-0.063492,0.195379,0.095016,-0.063492,1,2
890980,93436,2023-12-29,-0.018564,-0.111111,0.034988,0.03105,-0.111111,1,1


In [18]:
crsp['id_bins2'] = crsp.groupby(["caldt"])['id'].transform(pd.qcut,5, labels=False)
crsp.tail()

Unnamed: 0,permno,caldt,daily_ret,%neg - %pos,monthly_ret,mom,id,mom_bins,id_bins,id_bins2
890976,93436,2023-08-31,0.004593,-0.047619,-0.034962,-0.12677,0.047619,0,4,4
890977,93436,2023-09-29,0.015586,-0.02381,-0.030456,-0.030128,0.02381,0,3,4
890978,93436,2023-10-31,0.017633,-0.015873,-0.197346,-0.027402,0.015873,0,3,3
890979,93436,2023-11-30,-0.01663,-0.063492,0.195379,0.095016,-0.063492,1,2,1
890980,93436,2023-12-29,-0.018564,-0.111111,0.034988,0.03105,-0.111111,1,1,0


In [20]:
port = crsp.groupby(['caldt','mom_bins','id_bins'])['monthly_ret'].mean()*100
port = port.unstack(level=['mom_bins','id_bins'])
port.head()

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins,0,1,2,3,4,0,1,2,3,4
caldt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2011-01-31,0.39837,0.233404,0.335189,0.028924,2.290503,1.100602,1.699894,1.15629,-0.04327,-2.372041
2011-02-28,1.890536,2.686756,2.982876,3.348363,3.38804,5.027067,5.244941,5.005476,3.591511,2.96817
2011-03-31,0.602787,0.701377,0.99269,0.387765,1.583712,2.375745,2.483227,2.730754,2.44277,1.438656
2011-04-29,2.227003,1.605172,1.722339,2.144838,1.566124,3.917121,3.794339,3.592654,2.574879,0.274601
2011-05-31,0.919072,-1.584099,-2.771343,-1.563316,1.710658,-0.858747,-1.084838,-1.952997,-1.897271,-3.426079


In [21]:
port1 = crsp.groupby(['caldt','mom_bins','id_bins2'])['monthly_ret'].mean()*100
port1 = port1.unstack(level=['mom_bins','id_bins2'])
port1.head()

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins2,0,1,2,3,4,0,1,2,3,4
caldt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2011-01-31,0.419199,0.299425,0.38044,-0.349033,1.53113,1.340287,1.115196,0.637943,-1.783306,-3.574395
2011-02-28,1.976713,2.451127,2.647814,3.039381,3.470828,5.027067,5.20463,4.497147,3.630495,1.787656
2011-03-31,0.804296,0.478824,1.058183,0.399113,1.250442,2.378494,2.665119,2.573619,2.263448,0.053935
2011-04-29,1.911944,2.413903,1.374636,2.147508,1.738035,4.168058,3.184678,3.194792,1.670532,-0.678513
2011-05-31,1.200387,-0.166254,-2.374459,-2.664657,0.375549,-0.916469,-1.456948,-2.041026,-2.743779,-4.430076


In [22]:
summary(port).loc[['mean','std','tstat']].round(3)

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins,0,1,2,3,4,0,1,2,3,4
mean,0.541,0.84,0.927,1.325,1.935,1.672,1.592,1.115,0.671,-0.041
std,4.88,5.093,5.113,4.991,4.342,3.305,4.085,4.298,4.556,5.152
tstat,1.385,2.061,2.265,3.317,5.565,6.319,4.868,3.239,1.839,-0.098


In [23]:
summary(port1).loc[['mean','std','tstat']].round(3)

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins2,0,1,2,3,4,0,1,2,3,4
mean,0.389,0.592,0.842,1.087,1.765,1.747,1.354,0.778,0.243,-0.117
std,4.966,4.955,5.162,5.083,4.529,3.463,4.13,4.467,4.935,6.068
tstat,0.977,1.493,2.037,2.672,4.868,6.302,4.093,2.175,0.616,-0.241
