In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from finance_byu.summarize import summary



In [2]:
crsp_daily1 = pd.read_feather('C:/Users/benja/desktop/ACME_Senior/Fin585/Final Project/crsp_daily.ftr')

crsp_monthly1 = pd.read_feather('C:/Users/benja/desktop/ACME_Senior/Fin585/Final Project/crsp_monthly.ftr')

In [27]:
# for both datasets keep only the data post jan 1 2000 from caldt
crsp_daily2 = crsp_daily1[(crsp_daily1['caldt'] >= '1980-01-01') & (crsp_daily1['caldt'] <= '2007-12-31')]
crsp_monthly2 = crsp_monthly1[(crsp_monthly1['caldt'] >= '1980-01-01') & (crsp_monthly1['caldt'] <= '2007-12-31')]

In [98]:
crsp_daily = crsp_daily2.copy()
crsp_monthly = crsp_monthly2.copy()

In [99]:
# Cleaning the daily dataset
crsp_daily['prc'] = abs(crsp_daily['prc'])
crsp_daily['prc_lag'] = crsp_daily.groupby('permno')['prc'].shift(1)
# crsp_daily = crsp_daily[crsp_daily['ret'] > -1]
crsp_daily = crsp_daily[crsp_daily['prc_lag'] > 5]
crsp_daily.sort_values(by = ['permno', 'caldt'], inplace = True)
crsp_daily.drop(columns = ['shrcd', 'excd', 'siccd', 'vol', 'shr', 'prc_lag'], inplace = True)

# Adding column for positive and negative returns
crsp_daily['ret_class'] = np.where(crsp_daily['ret'].shift(1) >= 0, '1', '0')

# Calculate rolling yearly number of positive and negative days for each stock
n = 250
crsp_daily['pos_days'] = crsp_daily.groupby('permno').rolling(window = n, min_periods = n)['ret_class'].sum().reset_index(level=0, drop=True)
crsp_daily['neg_days'] = n - crsp_daily['pos_days']
crsp_daily['%pos'] = crsp_daily['pos_days'] / n
crsp_daily['%neg'] = crsp_daily['neg_days'] / n
crsp_daily['%neg - %pos'] = crsp_daily['%neg'] - crsp_daily['%pos']
crsp_daily.drop(columns = ['ret_class', 'pos_days', 'neg_days', '%pos', '%neg'], inplace = True)
crsp_daily.rename(columns = {'ret': 'daily_ret'}, inplace = True)
crsp_daily.dropna(inplace = True)

In [112]:
crsp_monthly = crsp_monthly2.copy()
# crsp_daily.head()

In [113]:
crsp_monthly.drop(columns = ['cusip', 'ticker', 'shrcd', 'excd', 'siccd', 'vol', 'shr', 'cumfacshr'], inplace = True)

crsp_monthly['prc'] = abs(crsp_monthly['prc'])
crsp_monthly['prc_lag'] = crsp_monthly.groupby('permno')['prc'].shift(1)
crsp_monthly = crsp_monthly[crsp_monthly['prc_lag'] > 5]

crsp_monthly['monthly_ret_lag'] = crsp_monthly.groupby('permno')['ret'].shift(1) 

# keep ret >-1
# crsp_monthly = crsp_monthly[crsp_monthly['ret'] > -1]
crsp_monthly.rename(columns = {'ret': 'monthly_ret'}, inplace = True)
# crsp_monthly.head()

In [114]:
crsp_monthly['logret'] = np.log(1 + crsp_monthly['monthly_ret_lag'])
crsp_monthly['mom'] = crsp_monthly.groupby('permno')['logret'].rolling(11,11).sum().reset_index(drop=True, level=0)
crsp_monthly['mom'] = crsp_monthly.groupby('permno')['mom'].shift(2)
crsp_monthly.dropna(inplace=True)
# crsp_monthly.tail()

In [115]:
# I want to merge the two datasets on permno and caldt and keep everymonthly observation
crsp = pd.merge(crsp_daily, crsp_monthly, on = ['permno', 'caldt'])

# crsp = pd.merge(crsp_daily, crsp_monthly, on = ['permno', 'caldt'], how = 'left')

#fill any nans with the data that follows
# crsp.fillna(method = 'bfill', inplace = True)

crsp.drop(columns = ['logret', 'prc_x', 'prc_y'], inplace = True)

# crsp

In [116]:
crsp['id'] = np.sign(crsp['mom']) * crsp['%neg - %pos']
# crsp.head()

In [117]:
crsp['mom_bins'] = crsp.groupby('caldt')['mom'].transform(pd.qcut,2,labels=False)
# crsp.tail()

In [118]:
crsp['id_bins'] = crsp.groupby(["caldt", "mom_bins"])['id'].transform(pd.qcut,5, labels=False)
# crsp.tail()

In [119]:
crsp['id_bins2'] = crsp.groupby(["caldt"])['id'].transform(pd.qcut,5, labels=False)
# crsp.tail()

In [124]:
port = crsp.groupby(['caldt','mom_bins','id_bins'])['monthly_ret'].mean()*100
port = port.unstack(level=['mom_bins','id_bins'])
# port.head()

In [125]:
port1 = crsp.groupby(['caldt','mom_bins','id_bins2'])['monthly_ret'].mean()*100
port1 = port1.unstack(level=['mom_bins','id_bins2'])
# port1.head()

In [126]:
summary(port).loc[['mean','std','tstat']].round(3)

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins,0,1,2,3,4,0,1,2,3,4
mean,0.959,1.022,1.114,1.647,2.146,2.339,2.396,1.768,1.002,-0.221
std,4.472,5.195,5.152,4.785,3.642,3.082,4.186,4.633,5.093,5.545
tstat,3.849,3.53,3.879,6.176,10.575,13.62,10.273,6.848,3.53,-0.716


In [127]:
summary(port1).loc[['mean','std','tstat']].round(3)

mom_bins,0,0,0,0,0,1,1,1,1,1
id_bins2,0,1,2,3,4,0,1,2,3,4
mean,0.824,0.815,0.907,0.965,1.919,2.435,1.946,0.941,-0.365,-1.68
std,4.762,5.361,5.211,5.206,4.035,3.358,4.475,5.043,6.055,8.971
tstat,2.928,2.711,3.123,3.326,8.532,13.011,7.801,3.349,-1.07,-2.661
