In [None]:
# Importing all necessary libraries

import numpy as np
import pandas as pd

from finance_byu.regtables import Regtable
from finance_byu.summarize import summary
from statsmodels.formula.api import ols

In [None]:
# Read the daily and monthly CRSP extracts (feather files) into DataFrames.

daily_path = '~/FIN_585/crsp_data/crsp_daily.ftr'
monthly_path = '~/FIN_585/crsp_data/crsp_monthly.ftr'

crsp_daily = pd.read_feather(daily_path)
crsp_monthly = pd.read_feather(monthly_path)

In [None]:
# ff.csv later supplies the mktrf / smb / hml / rf columns; 'dateff' is parsed
# to datetimes at read time so it can be joined against 'caldt'.
ff_path = '~/FIN_585/crsp_data/ff.csv'
ff = pd.read_csv(ff_path, parse_dates=['dateff'])

In [None]:
# Cleaning the daily dataset

# Sort by stock and date BEFORE taking the lag: groupby(...).shift(1) is purely
# positional, so on unsorted input 'prc_lag' would not be the previous
# observation's price for that stock.
crsp_daily = crsp_daily.sort_values(by=['permno', 'caldt'])

# Work with price magnitudes (presumably negative prices are CRSP's
# bid/ask-midpoint marker -- TODO confirm against the data documentation).
crsp_daily['prc'] = crsp_daily['prc'].abs()
crsp_daily['prc_lag'] = crsp_daily.groupby('permno')['prc'].shift(1)

# Keep rows with a return above -100% and a previous price above 5.
# Rows where 'ret' or 'prc_lag' is missing fail these comparisons and are
# dropped as well (this includes each stock's first observation).
keep_rows = (crsp_daily['ret'] > -1) & (crsp_daily['prc_lag'] > 5)

# Build a new frame instead of mutating a filtered slice in place.
crsp_daily = crsp_daily[keep_rows].drop(
    columns=['shrcd', 'excd', 'siccd', 'vol', 'shr', 'prc_lag']
)

In [None]:
# Adding column for positive and negative returns

# Lag the return WITHIN each stock. An ungrouped shift(1) would hand the first
# row of every permno the last return of the previous permno.
# (Relies on rows being ordered by date within permno at this point.)
lagged_ret = crsp_daily.groupby('permno')['ret'].shift(1)

# 1.0 = previous return was >= 0, 0.0 = previous return was negative.
# Stored as numbers (not the strings '1'/'0') so the flags can be summed, and
# left as NaN where no lagged return exists so that such rows are not silently
# counted as negative days.
crsp_daily['ret_class'] = (lagged_ret >= 0).astype(float).where(lagged_ret.notna())

In [None]:
# Calculate rolling yearly number of positive and negative days for each stock

n = 252  # rolling window length, in daily observations per stock

# The rolling sum needs numeric input. Coerce explicitly so this cell does not
# depend on pandas implicitly converting string flags such as '1'/'0'.
ret_class_num = pd.to_numeric(crsp_daily['ret_class'])

# Per-stock count of positive-flag days over the last n observations.
# reset_index(level=0, drop=True) removes only the permno level that
# groupby-rolling prepends, leaving the original row index for alignment.
pos_days = (
    ret_class_num.groupby(crsp_daily['permno'])
    .rolling(window=n, min_periods=n)
    .sum()
    .reset_index(level=0, drop=True)
)
neg_days = n - pos_days

# Share of negative days minus share of positive days in the window.
crsp_daily['%neg - %pos'] = neg_days / n - pos_days / n

# Only the difference is carried forward; rows without a full window (NaN)
# or with any other missing value are removed.
crsp_daily = crsp_daily.drop(columns=['ret_class', 'prc', 'ret']).dropna()

In [None]:
# Collapse the daily frame to one row per stock and calendar month-end ('ME'),
# keeping the first non-missing value observed in each month.
# NOTE(review): the merge cell further down joins `crsp_daily`, not this
# frame, so `crsp_daily_resampled` appears to be unused -- confirm intent.

crsp_daily_resampled = (
    crsp_daily
    .set_index('caldt')          # resample needs a datetime-like index
    .groupby('permno')
    .resample('ME')
    .first()
    .droplevel('permno')         # leave only the date level in the index
    .reset_index()               # turn 'caldt' back into a column
)

In [None]:
# Trim the monthly file to the columns used downstream, then discard any row
# that still has a missing value.

unused_monthly_cols = [
    'shrcd', 'excd', 'siccd', 'vol', 'shr',
    'cusip', 'ticker', 'prc', 'cumfacshr',
]

crsp_monthly = crsp_monthly.drop(columns=unused_monthly_cols).dropna()

In [None]:
# Attach the daily-derived '%neg - %pos' measure to the monthly rows by
# matching on stock and date; unmatched rows and rows with gaps are dropped.
# NOTE(review): this joins the full daily frame on exact 'caldt' equality;
# `crsp_daily_resampled` is not used here -- confirm which was intended.

merge_keys = ['permno', 'caldt']

merged_data = crsp_monthly.merge(crsp_daily, how='inner', on=merge_keys).dropna()

In [None]:
# Calculating momentum

# Rolling sums and shifts below are positional within each stock, so make the
# date order explicit rather than relying on the merge output order.
# NOTE(review): windows count rows, not calendar months; a stock with missing
# months will span more than 11 calendar months -- confirm this is acceptable.
merged_data = merged_data.sort_values(['permno', 'caldt'])

merged_data['log_ret'] = np.log(1 + merged_data['ret'])

# 11-observation cumulative log return per stock.
# reset_index(level=0, drop=True) strips only the permno level added by
# groupby-rolling so the result keeps the ORIGINAL row index. Dropping every
# level (reset_index(drop=True)) would renumber rows 0..N-1, and the column
# assignment would then align values to the wrong rows whenever the frame's
# index has gaps (it does, after the earlier dropna).
merged_data['cum_log_ret'] = (
    merged_data.groupby('permno')['log_ret']
    .rolling(window=11, min_periods=11)
    .sum()
    .reset_index(level=0, drop=True)
)

# Lag the 11-observation sum by two rows within each stock.
merged_data['momentum'] = merged_data.groupby('permno')['cum_log_ret'].shift(2)

merged_data = merged_data.drop(columns=['log_ret', 'cum_log_ret']).dropna()

In [None]:
# Information discreteness 'id': the (%neg - %pos) day-share gap, signed by the
# direction of momentum.
# NOTE(review): '%neg - %pos' is taken as of the same 'caldt' as 'ret';
# confirm the sort variable does not overlap the holding-period return.

momentum_sign = np.sign(merged_data['momentum'])
merged_data['id'] = merged_data['%neg - %pos'] * momentum_sign

Unconditional Sort (Independent)

In [None]:
# Independent (unconditional) double sort: each month, stocks are split into
# 2 momentum groups and, separately, into 5 id groups over the full
# cross-section.

sort_spec = {'momentum_bins': ('momentum', 2), 'id_bins': ('id', 5)}

for bin_col, (signal, n_bins) in sort_spec.items():
    merged_data[bin_col] = merged_data.groupby('caldt')[signal].transform(
        lambda values, q=n_bins: pd.qcut(values, q, labels=False)
    )

# Mean return of every (momentum bin, id bin) cell per month, laid out with
# one column per cell.
bin_levels = ['momentum_bins', 'id_bins']
cell_mean_ret = merged_data.groupby(['caldt'] + bin_levels)['ret'].mean()
unconditional_port = cell_mean_ret.unstack(level=bin_levels)

In [None]:
# Keep only months in which every portfolio cell has a return.
unconditional_port = unconditional_port.dropna()

In [None]:
# Split the unconditional portfolio returns at 2008-01-01:
# earlier months are in-sample, that date onward is out-of-sample.

in_sample_rows = unconditional_port.index < '2008-01-01'
out_of_sample_rows = unconditional_port.index >= '2008-01-01'

unconditional_port_is = unconditional_port[in_sample_rows]
unconditional_port_oos = unconditional_port[out_of_sample_rows]

In [None]:
# High-minus-low momentum return within each id bin, plus the gap between the
# lowest (0) and highest (4) id bins.

high_momentum = unconditional_port[1]
low_momentum = unconditional_port[0]

winner_losser_port_u = high_momentum - low_momentum
winner_losser_port_u['spread'] = winner_losser_port_u[0] - winner_losser_port_u[4]

In [None]:
# Bring in the ff columns for months present in both frames
# ('caldt' on the portfolio side is matched to 'dateff').
winner_losser_port_u = winner_losser_port_u.merge(
    ff, how='inner', left_on='caldt', right_on='dateff'
)

In [None]:
# Calculate the in-sample and out-of-sample information discreteness spread across momentum bins

# .copy(): the in-sample frame gets new columns assigned later; copying makes
# it an independent frame rather than a slice of winner_losser_port_u
# (avoids SettingWithCopyWarning and ambiguous write-through).
winner_losser_port_u_is = winner_losser_port_u[winner_losser_port_u.dateff < '2008-01-01'].copy()

winner_losser_port_u_oos = winner_losser_port_u[winner_losser_port_u.dateff >= '2008-01-01']

In [None]:
# Calculate excess returns for all portfolios

# Work on an independent copy so the column assignments below do not write
# into a slice of winner_losser_port_u (SettingWithCopyWarning).
winner_losser_port_u_is = winner_losser_port_u_is.copy()

# NOTE(review): columns 0-4 and 'spread' are differences of portfolio returns
# (long-short); confirm that subtracting 'rf' from them is intended.
rf_rate = winner_losser_port_u_is['rf']

for i in range(5):
    winner_losser_port_u_is[f'excess_{i}'] = winner_losser_port_u_is[i] - rf_rate

winner_losser_port_u_is['excess_spread'] = winner_losser_port_u_is['spread'] - rf_rate

In [None]:
# Regress each excess-return series (id bins 0-4, then the spread) on
# mktrf, smb and hml using the in-sample unconditional data.
# reg1..reg5 correspond to excess_0..excess_4; reg6 is excess_spread.

dependents_u = [f'excess_{k}' for k in range(5)] + ['excess_spread']

reg1, reg2, reg3, reg4, reg5, reg6 = [
    ols(f'{dep} ~ mktrf + smb + hml', data=winner_losser_port_u_is).fit()
    for dep in dependents_u
]

In [None]:
# Tabulate the six unconditional-sort regressions side by side.
# (Regtable is imported with the other dependencies at the top of the notebook.)
table = Regtable([reg1, reg2, reg3, reg4, reg5, reg6], stat='tstat', sig='coeff')
table.render()

In [None]:
# In-sample summary tables (scaled by 100): the unconditional portfolio cells,
# then the winner-minus-loser columns and their spread.
spread_cols_u_is = [0, 1, 2, 3, 4, 'spread']

unconditional_is_stats = (summary(unconditional_port_is) * 100).round(2)
winner_loser_u_is_stats = (summary(winner_losser_port_u_is[spread_cols_u_is]) * 100).round(4)

display(unconditional_is_stats, winner_loser_u_is_stats)

In [None]:
# Out-of-sample summary tables. Both tables are scaled by 100 so they are on
# the same footing as each other and as the in-sample tables (the spread table
# was previously left unscaled).
# NOTE(review): the factor of 100 is applied to every row summary() returns --
# confirm that is appropriate for any non-return statistics in the table.
display((summary(unconditional_port_oos) * 100).round(4),

(summary(winner_losser_port_u_oos[[0, 1, 2, 3, 4, 'spread']]) * 100).round(4))

Conditional Sort (Sequential)

In [None]:
# Conditional (dependent) double sort: id quintiles are formed separately
# inside each month's momentum group, reusing the existing 'momentum_bins'.

conditioning_keys = ['caldt', 'momentum_bins']

merged_data['conditional_id_bins'] = merged_data.groupby(conditioning_keys)['id'].transform(
    lambda values: pd.qcut(values, 5, labels=False)
)

# Mean return of each (momentum bin, conditional id bin) cell per month,
# one column per cell.
cond_levels = ['momentum_bins', 'conditional_id_bins']
cond_cell_mean_ret = merged_data.groupby(['caldt'] + cond_levels)['ret'].mean()
conditional_port = cond_cell_mean_ret.unstack(level=cond_levels)

In [None]:
# Keep only months in which every conditional portfolio cell has a return.
conditional_port = conditional_port.dropna()

In [None]:
# Calculate the conditional double sort portfolio returns in-sample and out-of-sample

# In-sample: 1980-01-01 (inclusive) up to, but not including, 2008-01-01.
# NOTE(review): the unconditional in-sample split has no 1980 lower bound --
# confirm whether the two samples are meant to differ.
conditional_port_is = conditional_port[(conditional_port.index >= '1980-01-01') & (conditional_port.index < '2008-01-01')]

# Out-of-sample: 2008-01-01 onward. The former extra ">= '1980-01-01'" clause
# was always true for these rows and has been removed.
conditional_port_oos = conditional_port[conditional_port.index >= '2008-01-01']

In [None]:
# High-minus-low momentum return within each conditional id bin, plus the gap
# between the lowest (0) and highest (4) id bins.

cond_high_momentum = conditional_port[1]
cond_low_momentum = conditional_port[0]

winner_losser_port_c = cond_high_momentum - cond_low_momentum
winner_losser_port_c['spread'] = winner_losser_port_c[0] - winner_losser_port_c[4]

In [None]:
# Bring in the ff columns for months present in both frames
# ('caldt' on the portfolio side is matched to 'dateff').
winner_losser_port_c = winner_losser_port_c.merge(
    ff, how='inner', left_on='caldt', right_on='dateff'
)

In [None]:
# Calculate the in-sample and out-of-sample information discreteness spread across momentum bins

# .copy(): the in-sample frame gets new columns assigned later; copying makes
# it an independent frame rather than a slice of winner_losser_port_c
# (avoids SettingWithCopyWarning and ambiguous write-through).
winner_losser_port_c_is = winner_losser_port_c[(winner_losser_port_c.dateff < '2008-01-01') & (winner_losser_port_c.dateff >= '1980-01-01')].copy()

winner_losser_port_c_oos = winner_losser_port_c[winner_losser_port_c.dateff >= '2008-01-01']

In [None]:
# Quick look at the in-sample conditional frame (first rows only, rather than
# dumping the whole table into the notebook output).
winner_losser_port_c_is.head()

In [None]:
# Calculate excess returns for all portfolios

# Work on an independent copy so the column assignments below do not write
# into a slice of winner_losser_port_c (SettingWithCopyWarning).
winner_losser_port_c_is = winner_losser_port_c_is.copy()

# NOTE(review): columns 0-4 and 'spread' are differences of portfolio returns
# (long-short); confirm that subtracting 'rf' from them is intended.
rf_rate = winner_losser_port_c_is['rf']

for i in range(5):
    winner_losser_port_c_is[f'excess_{i}'] = winner_losser_port_c_is[i] - rf_rate

winner_losser_port_c_is['excess_spread'] = winner_losser_port_c_is['spread'] - rf_rate

In [None]:
# Confirm the excess_* columns were added (first rows only, rather than
# dumping the whole table into the notebook output).
winner_losser_port_c_is.head()

In [None]:
# Regress each excess-return series (id bins 0-4, then the spread) on
# mktrf, smb and hml using the in-sample conditional data.
# reg1..reg5 correspond to excess_0..excess_4; reg6 is excess_spread.

dependents_c = [f'excess_{k}' for k in range(5)] + ['excess_spread']

reg1, reg2, reg3, reg4, reg5, reg6 = [
    ols(f'{dep} ~ mktrf + smb + hml', data=winner_losser_port_c_is).fit()
    for dep in dependents_c
]

In [None]:
# Tabulate the six conditional-sort regressions side by side.
# (Regtable is imported with the other dependencies at the top of the notebook.)
table = Regtable([reg1, reg2, reg3, reg4, reg5, reg6], stat='tstat', sig='coeff')
table.render()

In [None]:
# In-sample summary tables for the conditional sort. Both tables are scaled by
# 100 so they match each other and the unconditional in-sample tables (the
# spread table was previously left unscaled).
# NOTE(review): the factor of 100 is applied to every row summary() returns --
# confirm that is appropriate for any non-return statistics in the table.
display((summary(conditional_port_is) * 100).round(4),

(summary(winner_losser_port_c_is[[0, 1, 2, 3, 4, 'spread']]) * 100).round(4))