In [1]:
# Importing all necessary libraries

import numpy as np

import pandas as pd

In [2]:
# Loading in the daily and monthly crsp datasets

crsp_daily = pd.read_feather('~/FIN_585/crsp_data/crsp_daily.ftr')

crsp_monthly = pd.read_feather('~/FIN_585/crsp_data/crsp_monthly.ftr')

In [3]:
# Cleaning the daily dataset

crsp_daily['prc'] = abs(crsp_daily['prc'])

crsp_daily['prc_lag'] = crsp_daily.groupby('permno')['prc'].shift(1)

crsp_daily = crsp_daily[crsp_daily['ret'] > -1]

crsp_daily = crsp_daily[crsp_daily['prc_lag'] > 5]

crsp_daily.sort_values(by = ['permno', 'caldt'], inplace = True)

crsp_daily.drop(columns = ['shrcd', 'excd', 'siccd', 'vol', 'shr', 'prc_lag'], inplace = True)

In [4]:
# Adding column for positive and negative returns

crsp_daily['ret_class'] = np.where(crsp_daily['ret'].shift(1) >= 0, '1', '0')

In [5]:
# Calculate rolling yearly number of positive and negative days for each stock

n = 252

crsp_daily['pos_days'] = crsp_daily.groupby('permno').rolling(window = n, min_periods = n)['ret_class'].sum().reset_index(level=0, drop=True)

crsp_daily['neg_days'] = n - crsp_daily['pos_days']

crsp_daily['%pos'] = crsp_daily['pos_days'] / n

crsp_daily['%neg'] = crsp_daily['neg_days'] / n

crsp_daily['%neg - %pos'] = crsp_daily['%neg'] - crsp_daily['%pos']

crsp_daily.drop(columns = ['ret_class', 'pos_days', 'neg_days', '%pos', '%neg', 'prc', 'ret'], inplace = True)

crsp_daily.dropna(inplace = True)

In [6]:
# Getting daily data ready for merging with monthly data

crsp_daily_resampled = crsp_daily.set_index('caldt').groupby('permno').resample('ME').first().droplevel('permno').reset_index()

In [7]:
# Cleaning the monthly dataset

crsp_monthly.drop(columns = ['shrcd', 'excd', 'siccd', 'vol', 'shr', 'cusip', 'ticker', 'prc', 'cumfacshr'], inplace = True)

crsp_monthly.dropna(inplace = True)

In [8]:
# Merging the daily and monthly datasets

merged_data = pd.merge(crsp_monthly, crsp_daily, on = ['permno', 'caldt'], how = 'inner')

merged_data.dropna(inplace = True)

In [9]:
# Calculating momentum

merged_data['log_ret'] = np.log(1 + merged_data['ret'])

merged_data['cum_log_ret'] = merged_data.groupby('permno')['log_ret'].rolling(window = 11, min_periods = 11).sum().reset_index(drop=True)

merged_data['momentum'] = merged_data.groupby('permno')['cum_log_ret'].shift(2)

merged_data.drop(columns = ['log_ret', 'cum_log_ret'], inplace = True)

merged_data.dropna(inplace = True)

In [10]:
# Calculate information discreatness 'id'

merged_data['id'] = np.sign(merged_data['momentum']) * merged_data['%neg - %pos']

Unconditional Sort

In [11]:
# Unconditional double sort portfolios by momentum and id

merged_data['momentum_bins'] = merged_data.groupby('caldt')['momentum'].transform(lambda x: pd.qcut(x, 2, labels = False))

merged_data['id_bins'] = merged_data.groupby('caldt')['id'].transform(lambda x: pd.qcut(x, 5, labels = False))

unconditional_port = merged_data.groupby(['caldt', 'momentum_bins', 'id_bins'])['ret'].mean().unstack(level=['momentum_bins', 'id_bins'])

In [12]:
#Calculate the unconditional double sort portfolio returns in-sample and out-of-sample

unconditional_port_is = (unconditional_port[unconditional_port.index < '2008-01-01'].describe()*100).round(2)

unconditional_port_oos = (unconditional_port[unconditional_port.index >= '2008-01-01'].describe()*100).round(2)

In [13]:
# Calculating differences in high and low momentum across id bins

winner_losser_port = unconditional_port[1] - unconditional_port[0]

winner_losser_port['spread'] = winner_losser_port[0] - winner_losser_port[4]

In [14]:
# Calculate the in-sample and out-of-sample information discreatness spread across momentum bins

winner_losser_port_is = (winner_losser_port[winner_losser_port.index < '2008-01-01'].describe()*100).round(2)

winner_losser_port_oos = (winner_losser_port[winner_losser_port.index >= '2008-01-01'].describe()*100).round(2)

In [15]:
display(unconditional_port_is, unconditional_port_oos, winner_losser_port_is, winner_losser_port_oos)

momentum_bins,0,0,0,0,0,1,1,1,1,1
id_bins,0,1,2,3,4,0,1,2,3,4
count,84700.0,93200.0,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0,93600.0,67400.0
mean,1.31,1.19,1.04,0.99,1.69,2.47,1.9,1.27,0.48,-0.33
std,7.91,7.85,7.77,7.4,6.6,5.68,6.75,7.15,7.0,7.98
min,-38.95,-35.7,-38.28,-32.1,-28.71,-24.99,-31.97,-30.82,-29.66,-30.34
25%,-0.76,-1.73,-2.35,-2.59,-1.31,0.01,-1.17,-2.43,-3.17,-4.26
50%,1.64,1.53,1.29,0.74,1.45,2.73,2.42,1.69,0.82,0.51
75%,3.77,4.23,4.08,4.45,4.4,5.1,5.29,5.09,4.6,4.03
max,77.0,68.6,66.55,53.71,63.61,61.35,53.44,59.58,51.22,48.31


momentum_bins,0,0,0,0,0,1,1,1,1,1
id_bins,0,1,2,3,4,0,1,2,3,4
count,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0
mean,0.51,0.8,1.03,1.31,1.98,1.64,1.21,0.79,0.38,0.43
std,5.97,5.65,5.76,5.57,4.97,4.19,4.68,4.96,5.41,6.08
min,-22.8,-21.12,-20.81,-22.14,-20.11,-14.31,-16.27,-18.96,-21.31,-22.8
25%,-1.77,-2.23,-2.05,-1.81,-0.79,-0.23,-0.96,-1.94,-2.7,-2.95
50%,0.84,1.06,1.03,1.22,2.16,2.09,1.76,1.18,0.62,0.76
75%,2.4,3.49,3.9,3.91,4.52,4.14,4.28,4.09,3.34,3.96
max,27.71,23.57,23.36,23.42,19.19,13.23,12.88,13.82,18.83,27.88


id_bins,0,1,2,3,4,spread
count,84700.0,93200.0,96200.0,93600.0,67400.0,56100.0
mean,1.12,0.74,0.23,-0.39,-1.7,3.32
std,4.75,3.72,2.96,3.12,5.41,6.86
min,-49.94,-27.71,-28.25,-20.03,-31.91,-51.22
25%,-0.38,-0.74,-0.93,-1.65,-3.7,0.34
50%,0.93,0.72,0.45,-0.2,-1.05,2.8
75%,2.41,2.28,1.66,1.17,0.93,6.07
max,37.18,20.21,11.0,14.77,37.19,38.23


id_bins,0,1,2,3,4,spread
count,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0
mean,1.13,0.41,-0.24,-0.93,-1.55,2.68
std,4.36,3.01,2.44,2.26,3.25,5.01
min,-15.23,-15.81,-13.9,-14.61,-10.18,-26.33
25%,-0.66,-0.79,-1.22,-2.1,-2.92,0.81
50%,1.17,0.72,0.18,-1.0,-1.68,2.37
75%,3.29,2.07,1.32,0.5,-0.34,5.13
max,15.73,7.59,5.19,11.82,25.82,19.63


Conditional Sort

In [16]:
# Conditional double sort portfolios by momentum and id

merged_data['conditional_id_bins'] = merged_data.groupby(['caldt', 'momentum_bins'])['id'].transform(lambda x: pd.qcut(x, 5, labels = False))

In [17]:
# Creating id bins conditional on momentum (winners and losers)

conditional_port = merged_data.groupby(['caldt', 'momentum_bins', 'conditional_id_bins'])['ret'].mean().unstack(level=['momentum_bins', 'conditional_id_bins'])

In [18]:
# Cqalculate the conditional double sort portfolio returns in-sample and out-of-sample

conditional_port_is = (conditional_port[conditional_port.index < '2008-01-01'].describe()*100).round(2)

conditional_port_oos = (conditional_port[conditional_port.index >= '2008-01-01'].describe()*100).round(2)

In [19]:
# Calculating differences in high and low momentum across id bins

winner_losser_port = conditional_port[1] - conditional_port[0]

winner_losser_port['spread'] = winner_losser_port[0] - winner_losser_port[4]

In [20]:
# Calculate the in-sample and out-of-sample information discreatness spread across momentum bins

winner_losser_port_is = (winner_losser_port[winner_losser_port.index < '2008-01-01'].describe()*100).round(2)

winner_losser_port_oos = (winner_losser_port[winner_losser_port.index >= '2008-01-01'].describe()*100).round(2)

In [21]:
display(conditional_port_is, conditional_port_oos, winner_losser_port_is, winner_losser_port_oos)

momentum_bins,0,0,0,0,0,1,1,1,1,1
conditional_id_bins,0,1,2,3,4,0,1,2,3,4
count,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0
mean,1.31,1.13,1.11,1.4,1.93,2.47,2.22,1.84,1.31,0.61
std,7.87,7.82,7.83,7.55,6.33,5.51,6.34,6.65,6.83,6.86
min,-35.7,-35.65,-37.3,-31.57,-27.01,-24.52,-31.3,-32.34,-29.75,-29.41
25%,-1.33,-2.35,-2.39,-2.05,-0.8,0.22,-0.62,-1.49,-2.24,-3.04
50%,1.59,1.3,0.91,1.15,1.71,2.7,2.59,2.21,1.69,0.89
75%,3.85,4.32,4.46,4.61,4.52,4.84,5.41,5.34,5.01,4.39
max,83.21,61.64,65.93,69.47,58.43,55.17,46.34,46.67,63.56,52.81


momentum_bins,0,0,0,0,0,1,1,1,1,1
conditional_id_bins,0,1,2,3,4,0,1,2,3,4
count,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0
mean,0.6,0.97,1.05,1.47,2.12,1.55,1.45,1.02,0.73,0.32
std,5.95,5.81,5.83,5.65,4.88,4.13,4.59,4.77,4.97,5.54
min,-23.39,-21.36,-22.21,-21.62,-19.04,-13.57,-16.62,-16.8,-18.67,-21.84
25%,-1.92,-1.88,-1.85,-1.45,-0.76,-0.1,-0.9,-1.4,-2.05,-2.87
50%,1.09,1.08,1.06,1.51,2.37,2.09,2.05,1.37,1.16,0.64
75%,2.92,3.77,3.82,4.3,4.81,4.1,4.45,4.12,3.88,3.62
max,28.21,24.32,23.49,23.99,19.99,12.08,12.84,13.46,13.75,17.17


conditional_id_bins,0,1,2,3,4,spread
count,96200.0,96200.0,96200.0,96200.0,96200.0,96200.0
mean,1.16,1.09,0.72,-0.09,-1.32,2.47
std,4.23,3.55,3.48,3.3,3.65,4.96
min,-35.92,-31.77,-29.37,-31.45,-20.63,-41.02
25%,-0.23,-0.29,-0.61,-1.42,-3.33,0.09
50%,1.08,1.16,1.03,0.17,-1.12,2.19
75%,2.65,2.8,2.55,1.69,0.71,4.94
max,14.9,14.24,11.12,12.7,13.28,24.93


conditional_id_bins,0,1,2,3,4,spread
count,19200.0,19200.0,19200.0,19200.0,19200.0,19200.0
mean,0.94,0.48,-0.02,-0.74,-1.8,2.74
std,4.18,3.0,2.53,2.18,2.55,4.51
min,-16.96,-15.51,-14.87,-14.89,-11.67,-18.14
25%,-0.74,-0.71,-1.11,-1.71,-3.03,1.0
50%,0.91,0.62,0.3,-0.6,-1.82,2.69
75%,3.15,2.2,1.27,0.68,-0.64,4.75
max,15.4,7.42,5.57,3.84,14.73,14.76
