In [45]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import *
from scipy import stats

# [1] Load the CRSP extract, parse the date column and zero-fill missing returns.
# NOTE(review): fillna(0) makes a missing return count as a 0% return in every
# later compounding/averaging step -- confirm that is the intended treatment.
crsp = pd.read_csv('crsp.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    ret=lambda frame: frame['ret'].fillna(0),
)

# [2] Cumulative return over the formation window
J = 6  # formation window: number of consecutive rows per permno that get compounded
K = 6  # holding window: number of month-ends added when hdate2 is derived

# Order observations chronologically inside each permno, attach log returns
# (so compounding becomes a rolling sum) and index by date so the rolling
# result carries (permno, date) as its index.
crsp = (
    crsp.sort_values(['permno', 'date'])
    .assign(logret=lambda frame: np.log(1 + frame['ret']))
    .set_index('date')
)

# rolling(J) is row-based: the first J-1 rows of every permno come out as NaN.
# NOTE(review): a row window equals J calendar periods only if each permno has
# no gaps in its dates -- confirm against the data.
umd = (
    crsp.groupby('permno')['logret']
    .rolling(J)
    .sum()
    .reset_index()
    .assign(cumret=lambda frame: np.exp(frame['logret']) - 1)
    .loc[:, ['permno', 'date', 'cumret']]
)

# [3] Momentum deciles
# Rows without a full formation window have NaN cumret and are removed first.
umd = umd.dropna(subset=['cumret'])
# Within each date, bucket cumret into ten quantile bins. qcut(labels=False)
# numbers the bins 0..9, so shift by one: momr == 1 holds the lowest cumret and
# momr == 10 the highest. With its default duplicates='raise', qcut raises
# ValueError if a date's bin edges are not unique.
umd['momr'] = 1 + umd.groupby('date')['cumret'].transform(
    lambda cumrets: pd.qcut(cumrets, 10, labels=False)
)

# [4] Holding-window boundaries used for the merge
#   medate : formation date rolled forward to its month end (unchanged if already there)
#   hdate1 : first day of the month following medate  -> start of the holding window
#   hdate2 : K-th month end after medate              -> end of the holding window
# medate is only a stepping stone and is not kept.
umd = umd.assign(
    medate=lambda frame: frame['date'] + MonthEnd(0),
    hdate1=lambda frame: frame['medate'] + MonthBegin(1),
    hdate2=lambda frame: frame['medate'] + MonthEnd(K),
    form_date=lambda frame: frame['date'],
)[['permno', 'form_date', 'momr', 'hdate1', 'hdate2']]

# Return side of the merge: move date back to a column and keep only what is needed.
crsp = crsp.reset_index()[['permno', 'date', 'ret']]

# [5] Attach holding-period returns to every formation-date ranking.
# The join on permno pairs each ranking with every return row of that permno
# before the date filter is applied, so crsp is processed n rows at a time to
# bound the size of each intermediate frame.
n = 100000
port_chunks = []
for i in range(0, crsp.shape[0], n):
    tmp = crsp.iloc[i: i+n]
    merged = umd.merge(tmp, on=['permno'], how='inner')
    # Keep only returns dated inside the ranking's holding window (inclusive).
    merged = merged[(merged['hdate1'] <= merged['date'])
                    & (merged['date'] <= merged['hdate2'])]
    port_chunks.append(merged)

# Concatenate once: growing `port` with pd.concat inside the loop re-copies all
# rows accumulated so far on every iteration. The fallback keeps the original
# result (an empty DataFrame) when crsp has no rows.
port = pd.concat(port_chunks) if port_chunks else pd.DataFrame()

# [6] Portfolio return
start_year = 1965

# Mean return of each (date, decile, formation cohort) group, restricted to
# dates from start_year onwards.
umd2 = port.groupby(['date', 'momr', 'form_date'])['ret'].mean().reset_index()
umd2 = umd2[umd2['date'].dt.year >= start_year]

# Average the cohort means that share a date and decile.
ewret = umd2.groupby(['date', 'momr'])['ret'].mean().reset_index()
ewret = ewret.rename(columns={'ret': 'ewret'})

# Per-decile summary. Jupyter only displays the last expression of a cell, so
# this mid-cell result used to be computed and thrown away; keep it under a
# name so it can be inspected (e.g. evaluate `ewret_stats` in its own cell).
ewret_stats = ewret.groupby(['momr'])['ewret'].describe()[['count', 'mean', 'std']]

# [7] Pivot table
# One column per decile (port1 .. port10); the extreme deciles are renamed and
# their difference is the winners-minus-losers return.
ewret2 = (
    ewret.pivot(index='date', columns='momr', values='ewret')
    .add_prefix('port')
    .rename(columns={'port1': 'losers', 'port10': 'winners'})
)
ewret2['long_short'] = ewret2['winners'] - ewret2['losers']

legs = ['winners', 'losers', 'long_short']
mom_mean = ewret2[legs].mean().to_frame()
mom_mean = mom_mean.rename(columns={0: 'mean'}).reset_index()

# One-sample t-test of each leg's mean return against zero. mean() above skips
# NaN, whereas ttest_1samp propagates it by default, so missing dates are
# dropped first to keep the t-stat consistent with the reported mean.
t_winners, t_losers, t_long_short = (
    pd.Series(stats.ttest_1samp(ewret2[leg].dropna(), 0.0)).to_frame().T
    for leg in legs
)
t_output = pd.concat([t_winners, t_losers, t_long_short])\
            .rename(columns={0: 't-stat', 1: 'p-value'})
t_output['momr'] = legs
pd.merge(mom_mean, t_output, on=['momr'], how='inner')

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.016759,4.208047,3.4e-05
1,losers,0.008213,1.663901,0.09718
2,long_short,0.008546,2.809272,0.005293


In [46]:
# Rebuild the wide date x decile table from the long `ewret` frame; this
# discards any renamed or derived columns previously added to ewret2.
ewret2 = ewret.pivot(
    values='ewret',
    index='date',
    columns='momr',
)

In [59]:
# Winners / losers / long-short summary.
# ewret2 is rebuilt from `ewret` rather than modified in place: applying
# add_prefix to an ewret2 that was already renamed would yield 'portwinners',
# 'portlosers', ... and the column lookups below would raise KeyError, so the
# cell could not be executed twice in a row.
ewret2 = (
    ewret.pivot(index='date', columns='momr', values='ewret')
    .add_prefix('port')
    .rename(columns={'port1': 'losers', 'port10': 'winners'})
)
ewret2['long_short'] = ewret2['winners'] - ewret2['losers']

legs = ['winners', 'losers', 'long_short']
mom_mean = ewret2[legs].mean().to_frame()
mom_mean = mom_mean.rename(columns={0: 'mean'}).reset_index()

# One-sample t-test of each leg's mean return against zero. mean() above skips
# NaN, whereas ttest_1samp propagates it by default, so missing dates are
# dropped first to keep the t-stat consistent with the reported mean.
t_winners, t_losers, t_long_short = (
    pd.Series(stats.ttest_1samp(ewret2[leg].dropna(), 0.0)).to_frame().T
    for leg in legs
)
t_output = pd.concat([t_winners, t_losers, t_long_short])\
            .rename(columns={0: 't-stat', 1: 'p-value'})
t_output['momr'] = legs
pd.merge(mom_mean, t_output, on=['momr'], how='inner')

Unnamed: 0,momr,mean,t-stat,p-value
0,winners,0.016759,4.208047,3.4e-05
1,losers,0.008213,1.663901,0.09718
2,long_short,0.008546,2.809272,0.005293
