In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read in crsp daily data

df = pd.read_parquet("~/Data/crsp_monthly_clean.parquet")

df.head()

Unnamed: 0,permno,caldt,shrcd,exchcd,ticker,prc,ret,shrout
0,10000,1986-02-28,10.0,3.0,OMFGA,3.25,-0.257143,3680.0
1,10000,1986-03-31,10.0,3.0,OMFGA,4.4375,0.365385,3680.0
2,10000,1986-04-30,10.0,3.0,OMFGA,4.0,-0.098592,3793.0
3,10000,1986-05-30,10.0,3.0,OMFGA,3.10938,-0.222656,3793.0
4,10000,1986-06-30,10.0,3.0,OMFGA,3.09375,-0.005025,3793.0


In [3]:
# Cleaning

df = df[['permno', 'ticker', 'caldt', 'prc', 'ret']]

df.head()

Unnamed: 0,permno,ticker,caldt,prc,ret
0,10000,OMFGA,1986-02-28,3.25,-0.257143
1,10000,OMFGA,1986-03-31,4.4375,0.365385
2,10000,OMFGA,1986-04-30,4.0,-0.098592
3,10000,OMFGA,1986-05-30,3.10938,-0.222656
4,10000,OMFGA,1986-06-30,3.09375,-0.005025


In [4]:
# Return matrices
holding_period = 6

returns = df.pivot(index='caldt', columns='permno', values='ret')
log_returns = np.log1p(returns)
holding_period_returns = log_returns.rolling(holding_period, holding_period).sum()
holding_period_returns = holding_period_returns.shift(-(holding_period-1))

# Momentum matrix
window = 11
skip = 1
momentums = log_returns.rolling(window,window).sum()
momentums = momentums.shift(skip + 1) # Shift once for skip and once for lag

In [5]:
# Bin matrix
num_bins = 10
bins = momentums.apply(lambda x: pd.qcut(x, num_bins, labels=False, duplicates='drop'))

In [6]:
portfolios = {i: holding_period_returns[bins == i] for i in range(num_bins)}

portfolio_return_vectors = {i: portfolio.mean(axis=1) for i, portfolio in portfolios.items()}

In [7]:
joined_return_vectors = pd.DataFrame(portfolio_return_vectors)

joined_return_vectors['spread'] = joined_return_vectors[9] - joined_return_vectors[0]

In [8]:
def t_stat(vector: np.ndarray) -> float:
    mean = vector.mean()
    std = vector.std()
    n = len(vector)
    return mean / (std / np.sqrt(n))

def summary_stats(vector: np.ndarray) -> dict:
    return {
        'mean': vector.mean(),
        'std': vector.std(),
        't_stat': t_stat(vector)
    }

In [9]:
pd.DataFrame(list(joined_return_vectors.apply(summary_stats).T), index=joined_return_vectors.columns).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,spread
mean,0.011234,-0.000266,-0.000829,0.004654,0.005774,0.00958,0.010517,0.011353,0.01054,-0.01164,-0.021205
std,0.212944,0.19859,0.193068,0.189952,0.187931,0.192912,0.194479,0.202808,0.200502,0.207359,0.149794
t_stat,1.809922,-0.046014,-0.147371,0.840477,1.054133,1.703777,1.855212,1.920489,1.803469,-1.925797,-4.856575
