In [118]:
from config import set_project_root
set_project_root()

In [119]:
import pandas as pd
import numpy as np
from finance_byu.summarize import summary
import matplotlib.pyplot as plt
import seaborn as sns
from research.datasets import CRSP
import statsmodels.formula.api as smf

In [120]:
# Read in crsp monthly data

df = CRSP().df.copy()

df.head()

LOADING CLEAN FILE


Unnamed: 0,permno,date,cusip,shrcd,exchcd,ticker,shrout,vol,prc,ret
0,10000,1986-02-28,68391610,10.0,3.0,OMFGA,3680.0,828.0,3.25,-0.257143
1,10000,1986-03-31,68391610,10.0,3.0,OMFGA,3680.0,1078.0,4.4375,0.365385
2,10000,1986-04-30,68391610,10.0,3.0,OMFGA,3793.0,957.0,4.0,-0.098592
3,10000,1986-05-30,68391610,10.0,3.0,OMFGA,3793.0,1074.0,3.10938,-0.222656
4,10000,1986-06-30,68391610,10.0,3.0,OMFGA,3793.0,1069.0,3.09375,-0.005025


In [121]:
# In sample
start = np.datetime64('1929-01-01')
end = np.datetime64('1982-12-31')

df = df[(df['date'] >= start) & (df['date'] <= end)]

df = df.reset_index(drop=True)

df

Unnamed: 0,permno,date,cusip,shrcd,exchcd,ticker,shrout,vol,prc,ret
0,10006,1929-01-31,00080010,10.0,1.0,,600.0,359.0,99.2500,0.011465
1,10006,1929-02-28,00080010,10.0,1.0,,600.0,140.0,99.5000,0.002519
2,10006,1929-03-28,00080010,10.0,1.0,,600.0,634.0,100.7500,0.027638
3,10006,1929-04-30,00080010,10.0,1.0,,600.0,127.0,98.5000,-0.022333
4,10006,1929-05-31,00080010,10.0,1.0,,600.0,107.0,94.0000,-0.045685
...,...,...,...,...,...,...,...,...,...,...
1226612,92946,1982-07-30,92922210,11.0,3.0,VYQT,1870.0,,,
1226613,92946,1982-08-31,92922210,11.0,3.0,VYQT,1870.0,,,
1226614,92946,1982-09-30,92922210,11.0,3.0,VYQT,1870.0,,,
1226615,92946,1982-10-29,92922210,11.0,3.0,VYQT,1870.0,,,


In [122]:
# Cleaning

df = df[['permno', 'ticker', 'date', 'prc', 'ret']].copy()

df['mdt'] = df['date'].dt.strftime("%Y-%m")
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year


df.head()

Unnamed: 0,permno,ticker,date,prc,ret,mdt,month,year
0,10006,,1929-01-31,99.25,0.011465,1929-01,1,1929
1,10006,,1929-02-28,99.5,0.002519,1929-02,2,1929
2,10006,,1929-03-28,100.75,0.027638,1929-03,3,1929
3,10006,,1929-04-30,98.5,-0.022333,1929-04,4,1929
4,10006,,1929-05-31,94.0,-0.045685,1929-05,5,1929


In [123]:
# Target Variable
df['ret_60_mean'] = df.groupby('permno')['ret'].rolling(59,59).mean().reset_index(drop=True)
df['ret_60_mean'] = df.groupby('permno')['ret_60_mean'].shift(-1)

df['xs_ret'] = df['ret'] - df['ret_60_mean']

# Explanatory Variables
def create_lags(df, lags, column='ret'):
    for lag in lags:
        df[f'{column}_lag_{lag}'] = df.groupby('permno')[column].shift(lag)
    return df

lags = list(range(1, 13)) + [24, 36]

df = create_lags(df, lags)
df = df.dropna()

df

Unnamed: 0,permno,ticker,date,prc,ret,mdt,month,year,ret_60_mean,xs_ret,...,ret_lag_5,ret_lag_6,ret_lag_7,ret_lag_8,ret_lag_9,ret_lag_10,ret_lag_11,ret_lag_12,ret_lag_24,ret_lag_36
405,10006,ACF,1962-07-31,67.5000,0.144068,1962-07,7,1962,0.012450,0.131618,...,0.011986,0.087523,0.082661,0.043750,0.043478,-0.033613,0.006276,0.066964,-0.013514,0.064838
406,10006,ACF,1962-08-31,67.1250,0.003704,1962-08,8,1962,0.013075,-0.009371,...,-0.071672,0.011986,0.087523,0.082661,0.043750,0.043478,-0.033613,0.006276,-0.021918,0.056206
407,10006,ACF,1962-09-28,63.5000,-0.054004,1962-09,9,1962,0.014700,-0.068704,...,0.056985,-0.071672,0.011986,0.087523,0.082661,0.043750,0.043478,-0.033613,-0.142045,-0.067265
408,10006,ACF,1962-10-31,68.0000,0.070866,1962-10,10,1962,0.018294,0.052572,...,-0.160000,0.056985,-0.071672,0.011986,0.087523,0.082661,0.043750,0.043478,-0.013245,-0.038462
409,10006,ACF,1962-11-30,72.8750,0.080882,1962-11,11,1962,0.016951,0.063931,...,-0.012552,-0.160000,0.056985,-0.071672,0.011986,0.087523,0.082661,0.043750,0.003356,-0.030000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1226386,86239,CF,1971-07-30,20.5000,-0.052023,1971-07,7,1971,0.020030,-0.072053,...,0.060000,0.025641,0.106383,0.004225,-0.003509,-0.080645,0.003846,0.026316,-0.044681,0.049351
1226387,86239,CF,1971-08-31,23.8750,0.174390,1971-08,8,1971,0.019370,0.155020,...,0.000000,0.060000,0.025641,0.106383,0.004225,-0.003509,-0.080645,0.003846,-0.016854,0.025000
1226388,86239,CF,1971-09-30,22.4375,-0.060209,1971-09,9,1971,0.012771,-0.072980,...,0.184524,0.000000,0.060000,0.025641,0.106383,0.004225,-0.003509,-0.080645,-0.085714,0.021951
1226389,86239,CF,1971-10-29,21.7500,-0.030641,1971-10,10,1971,0.013355,-0.043996,...,-0.082412,0.184524,0.000000,0.060000,0.025641,0.106383,0.004225,-0.003509,0.013750,0.090361


In [124]:
formula = 'xs_ret ~ ' + ' + '.join([f'ret_lag_{lag}' for lag in lags])

mdts = df['mdt'].unique()

result_df_list = []

for mdt in mdts:
    # Filter data for the current year
    slice_df = df[df['mdt'] == mdt].copy()
    
    # Fit the OLS regression model using the formula syntax
    model = smf.ols(formula=formula, data=slice_df)
    result = model.fit()
    
    result_df = pd.DataFrame()
    
    result_df['name'] = ['intercept'] + [f'ret_lag_{lag}' for lag in lags]
    result_df['mdt'] = mdt
    result_df['coef'] = result.params.values
    result_df['t_stat'] = result.tvalues.values

    result_df_list.append(result_df)

# Display the resulting DataFrame
combined = pd.concat(result_df_list)

combined


Unnamed: 0,name,mdt,coef,t_stat
0,intercept,1962-07,0.008132,1.330148
1,ret_lag_1,1962-07,-0.211372,-5.065103
2,ret_lag_2,1962-07,-0.288598,-7.055987
3,ret_lag_3,1962-07,-0.002950,-0.069007
4,ret_lag_4,1962-07,0.091284,2.252773
...,...,...,...,...
10,ret_lag_10,1962-06,-0.011587,-0.000000
11,ret_lag_11,1962-06,-0.042844,-0.000000
12,ret_lag_12,1962-06,0.042117,0.000000
13,ret_lag_24,1962-06,0.009649,0.000000


In [125]:
combined[['name','coef', 't_stat']].groupby("name").mean().T

name,intercept,ret_lag_1,ret_lag_10,ret_lag_11,ret_lag_12,ret_lag_2,ret_lag_24,ret_lag_3,ret_lag_36,ret_lag_4,ret_lag_5,ret_lag_6,ret_lag_7,ret_lag_8,ret_lag_9
coef,0.001341,-0.048898,-0.005372,-0.000596,0.007627,-0.007672,-0.006813,0.00331,-0.007585,0.005138,-0.000206,-0.001202,-0.002487,-0.006768,-0.006442
t_stat,-0.17279,-2.176215,-0.242783,-0.151941,0.239002,-0.595847,-0.20712,-0.048056,-0.254095,0.093586,-0.09052,-0.13037,-0.163849,-0.206831,-0.170576
