In [1]:
import pandas as pd
import numpy as np

monthly_prices = pd.read_csv("../data/processed/prices_monthly.csv", index_col=0, parse_dates=True)
monthly_returns = pd.read_csv("../data/processed/returns_monthly.csv", index_col=0, parse_dates=True)
ff5 = pd.read_csv("../data/processed/ff5_monthly_clean.csv", index_col=0, parse_dates=True)
panel = pd.read_csv("../data/processed/panel_monthly_with_ff5.csv", index_col=[0,1], parse_dates=[0])

ff5.index = ff5.index + pd.offsets.MonthEnd(0)

# Momentum
momentum_12_1 = monthly_prices.shift(1) / monthly_prices.shift(13) - 1
momentum_6_1  = monthly_prices.shift(1) / monthly_prices.shift(7)  - 1
momentum_3_1  = monthly_prices.shift(1) / monthly_prices.shift(4)  - 1

# 3 month volatility of returns
vol_3m = monthly_returns.rolling(3).std()

X = (momentum_12_1.stack().rename("momentum_12_1")
     .to_frame().join(momentum_6_1.stack().rename("momentum_6_1"))
     .join(momentum_3_1.stack().rename("momentum_3_1"))
     .join(vol_3m.stack().rename("vol_3m")))

# Lag FF5 factors
ff5 = pd.read_csv("../data/processed/ff5_monthly_clean.csv", index_col=0, parse_dates=True)
ff5.index = ff5.index + pd.offsets.MonthEnd(0)
ff5_lag = ff5.shift(1).add_suffix("_lag1")

X = X.join(ff5_lag, how="left", on=X.index.get_level_values(0))

# Target: next month excess return
panel = pd.read_csv("../data/processed/panel_monthly_with_ff5.csv", index_col=[0,1], parse_dates=[0])
y_next  = panel["excess_ret"].shift(-1).rename("y_next")
data = pd.concat([X, y_next], axis=1).dropna()

data.to_csv("../data/processed/ml_feature_matrix.csv", index=True)
print("Feature matrix shape:", data.shape)
data.head()

Feature matrix shape: (1189, 12)


Unnamed: 0_level_0,Unnamed: 1_level_0,key_0,momentum_12_1,momentum_6_1,momentum_3_1,vol_3m,Mkt-RF_lag1,SMB_lag1,HML_lag1,RMW_lag1,CMA_lag1,RF_lag1,y_next
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2015-02-28,AAPL,2015-02-28,0.672751,0.236959,0.089519,0.090495,-0.0309,-0.0093,-0.0345,0.0158,-0.0164,0.0,0.072293
2015-02-28,AMZN,2015-02-28,-0.011598,0.13272,0.160643,0.11563,-0.0309,-0.0093,-0.0345,0.0158,-0.0164,0.0,0.044676
2015-02-28,GOOG,2015-02-28,-0.093873,-0.06487,-0.043929,0.03682,-0.0309,-0.0093,-0.0345,0.0158,-0.0164,0.0,0.098928
2015-02-28,HD,2015-02-28,0.3881,0.304252,0.075868,0.052728,-0.0309,-0.0093,-0.0345,0.0158,-0.0164,0.0,0.126885
2015-02-28,JPM,2015-02-28,0.008552,-0.044589,-0.095076,0.128209,-0.0309,-0.0093,-0.0345,0.0158,-0.0164,0.0,0.040311
