# 02 - Feature Engineering Notebook
**Goal:** Clean the raw data, add more features, and create lagged and rolling-average features.

- Add 1-, 3-, 6-, and 12-month lags for each economic feature.

In [10]:
# Load raw csv data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# The first CSV column is parsed as a date index (index_col=0, parse_dates=True).
combined = pd.read_csv("data/raw/combined.csv", index_col=0, parse_dates=True)
fred_only = pd.read_csv("data/raw/fred_only.csv", index_col=0, parse_dates=True)

# Work on a chronologically sorted copy so `combined` stays untouched.
# The shift/rolling/pct_change/diff features built later are positional, so they
# are only meaningful when rows are in date order; a stable sort keeps the
# original relative order of any duplicate dates.
df = combined.sort_index(kind="mergesort")

df.head()

Unnamed: 0,fed_funds_lower,fed_funds_upper,fed_funds_mid,local_price,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread
2000-04-30,6.0,6.0,6.0,2.24,170.9,3.8,4767.8,5.990526,-0.413158
2000-05-31,6.5,6.5,6.5,2.24,171.2,4.0,4755.7,6.440455,-0.369091
2000-06-30,6.5,6.5,6.5,2.24,172.2,4.0,4773.6,6.097273,-0.384545
2000-07-31,6.5,6.5,6.5,2.24,172.7,4.0,4791.3,6.054,-0.285
2000-08-31,6.5,6.5,6.5,2.24,172.7,4.1,4819.5,5.826087,-0.402609


In [11]:
# Rate series to predict; "fed_funds_upper" or "fed_funds_lower" can be used instead.
TARGET = "fed_funds_mid"

# All three fed-funds columns are removed from the feature matrix, whichever one
# is the target, so no rate series remains among the predictors.
X = df.drop(["fed_funds_mid", "fed_funds_upper", "fed_funds_lower"], axis=1)

# Independent copy of the target series.
y = df.loc[:, TARGET].copy()

In [12]:
def add_lags(data, cols, lags=(1, 3, 6, 12)):
    """Return a copy of ``data`` with lagged versions of ``cols`` appended.

    For every column in ``cols`` and every value in ``lags`` a new column
    ``"{col}_lag{lag}"`` is added, holding the column shifted down by ``lag``
    rows. The first ``lag`` rows of each new column are therefore NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame. It is not modified; the result is a new frame.
    cols : iterable of str
        Names of the columns to lag.
    lags : iterable of int, default (1, 3, 6, 12)
        Row offsets passed to ``Series.shift``. The shift is positional, so
        the rows are assumed to already be in chronological order.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the lag columns appended.

    Raises
    ------
    KeyError
        If a name in ``cols`` is not a column of the frame.
    """
    # Copy first so the caller's frame is left untouched and re-running a cell
    # with the same input always yields the same output.
    lagged = data.copy()
    for col in cols:
        for lag in lags:
            lagged[f"{col}_lag{lag}"] = lagged[col].shift(lag)
    return lagged

# Snapshot the feature names before any derived columns exist, so the later
# steps that reuse `lag_columns` only transform these original columns.
lag_columns = list(X.columns)
X = add_lags(data=X, cols=lag_columns)

def add_pct_change(data, cols, periods=(1, 3, 12)):
    """Return a copy of ``data`` with percent-change features appended.

    For every column in ``cols`` and every value in ``periods`` a new column
    ``"{col}_pct_change{p}"`` is added, holding the fractional change relative
    to the value ``p`` rows earlier.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame. It is not modified; the result is a new frame.
    cols : iterable of str
        Names of the columns to transform.
    periods : iterable of int, default (1, 3, 12)
        Row offsets passed to ``Series.pct_change``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the percent-change columns appended. Entries
        whose base value is zero are NaN rather than +/-inf.

    Raises
    ------
    KeyError
        If a name in ``cols`` is not a column of the frame.
    """
    # Copy first so the caller's frame is left untouched.
    changed = data.copy()
    for col in cols:
        for p in periods:
            # fill_method=None: do not forward-fill gaps before differencing.
            # The implicit padding is deprecated in pandas and would hide
            # missing observations.
            pct = changed[col].pct_change(periods=p, fill_method=None)
            # A zero base value yields +/-inf, which dropna() does not remove.
            # Map it to NaN so those rows can be filtered out downstream.
            changed[f"{col}_pct_change{p}"] = pct.replace([np.inf, -np.inf], np.nan)
    return changed

# Percent-change features are derived from the original columns only.
X = add_pct_change(data=X, cols=lag_columns)

def add_rolling_means(data, cols, windows=(3, 6, 12)):
    """Return a copy of ``data`` with rolling-mean features appended.

    For every column in ``cols`` and every value in ``windows`` a new column
    ``"{col}_rollmean{w}"`` is added, holding the mean of the current row and
    the ``w - 1`` rows before it. The first ``w - 1`` rows of each new column
    are NaN because the window is not yet full.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame. It is not modified; the result is a new frame.
    cols : iterable of str
        Names of the columns to average.
    windows : iterable of int, default (3, 6, 12)
        Window lengths, in rows, passed to ``Series.rolling``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the rolling-mean columns appended.

    Raises
    ------
    KeyError
        If a name in ``cols`` is not a column of the frame.
    """
    # Copy first so the caller's frame is left untouched.
    smoothed = data.copy()
    for col in cols:
        for w in windows:
            smoothed[f"{col}_rollmean{w}"] = smoothed[col].rolling(window=w).mean()
    return smoothed

# Rolling means are derived from the original columns only.
X = add_rolling_means(data=X, cols=lag_columns)

# Hand-crafted extras: first differences of the price and the yield-curve
# spread, plus a CPI x unemployment interaction term.
X = X.assign(
    local_price_trend=X["local_price"].diff(),
    yield_curve_spread_change=X["yield_curve_spread"].diff(),
    cpi_unemp_interaction=X["cpi"] * X["unemployment_rate"],
)

In [14]:
# Join features and target side by side so incomplete rows are dropped from
# both at once. The shift/rolling/diff features are NaN for the earliest rows,
# so those rows are removed here.
full = pd.concat([X, y], axis=1).dropna()

X_final = full.drop(columns=[TARGET])
y_final = full[TARGET]

# Persist the engineered dataset.
X_final.to_csv("data/X_features.csv")
y_final.to_csv("data/y_target.csv")

# Kept as the cell's last expression so the shapes are actually displayed.
X_final.shape, y_final.shape