# 02 - Feature Engineering Notebook
**Goal:** Clean the raw data, add more features, and create lagged and rolling-average features.

- Implement 1-, 3-, 6-, and 12-month lags for the economic features.

In [43]:
# Load raw csv data
import pandas as pd
import numpy as np

# Both raw extracts are read with their first column as a parsed date index,
# so no separate date-parsing step is needed afterwards.
combined = pd.read_csv("data/raw/combined_raw.csv", index_col=0, parse_dates=True)
fred_only = pd.read_csv("data/raw/fred_only.csv", index_col=0, parse_dates=True)

# Switch between the two extracts: True -> combined, False -> FRED only.
USE_BIGMAC = False
if USE_BIGMAC:
    df = combined.copy()
else:
    df = fred_only.copy()

# NOTE(review): nothing here sorts the rows. The shift/rolling features built
# later are positional, so they presume the CSV is already in chronological
# order -- confirm against the raw files.

df.head()

Unnamed: 0,fed_funds_lower,fed_funds_upper,fed_funds_mid,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread
1982-09-30,10.25,10.25,10.25,97.7,10.1,1858.4,12.339048,0.555714
1982-10-31,9.5,9.5,9.5,98.1,10.4,1869.7,10.9065,0.719
1982-11-30,9.0,9.0,9.0,98.0,10.8,1883.7,10.550526,0.754737
1982-12-31,8.5,8.5,8.5,97.7,10.8,1905.9,10.54,0.877727
1983-01-31,8.5,8.5,8.5,97.9,10.4,1959.4,10.457143,1.129524


In [44]:
# Choose target dynamically
TARGET = "fed_funds_upper"   # or fed_funds_mid / fed_funds_lower

# Every Fed funds series that may be present in the raw frame
fed_cols = ["fed_funds_mid", "fed_funds_upper", "fed_funds_lower"]

# Copy the selected series into a dedicated "target" column
df["target"] = df[TARGET].copy()

# Discard the Fed series that were not selected, skipping any that are absent
cols_to_drop = [
    fed_col
    for fed_col in fed_cols
    if fed_col in df.columns and fed_col != TARGET
]
df = df.drop(columns=cols_to_drop)

# Split into target vector and feature frame.
# NOTE(review): only "target" is removed from X, so X still carries the TARGET
# column itself, which is identical to y row for row. Confirm that downstream
# modelling drops it (or uses lagged versions only) to avoid target leakage.
y = df["target"]
X = df.drop(columns=["target"])

# Remember the pre-engineering columns; derived features are built from these
original_cols = list(X.columns)

df.head()



Unnamed: 0,fed_funds_upper,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread,target
1982-09-30,10.25,97.7,10.1,1858.4,12.339048,0.555714,10.25
1982-10-31,9.5,98.1,10.4,1869.7,10.9065,0.719,9.5
1982-11-30,9.0,98.0,10.8,1883.7,10.550526,0.754737,9.0
1982-12-31,8.5,97.7,10.8,1905.9,10.54,0.877727,8.5
1983-01-31,8.5,97.9,10.4,1959.4,10.457143,1.129524,8.5


In [45]:
def add_lags(data, cols, lags=[1, 3, 6, 12]):
    """Return ``data`` with lagged copies of the selected columns appended.

    For each column in ``cols`` and each offset in ``lags`` a column named
    ``<col>_lag<offset>`` is added, holding ``data[col].shift(offset)``. The
    shift is positional (by rows), so for a positive offset the first
    ``offset`` rows of the new column are NaN.

    Parameters
    ----------
    data : pd.DataFrame
        Source frame. It is not modified; a joined frame is returned.
    cols : iterable of str
        Names of the columns in ``data`` to lag.
    lags : iterable of int
        Row offsets passed to ``Series.shift``.

    Returns
    -------
    pd.DataFrame
        ``data`` followed by the lag columns, ordered by column then by lag.
    """
    shifted = {
        f"{name}_lag{offset}": data[name].shift(offset)
        for name in cols
        for offset in lags
    }
    lag_frame = pd.DataFrame(shifted, index=data.index)
    return data.join(lag_frame)

def add_pct_change(data, cols, periods=[1, 3, 12]):
    """Return ``data`` with percent-change columns for the selected columns.

    For each column in ``cols`` and each span in ``periods`` a column named
    ``<col>_pct_change<span>`` is added, holding ``data[col].pct_change(span)``:
    the fractional change relative to the value ``span`` rows earlier.

    Parameters
    ----------
    data : pd.DataFrame
        Source frame. It is not modified; a joined frame is returned.
    cols : iterable of str
        Names of the columns in ``data`` to transform.
    periods : iterable of int
        Row spans passed to ``Series.pct_change``.

    Returns
    -------
    pd.DataFrame
        ``data`` followed by the new columns, ordered by column then by span.
    """
    # Enumerate (column, span) pairs up front, in the order the output columns
    # should appear.
    pairs = [(name, span) for name in cols for span in periods]
    changes = {
        f"{name}_pct_change{span}": data[name].pct_change(span)
        for name, span in pairs
    }
    return data.join(pd.DataFrame(changes, index=data.index))

def add_rolling_means(data, cols, windows=[3, 6, 12]):
    """Return ``data`` with rolling-mean columns for the selected columns.

    For each column in ``cols`` and each size in ``windows`` a column named
    ``<col>_rollmean<size>`` is added, holding
    ``data[col].rolling(window=size).mean()``. The window covers the current
    row and the rows before it, so the first ``size - 1`` rows are NaN.

    Parameters
    ----------
    data : pd.DataFrame
        Source frame. It is not modified; a joined frame is returned.
    cols : iterable of str
        Names of the columns in ``data`` to average.
    windows : iterable of int
        Window sizes (in rows) passed to ``Series.rolling``.

    Returns
    -------
    pd.DataFrame
        ``data`` followed by the new columns, ordered by column then by window.
    """
    def _named_means():
        # Yield (new column name, rolling-mean series) in output order.
        for name in cols:
            for size in windows:
                yield f"{name}_rollmean{size}", data[name].rolling(window=size).mean()

    means = pd.DataFrame(dict(_named_means()), index=data.index)
    return data.join(means)

# Build every derived feature from the original columns only, in this order:
# lags, then percent changes, then rolling means.
X = (
    X
    .pipe(add_lags, original_cols)
    .pipe(add_pct_change, original_cols)
    .pipe(add_rolling_means, original_cols)
)


In [46]:
# Row-over-row first differences, added only for source columns that exist in X
diff_features = (
    ("local_price", "local_price_trend"),
    ("yield_curve_spread", "yield_curve_spread_change"),
)
for source_col, diff_col in diff_features:
    if source_col in X.columns:
        X[diff_col] = X[source_col].diff()

# Product of CPI and unemployment rate, added only when both inputs exist
if {"cpi", "unemployment_rate"}.issubset(X.columns):
    X['cpi_unemp_interaction'] = X['cpi'] * X['unemployment_rate']

In [47]:
# Keep only the rows where every feature is defined; the leading rows lose
# values to the lag / percent-change / rolling windows.
# NOTE(review): dropna() removes NaN but not +/-inf, and pct_change can yield
# inf when the earlier value is 0 -- confirm no infinite values reach the CSV.
X = X.dropna()
y = y.loc[X.index]  # keep the target aligned with the surviving feature rows

# Features plus target in a single frame, for convenience
df_engineered = X.assign(target=y)

print("Original rows:", len(df))
print("Rows after feature engineering:", len(df_engineered))
print("Columns in final dataset:", len(df_engineered.columns))

# File-name suffix recording which raw extract was used
big_mac = "big_mac" if USE_BIGMAC else "no_big_mac"

X.to_csv(f"data/X_features_{TARGET}_{big_mac}.csv")
y.to_csv(f"data/y_target_{TARGET}_{big_mac}.csv")

df_engineered.head()




Original rows: 519
Rows after feature engineering: 507
Columns in final dataset: 69


Unnamed: 0,fed_funds_upper,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread,fed_funds_upper_lag1,fed_funds_upper_lag3,fed_funds_upper_lag6,fed_funds_upper_lag12,...,m2_money_supply_rollmean12,treasury_10yr_yield_rollmean3,treasury_10yr_yield_rollmean6,treasury_10yr_yield_rollmean12,yield_curve_spread_rollmean3,yield_curve_spread_rollmean6,yield_curve_spread_rollmean12,yield_curve_spread_change,cpi_unemp_interaction,target
1983-09-30,9.375,100.4,9.2,2083.2,11.65381,0.86381,9.5,9.0,8.625,10.25,...,1998.158333,11.625632,11.084255,10.849247,0.777154,0.786872,0.84509,0.088157,923.68,9.375
1983-10-31,9.375,100.8,8.8,2099.2,11.541,0.97,9.375,9.4375,8.625,9.5,...,2017.283333,11.680299,11.274422,10.902122,0.869821,0.809872,0.866006,0.10619,887.04,9.375
1983-11-30,9.375,101.1,8.5,2112.3,11.69,1.025789,9.375,9.5,8.75,9.0,...,2036.333333,11.62827,11.492755,10.997078,0.9532,0.832345,0.888594,0.055789,859.35,9.375
1983-12-31,9.375,101.4,8.3,2123.5,11.829524,0.992857,9.375,9.375,9.0,8.5,...,2054.466667,11.686841,11.656237,11.104538,0.996216,0.886685,0.898188,-0.032932,841.62,9.375
1984-01-31,9.375,102.1,8.0,2138.2,11.674286,1.031905,9.375,9.375,9.4375,8.5,...,2069.366667,11.73127,11.705784,11.205967,1.01685,0.943336,0.890053,0.039048,816.8,9.375
