# 01 — Data Collection Notebook
Federal Funds Rate Prediction Using Big Mac Index & FRED Data

**Goal:** Collect the raw macroeconomic data and align it to a common month-end frequency for feature engineering and modeling.

**Sources:**
- Federal Reserve Economic Data (FRED API)
- The Economist Big Mac Index (CSV)

In [4]:
import os
import pandas as pd
import numpy as np
from fredapi import Fred
from dotenv import load_dotenv

# Notebook-wide pandas display settings: truncate long frames to 20 rows,
# but never hide columns (None removes the column limit).
display_options = {
    "display.max_rows": 20,
    "display.max_columns": None,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

print("Libraries loaded.")

Libraries loaded.


In [5]:
# Load .env file so FRED_API_KEY becomes visible through os.getenv.
load_dotenv()

# Treat a missing, empty, or whitespace-only key the same way: an entry such
# as `FRED_API_KEY=` in .env yields "" rather than None, which would otherwise
# pass this check and only fail later on the first data request.
fred_api_key = (os.getenv("FRED_API_KEY") or "").strip()
if not fred_api_key:
    raise ValueError("FRED_API_KEY not found. Check your .env file.")

# NOTE(review): building the client presumably does not contact FRED, so the
# message below confirms only that a key was supplied — confirm against fredapi.
fred = Fred(api_key=fred_api_key)
print("FRED API connected.")

FRED API connected.


In [13]:
# Read the full Big Mac Index table (all countries) from the local CSV.
bigmac_path = "data/raw/big-mac-index.csv"
bigmac = pd.read_csv(bigmac_path)
print("Big Mac Index loaded. Rows:", len(bigmac))

# Keep only the United States rows and the price column, index by parsed
# date, then place the observations on a month-end calendar, carrying each
# published price forward until the next one.
is_usa = bigmac['iso_a3'] == 'USA'
bigmac_us = (
    bigmac.loc[is_usa, ['date', 'dollar_price']]
    .assign(date=lambda rows: pd.to_datetime(rows['date']))
    .set_index('date')
    .resample("ME")
    .ffill()
)
bigmac_us.tail()

Big Mac Index loaded. Rows: 1386


Unnamed: 0_level_0,dollar_price
date,Unnamed: 1_level_1
2020-03-31,5.67
2020-04-30,5.67
2020-05-31,5.67
2020-06-30,5.67
2020-07-31,5.71


In [18]:
# Federal Funds Target Range (upper & lower)

def _month_end_target(series_id, column):
    """Fetch one FRED series and return it as a month-end DataFrame.

    Parameters
    ----------
    series_id : str
        FRED series identifier passed to ``fred.get_series``.
    column : str
        Name given to the single column of the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column named ``column`` on a month-end ("ME") index, produced by
        forward-filling the fetched observations onto the month-end dates.
    """
    frame = fred.get_series(series_id).to_frame(name=column)
    frame.index = pd.to_datetime(frame.index)
    return frame.resample("ME").ffill()


# Both bounds go through the same recipe, so they cannot drift apart.
ffr_upper = _month_end_target('DFEDTARU', "fed_funds_upper")
ffr_lower = _month_end_target('DFEDTARL', "fed_funds_lower")

# Lower bound first, then upper, side by side on the shared monthly index.
ffr = pd.concat([ffr_lower, ffr_upper], axis=1)
ffr.tail()


Unnamed: 0,fed_funds_lower,fed_funds_upper
2025-07-31,4.25,4.5
2025-08-31,4.25,4.5
2025-09-30,4.0,4.25
2025-10-31,3.75,4.0
2025-11-30,3.75,4.0


In [12]:
# FRED series id -> column name used in the combined dataset.
series_dict = {
    "CPIAUCSL": "cpi",
    "UNRATE": "unemployment_rate",
    "M2SL": "m2_money_supply",
    "DGS10": "treasury_10yr_yield",
    "T10Y2Y": "yield_curve_spread"
}


def _monthly_mean(series_id, colname):
    """Fetch one FRED series and return its monthly mean as a one-column frame.

    The observations are averaged within each calendar month and labelled
    with the month-end date ("ME").
    """
    observations = fred.get_series(series_id)
    observations.index = pd.to_datetime(observations.index)
    return observations.resample("ME").mean().to_frame(name=colname)


# One monthly frame per series, in the order declared in series_dict.
fred_frames = [
    _monthly_mean(series_id, colname)
    for series_id, colname in series_dict.items()
]

# Outer-align all series on their month-end dates; series that start later
# show NaN for the earlier months.
fred_combined = pd.concat(fred_frames, axis=1)
fred_combined.head()

Unnamed: 0,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread
1947-01-31,21.48,,,,
1947-02-28,21.62,,,,
1947-03-31,22.0,,,,
1947-04-30,22.0,,,,
1947-05-31,21.95,,,,


In [35]:
# Align all sources on one month-end calendar and join them column-wise.
#
# The calendar starts at the first month with a complete fed funds target
# range and ends at the latest month present in either macro source. The end
# date deliberately ignores ffr's own last month, so target-range months
# beyond the macro data are dropped.
target_start = ffr.dropna().index.min()
macro_end = max(bigmac_us.index.max(), fred_combined.index.max())
full_index = pd.date_range(start=target_start, end=macro_end, freq="ME")

# Forward-filling below hides how old each series really is (the recorded run
# carries the 2020-07 Big Mac price through 2025-10, so the later NaN count is
# zero by construction). Report the last genuine observation per column so
# that staleness stays visible.
last_observed = pd.Series(
    {
        column: frame[column].last_valid_index()
        for frame in (bigmac_us, fred_combined)
        for column in frame.columns
    },
    name="last_observation",
)
print("Last real observation before forward-fill:")
print(last_observed.to_string())

# Build the aligned pieces without overwriting ffr / bigmac_us / fred_combined,
# so the outputs of the earlier cells stay accurate and this cell does not
# depend on how many times it has been run. The target range is only
# reindexed (never filled); the explanatory series are carried forward.
combined = pd.concat(
    [
        ffr.reindex(full_index),
        bigmac_us.reindex(full_index).ffill(),
        fred_combined.reindex(full_index).ffill(),
    ],
    axis=1,
)

combined.tail(100)

Unnamed: 0,fed_funds_lower,fed_funds_upper,dollar_price,cpi,unemployment_rate,m2_money_supply,treasury_10yr_yield,yield_curve_spread
2017-08-31,1.00,1.25,5.30,245.183,4.4,13674.3,2.209130,0.873913
2017-09-30,1.00,1.25,5.30,246.435,4.3,13722.7,2.202000,0.817500
2017-10-31,1.00,1.25,5.30,246.626,4.2,13782.0,2.360000,0.810476
2017-11-30,1.00,1.25,5.30,247.284,4.2,13814.6,2.353333,0.656667
2017-12-31,1.25,1.50,5.30,247.805,4.1,13873.5,2.402500,0.558500
...,...,...,...,...,...,...,...,...
2025-07-31,4.25,4.50,5.71,322.132,4.2,22028.8,4.391818,0.509545
2025-08-31,4.25,4.50,5.71,323.364,4.3,22108.4,4.264762,0.560952
2025-09-30,4.00,4.25,5.71,324.368,4.3,22212.5,4.120476,0.551905
2025-10-31,3.75,4.00,5.71,324.368,4.3,22212.5,4.061818,0.540455


In [31]:
# Missing values per column: total rows minus the non-null count.
missing_report = len(combined) - combined.count()
missing_report

fed_funds_lower        0
fed_funds_upper        0
dollar_price           0
cpi                    0
unemployment_rate      0
m2_money_supply        0
treasury_10yr_yield    0
yield_curve_spread     0
dtype: int64

In [36]:
# Persist the aligned dataset; the date index is written as the first CSV column.
output_path = "data/raw/combined_raw.csv"
combined.to_csv(path_or_buf=output_path)
print("Raw dataset saved to:", output_path)

Raw dataset saved to: data/raw/combined_raw.csv
