In [2]:
# This notebook will extract CPI food inflation data from the BLS API
print('Notebook setup complete — ready to add extraction code!')

Notebook setup complete — ready to add extraction code!


In [4]:
import requests
import pandas as pd
from datetime import datetime
import os

API_URL = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

SERIES_IDS = {
    "CUSR0000SAF1": "Food (all)",
    "CUSR0000SEFJ": "Meats, poultry, fish, eggs",
    "CUSR0000SEFC": "Cereals & bakery",
    "CUSR0000SEFP": "Dairy",
    "CUSR0000SEFR": "Fruits & vegetables",
    "CUSR0000SEFH": "Nonalcoholic beverages",
    "CUSR0000SEFW": "Other foods"
}

def fetch_bls_data(series_ids, start_year="2015"):
    payload = {
        "seriesid": list(series_ids.keys()),
        "startyear": start_year,
        "endyear": str(datetime.now().year)
    }
    response = requests.post(API_URL, json=payload)
    response.raise_for_status()
    return response.json()

def parse_bls_json(json_data, id_to_name_map):
    all_rows = []

    for series in json_data["Results"]["series"]:
        sid = series["seriesID"]
        friendly_name = id_to_name_map.get(sid, sid)

        for item in series["data"]:
            year = item["year"]
            period = item["period"]
            value = float(item["value"])

            if not period.startswith("M"):
                continue

            month = period[1:]
            date = pd.to_datetime(f"{year}-{month}-01")

            all_rows.append({
                "date": date,
                "series_id": sid,
                "category": friendly_name,
                "value": value
            })

    df = pd.DataFrame(all_rows)
    df = df.sort_values(["category", "date"])
    return df

os.makedirs("../data/raw", exist_ok=True)
os.makedirs("../data/processed", exist_ok=True)

raw_json = fetch_bls_data(SERIES_IDS)
df_cpi = parse_bls_json(raw_json, SERIES_IDS)

raw_json_path = "../data/raw/bls_cpi_food_raw.json"
processed_csv_path = "../data/processed/cpi_food_clean.csv"

with open(raw_json_path, "w") as f:
    import json
    json.dump(raw_json, f)

df_cpi.to_csv(processed_csv_path, index=False)

df_cpi.head()

Unnamed: 0,date,series_id,category,value
359,2015-01-01,CUSR0000SEFC,Cereals & bakery,325.728
358,2015-02-01,CUSR0000SEFC,Cereals & bakery,326.521
357,2015-03-01,CUSR0000SEFC,Cereals & bakery,324.972
356,2015-04-01,CUSR0000SEFC,Cereals & bakery,325.895
355,2015-05-01,CUSR0000SEFC,Cereals & bakery,324.713
