In [8]:
import pandas as pd
import os

# 1. Set base paths (works in a notebook)
# ".." is resolved against the kernel's current working directory, so this is
# the project root only when the notebook runs from inside /notebooks.
BASE_DIR = os.path.abspath("..")  # parent folder of /notebooks
csv_path = os.path.join(BASE_DIR, "data", "processed", "cpi_food_clean.csv")

# 2. Load the cleaned CPI data
df = pd.read_csv(csv_path)

# 3. Convert date column to datetime
df["date"] = pd.to_datetime(df["date"])

# 4. Sort by category + date so time-series is in order
df = df.sort_values(["category", "date"])

# 5. Calculate Year-over-Year % change (12 months difference)
# pct_change(periods=12) compares each row with the row 12 positions earlier
# within the same category. That equals "same month, previous year" only if
# every category has exactly one row per consecutive month -- TODO confirm
# this holds for cpi_food_clean.csv.
# fill_method=None stops pandas from forward-filling a missing `value` before
# comparing (the implicit default is deprecated); a missing observation now
# yields NaN instead of a YoY figure computed from a stale value.
df["yoy_pct_change"] = (
    df.groupby("category")["value"]
      .pct_change(periods=12, fill_method=None) * 100
)

# 6. Quick preview
df.head(20)

Unnamed: 0,date,series_id,category,value,yoy_pct_change
0,2015-01-01,CUSR0000SEFC,Cereals & bakery,325.728,
1,2015-02-01,CUSR0000SEFC,Cereals & bakery,326.521,
2,2015-03-01,CUSR0000SEFC,Cereals & bakery,324.972,
3,2015-04-01,CUSR0000SEFC,Cereals & bakery,325.895,
4,2015-05-01,CUSR0000SEFC,Cereals & bakery,324.713,
5,2015-06-01,CUSR0000SEFC,Cereals & bakery,325.447,
6,2015-07-01,CUSR0000SEFC,Cereals & bakery,324.516,
7,2015-08-01,CUSR0000SEFC,Cereals & bakery,322.202,
8,2015-09-01,CUSR0000SEFC,Cereals & bakery,321.693,
9,2015-10-01,CUSR0000SEFC,Cereals & bakery,320.252,


In [9]:
# Step 2: Keep only the rows that actually have a YoY value.
# The first 12 rows of every category are NaN because there is no
# observation 12 rows earlier to compare them with.
df_yoy = df.loc[df["yoy_pct_change"].notna()]

df_yoy.head()

Unnamed: 0,date,series_id,category,value,yoy_pct_change
12,2016-01-01,CUSR0000SEFC,Cereals & bakery,308.935,-5.155529
13,2016-02-01,CUSR0000SEFC,Cereals & bakery,308.063,-5.652929
14,2016-03-01,CUSR0000SEFC,Cereals & bakery,309.055,-4.89796
15,2016-04-01,CUSR0000SEFC,Cereals & bakery,306.75,-5.874592
16,2016-05-01,CUSR0000SEFC,Cereals & bakery,305.881,-5.799583


In [10]:
# Step 3: Write the NaN-free YoY table to <BASE_DIR>/data/processed
yoy_path = os.path.join(
    os.path.join(BASE_DIR, "data", "processed"),
    "cpi_food_yoy.csv",
)

# index=False keeps the DataFrame's row index out of the CSV file
df_yoy.to_csv(path_or_buf=yoy_path, index=False)

print("Clean YoY dataset saved at:", yoy_path)

Clean YoY dataset saved at: /Users/udayb/Desktop/Portfolio/food-inflation-insights/data/processed/cpi_food_yoy.csv
