In [20]:
import numpy as np
import pandas as pd

In [6]:
path = "path/weighted_cpi.csv"

In [7]:
weighted_cpi= pd.read_csv(path)
weighted_cpi.head()

Unnamed: 0,Date,Weighted_CPI
0,2016-09-01,131.513916
1,2016-10-01,132.03254
2,2016-11-01,131.719926
3,2016-12-01,130.562107
4,2017-01-01,130.23316


In [8]:
#to format date to datetime
weighted_cpi["Date"] = pd.to_datetime(weighted_cpi["Date"])

# Sorting by date
weighted_cpi = weighted_cpi.sort_values("Date").reset_index(drop=True)

1. Month-over-month percentage change:
Captures short-term trends and momentum, helping models understand recent growth or decline in values.

In [11]:
# 1. Month-over-month percentage change
weighted_cpi["CPI_pct_change"] = weighted_cpi["Weighted_CPI"].pct_change() * 100

2. Year-over-year (YoY) percentage change:
Accounts for seasonality, allowing comparison with the same period in the previous year to identify repeating annual patterns.

In [12]:
# 2. Year-over-year (YoY) percentage change (12 months difference)
weighted_cpi["CPI_yoy_change"] = weighted_cpi["Weighted_CPI"].pct_change(periods=12) * 100

3. Month and Year extraction:
Enables models to learn seasonal and temporal patterns, such as higher CPI in certain months or inflation trends over years.

In [13]:
# 3. Month and Year extraction
weighted_cpi["Month"] = weighted_cpi["Date"].dt.month
weighted_cpi["Year"] = weighted_cpi["Date"].dt.year

4. Is start of year? (January = 1):
Highlights new-year effects, such as policy changes, resets in business cycles, or budget impacts that may influence values.

In [14]:
# 4. Is start of year? (January = 1)
weighted_cpi["Is_start_of_year"] = (weighted_cpi["Month"] == 1).astype(int)

5. Lag features (1, 2, 3 months):
Introduces past dependencies which are crucial for forecasting models to recognize temporal continuity and inertia.

In [15]:
# 5. Lag features (1, 2, 3 months)
for lag in [1, 2, 3]:
    weighted_cpi[f"Lag_{lag}"] = weighted_cpi["Weighted_CPI"].shift(lag)

6. Rolling statistics (mean and std over past 3 months):
Smooths out short-term fluctuations and captures local trends and volatility, which improves model stability and understanding of recent context.

In [16]:
# 6. Rolling statistics (mean and std over past 3 months)
weighted_cpi["CPI_roll_mean_3"] = weighted_cpi["Weighted_CPI"].rolling(window=3).mean()
weighted_cpi["CPI_roll_std_3"] = weighted_cpi["Weighted_CPI"].rolling(window=3).std()

7. Cyclical month features (e.g., sin/cos encoding):
Preserves the circular nature of months (December to January), unlike plain numeric month values, improving model's ability to detect periodicity.

In [17]:
# 7. Cyclical month features (optional, useful for some models)
weighted_cpi["Month_sin"] = np.sin(2 * np.pi * weighted_cpi["Month"] / 12)
weighted_cpi["Month_cos"] = np.cos(2 * np.pi * weighted_cpi["Month"] / 12)

8. Drop rows with NaNs due to lag/rolling calculations:
Ensures clean, complete data for model training, preventing errors or bias from missing values introduced by time-based feature creation.

In [18]:
# 8. Drop rows with NaNs due to lag/rolling calculations
weighted_cpi = weighted_cpi.dropna().reset_index(drop=True)

In [19]:
#to save the feature-rich dataset for modelling
weighted_cpi.to_csv("path/featured_cpi.csv", index=False)