# 03 - Feature Engineering

This notebook creates new features from the cleaned Walmart dataset to improve model performance.

In [1]:
# Step 1: Load cleaned data
import os
import pandas as pd

cwd = os.getcwd()
base_path = os.path.abspath(os.path.join(cwd, "..")) if cwd.endswith("notebooks") else cwd
cleaned_path = os.path.join(base_path, "data", "processed", "walmart_cleaned.csv")

df = pd.read_csv(cleaned_path)
df['date'] = pd.to_datetime(df['date'])


In [2]:
# Step 2: Create time-based features
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['week'] = df['date'].dt.isocalendar().week
df['dayofweek'] = df['date'].dt.dayofweek
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)


In [3]:
# Step 3: Create lag features
df = df.sort_values(by=['store', 'date'])
df['weekly_sales_lag1'] = df.groupby('store')['weekly_sales'].shift(1)


In [4]:
# Step 4: Create interaction features
df['temp_fuel_interaction'] = df['temperature'] * df['fuel_price']


In [5]:
# Step 5: Save engineered dataset

import os

# Define base path relative to repo root
base_path = os.getcwd()  # ensures portability in VS Code, Colab, GitHub Actions

# Ensure processed folder exists
processed_dir = os.path.join(base_path, "data", "processed")
os.makedirs(processed_dir, exist_ok=True)

# Define output path
engineered_path = os.path.join(processed_dir, "walmart_features.csv")

# Save engineered dataset
df.to_csv(engineered_path, index=False)

print(f"Engineered dataset saved successfully at: {engineered_path}")
print(f"Total samples: {len(df)} | Columns: {list(df.columns)}")



Engineered dataset saved successfully at: c:\Users\Emron nabizadeh\Documents\Data-analyst\Project\walmart-sales-forecasting\notebooks\data\processed\walmart_features.csv
Total samples: 2565 | Columns: ['store', 'date', 'weekly_sales', 'holiday_flag', 'temperature', 'fuel_price', 'cpi', 'unemployment', 'year', 'month', 'week', 'dayofweek', 'is_weekend', 'weekly_sales_lag1', 'temp_fuel_interaction']


In [6]:
# Step 6: Summary
# - Extracted time-based features: year, month, week, dayofweek, is_weekend
# - Created lag feature: weekly_sales_lag1
# - Added interaction feature: temp Ã— fuel_price
# - Saved dataset to data/processed/walmart_features.csv
