<a href="https://colab.research.google.com/github/alisa0040/sales-forecast-ml-pipeline/blob/main/src/features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import os

import pandas as pd

# When running on Colab the project is expected under /content; switch into it
# if it is there so the relative data paths below resolve. In any other
# environment the directory does not exist, so the working directory is left
# untouched instead of failing with FileNotFoundError.
COLAB_PROJECT_DIR = "/content/sales-forecast-ml-pipeline"
if os.path.isdir(COLAB_PROJECT_DIR):
    os.chdir(COLAB_PROJECT_DIR)

# Load the raw tables (paths are relative to the project root).
train = pd.read_csv("data/train.csv", parse_dates=["Date"], low_memory=False)
store = pd.read_csv("data/store.csv")

# Attach the store columns to every row of train. validate="many_to_one" makes
# pandas raise a MergeError if store.csv repeats a Store key, which would
# otherwise silently duplicate training rows in the left join.
df = pd.merge(train, store, on="Store", how="left", validate="many_to_one")

In [7]:
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Generate time-based and domain-specific features for sales forecasting.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame. Must contain a datetime ``Date`` column and the columns
        ``Promo2``, ``Promo2SinceYear``, ``Promo2SinceWeek``,
        ``CompetitionOpenSinceYear``, ``CompetitionOpenSinceMonth``,
        ``StoreType``, ``Assortment`` and ``StateHoliday``.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` (the input is not modified) with these columns
        added or replaced:

        * ``Year``, ``Month``, ``Day`` - calendar parts of ``Date``.
        * ``DayOfWeek`` - pandas numbering, 0 = Monday ... 6 = Sunday. An
          existing ``DayOfWeek`` column is overwritten.
        * ``WeekOfYear`` - ISO 8601 week number.
        * ``IsWeekend`` - 1 for Saturday/Sunday, else 0.
        * ``Promo2Active`` - 1 when ``Promo2 == 1`` and ``Date`` falls in or
          after the (``Promo2SinceYear``, ``Promo2SinceWeek``) week, else 0.
        * ``CompetitionOpenMonths`` - months between the competition opening
          month and ``Date``; negative or missing values become 0.
        * ``StoreType``, ``Assortment``, ``StateHoliday`` - cast to
          ``category`` dtype.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Calendar parts of the date.
    dates = df["Date"]
    iso = dates.dt.isocalendar()
    df["Year"] = dates.dt.year
    df["Month"] = dates.dt.month
    df["Day"] = dates.dt.day
    df["DayOfWeek"] = dates.dt.dayofweek
    df["WeekOfYear"] = iso["week"].astype(int)
    df["IsWeekend"] = df["DayOfWeek"].isin([5, 6]).astype(int)

    # Promo2 active flag.
    # WeekOfYear is an ISO week, so it must be paired with the ISO year rather
    # than the calendar year: e.g. 2013-12-30 is ISO 2014-W01 and 2016-01-01
    # is ISO 2015-W53. Comparing against the calendar year misclassifies the
    # days around New Year.
    iso_year = iso["year"].astype(int)
    since_year = df["Promo2SinceYear"]
    enrolled = (df["Promo2"] == 1) & (since_year > 0)
    started = (iso_year > since_year) | (
        (iso_year == since_year) & (df["WeekOfYear"] >= df["Promo2SinceWeek"])
    )
    # Comparisons against missing start values are False (or NA for nullable
    # dtypes, hence the fillna), so those rows end up inactive.
    df["Promo2Active"] = (enrolled & started).fillna(False).astype(int)

    # Competition open duration (in months).
    months_open = (
        (df["Year"] - df["CompetitionOpenSinceYear"]) * 12
        + (df["Month"] - df["CompetitionOpenSinceMonth"])
    )
    # Keep strictly positive durations; everything else (competition not yet
    # open, or opening date missing) becomes 0.
    is_positive = (months_open > 0).fillna(False)
    df["CompetitionOpenMonths"] = months_open.where(is_positive, 0)

    # Convert categorical columns.
    for column in ("StoreType", "Assortment", "StateHoliday"):
        df[column] = df[column].astype("category")

    return df

In [8]:
# Run the feature pipeline on the merged frame and preview the first rows.
df = df.pipe(create_features)
df.head()

Unnamed: 0,Store,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,...,Promo2SinceWeek,Promo2SinceYear,PromoInterval,Year,Month,Day,WeekOfYear,IsWeekend,Promo2Active,CompetitionOpenMonths
0,1,4,2015-07-31,5263,555,1,1,0,1,c,...,,,,2015,7,31,31,0,0,82.0
1,2,4,2015-07-31,6064,625,1,1,0,1,a,...,13.0,2010.0,"Jan,Apr,Jul,Oct",2015,7,31,31,0,1,92.0
2,3,4,2015-07-31,8314,821,1,1,0,1,a,...,14.0,2011.0,"Jan,Apr,Jul,Oct",2015,7,31,31,0,1,103.0
3,4,4,2015-07-31,13995,1498,1,1,0,1,c,...,,,,2015,7,31,31,0,0,70.0
4,5,4,2015-07-31,4822,559,1,1,0,1,a,...,,,,2015,7,31,31,0,0,3.0
