In [None]:
## CREATE MASTER DATASET

from pathlib import Path

import pandas as pd

# Load the raw inputs: the calendar, the price table, and the three wide
# sales tables (one "d_*" column per day).
calendar, sell, train, test_v, test_e = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/calendar_afcs2025.csv",
        "./data/sell_prices_afcs2025.csv",
        "./data/sales_train_validation_afcs2025.csv",
        "./data/sales_test_validation_afcs2025.csv",
        # NOTE(review): this name uses "afcs_2025" while the other four use
        # "afcs2025" -- confirm it matches the file on disk.
        "./data/sales_test_evaluation_afcs_2025.csv",
    )
)

# Label calendar rows d_1..d_N in file order (replacing any existing "d"
# column) so they line up with the "d_*" sales columns, and parse "date"
# into datetimes.
n_days = len(calendar)
calendar = calendar.assign(
    d=[f"d_{i}" for i in range(1, n_days + 1)],
    date=pd.to_datetime(calendar["date"]),
)

def add_keys(df):
    """Return a copy of ``df`` with hierarchy key columns derived from "id".

    The "id" string is split on "_" and its first six parts are mapped by
    position: part 0 -> ``cat_id``, parts 0-1 -> ``dept_id``, parts 0-2 ->
    ``item_id``, part 3 -> ``state_id``, parts 3-4 -> ``store_id`` and
    part 5 -> ``split``. Multi-part keys are re-joined with "_".

    Args:
        df: DataFrame with a string "id" column of at least six
            "_"-separated parts.

    Returns:
        A new DataFrame; the input is not modified. Existing columns with
        the same names are overwritten in the copy.
    """
    parts = df["id"].str.split("_", expand=True)
    category, dept_no, item_no, state, store_no, split = (
        parts[i] for i in range(6)
    )
    department = category + "_" + dept_no

    derived = {
        "cat_id": category,
        "dept_id": department,
        "item_id": department + "_" + item_no,
        "state_id": state,
        "store_id": state + "_" + store_no,
        "split": split,
    }

    result = df.copy()
    for column, values in derived.items():
        result[column] = values
    return result

def melt_sales(df, dataset_name):
    """Reshape a wide sales table into long format, one row per id and day.

    Args:
        df: Wide sales table with an "id" column and one "d_*" column per
            day.
        dataset_name: Label stored in the "dataset" column of every row.

    Returns:
        A new DataFrame holding the key columns produced by ``add_keys``,
        plus "d" (the name of the originating day column), "sales" (the
        value from that column) and "dataset".
    """
    key_columns = [
        "id", "cat_id", "dept_id", "item_id", "state_id", "store_id", "split",
    ]
    keyed = add_keys(df)
    day_columns = [name for name in keyed.columns if name.startswith("d_")]

    tidy = pd.melt(
        keyed,
        id_vars=key_columns,
        value_vars=day_columns,
        var_name="d",
        value_name="sales",
    )
    return tidy.assign(dataset=dataset_name)

# Stack the three sales files into one long table; the "dataset" column
# records which file each row came from.
master = pd.concat(
    [
        melt_sales(train, "train_validation"),
        melt_sales(test_v, "test_validation"),
        melt_sales(test_e, "test_evaluation"),
    ],
    ignore_index=True,
)

# Attach the calendar attributes for each day label, then the price for each
# (store, item, week). The second join needs "wm_yr_wk", which the long sales
# table only has after the calendar merge. Both are left joins, so every
# sales row is kept; rows with no matching price get NaN "sell_price" and
# therefore NaN "revenue".
master = master.merge(calendar, on="d", how="left")
master = master.merge(sell, on=["store_id", "item_id", "wm_yr_wk"], how="left")
master["revenue"] = master["sales"] * master["sell_price"]

# to_csv does not create missing directories and would raise OSError after
# all the work above, so make sure the output folder exists first.
output_path = Path("./outputs/master_dataset.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
master.to_csv(output_path, index=False)


In [None]:
# Notebook cell output: display the assembled master DataFrame.
master