In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Load the raw table. The column pandas labelled "Unnamed: 0" is renamed so it
# can be used as the row identifier.
all_data = pd.read_csv("train.csv").rename(columns={"Unnamed: 0": "id"})
all_data["period_start_dt"] = pd.to_datetime(all_data["period_start_dt"])

# These two columns are discarded outright rather than imputed.
all_data = all_data.drop(["PROMO2_FLAG", "NUM_CONSULTANT"], axis=1)

# Missing promo flags take the most frequent observed value.
promo1_mode = all_data["PROMO1_FLAG"].mode()[0]
all_data["PROMO1_FLAG"] = all_data["PROMO1_FLAG"].fillna(promo1_mode)

# Order rows by product, then store, then period start date.
series_order = ["product_rk", "store_location_rk", "period_start_dt"]
all_data = all_data.sort_values(by=series_order)

def _fill_within_series(values):
    """Forward-fill, then back-fill, the gaps in one group's values."""
    return values.ffill().bfill()


series_keys = ["product_rk", "store_location_rk"]
for col in ("PRICE_REGULAR", "PRICE_AFTER_DISC", "AUTORIZATION_FLAG"):
    # First borrow the nearest known value from the same product/store group,
    # then fall back to the column-wide median for anything still missing.
    within_series = (
        all_data.groupby(series_keys)[col].transform(_fill_within_series)
    )
    all_data[col] = within_series.fillna(within_series.median())

# Absolute and relative discount off the regular price.
regular_price = all_data["PRICE_REGULAR"]
all_data["discount_abs"] = regular_price - all_data["PRICE_AFTER_DISC"]

# A zero regular price is masked to NaN before dividing, so those rows end up
# with a relative discount of 0 instead of inf.
nonzero_price = regular_price.replace(0, np.nan)
all_data["discount_pct"] = all_data["discount_abs"].div(nonzero_price).fillna(0)

# Calendar features derived from the period start date.
dt = all_data["period_start_dt"]
calendar = dt.dt
all_data = all_data.assign(
    dow=calendar.weekday,
    weekofyear=calendar.isocalendar().week.astype(int),  # ISO week number
    month=calendar.month,
    year=calendar.year,
    dayofyear=calendar.dayofyear,
)

# Rows with a known target are used for training; rows without one are the
# ones to be predicted.
has_target = all_data["demand"].notna()
train_df = all_data.loc[has_target].copy()
test_df = all_data.loc[~has_target].copy()

# Every remaining column is a feature, except the identifier, the target and
# the raw timestamp.
excluded = {"id", "demand", "period_start_dt"}
feature_cols = [name for name in train_df.columns if name not in excluded]

X, y = train_df[feature_cols], train_df["demand"]

# 80/20 hold-out with a fixed seed for reproducibility.
# NOTE(review): rows are split at random, not by period_start_dt. If the
# unlabeled rows are later periods, a time-ordered hold-out would give a more
# faithful error estimate — confirm.
X_tr, X_val, y_tr, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Histogram-based gradient boosting regressor with a fixed seed.
model = HistGradientBoostingRegressor(
    max_depth=6,
    learning_rate=0.05,
    max_iter=400,
    l2_regularization=0.0,
    random_state=42,
)

# Fit on the training split and score the hold-out. Previously the validation
# predictions were computed and then discarded, so the split produced no
# feedback at all.
model.fit(X_tr, y_tr)
val_pred = model.predict(X_val)

# Score with the same non-negativity clipping that is applied to the test
# predictions below, so the reported error reflects what is submitted.
# NOTE(review): MAE is used because the file imports it — confirm it is the
# metric the task is evaluated on.
val_mae = mean_absolute_error(y_val, np.maximum(val_pred, 0))
print(f"Validation MAE: {val_mae:.4f}")

# Refit on all labeled rows before predicting the unlabeled ones.
model.fit(X, y)

X_test = test_df[feature_cols]
test_pred = model.predict(X_test)

# Negative predictions are clipped to zero.
test_pred = np.maximum(test_pred, 0)

# Build the submission in the row order of the sample file.
sample_sub = pd.read_csv("sample_submission.csv")

# One prediction per unlabeled row, keyed by its id.
pred_df = test_df[["id"]].reset_index(drop=True).assign(predicted=test_pred)

# The left join keeps every row of the sample file. Ids with no matching
# prediction come out as NaN and are filled with 0.
submission_frame = sample_sub.drop("predicted", axis=1)
submission = pd.merge(submission_frame, pred_df, how="left", on="id")
submission["predicted"] = submission["predicted"].fillna(0).astype(float)

submission.to_csv("submission.csv", index=False)
