### Given price, discount, marketing spend, etc., how well can we predict the number of units sold?

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from joblib import dump

from pathlib import Path

# Project-relative folders: processed input data is read from DATA_PROCESSED,
# fitted models are written to OUT_MODELS and result tables to OUT_TABLES.
DATA_PROCESSED = Path("../data/processed")
OUT_MODELS = Path("../outputs/models")
OUT_TABLES = Path("../outputs/tables")

# Create both output folders up front (including missing parents) so later
# cells can write into them; folders that already exist are left alone.
for out_dir in (OUT_MODELS, OUT_TABLES):
    out_dir.mkdir(parents=True, exist_ok=True)


In [2]:
# Column the models are trained to predict, and the columns they predict it from.
target = "units_sold"
features = ["final_price", "discount_percent", "marketing_spend", "week"]

# Load the full modelling table from the processed-data folder.
df = pd.read_csv(DATA_PROCESSED / "model_data.csv")

# Unsplit feature matrix and target over every row of the table.
# NOTE(review): the later cells shown in this notebook build their own
# week-based train/test subsets and do not appear to read X or y.
X = df.loc[:, features]
y = df.loc[:, target]



In [3]:
# Time-based hold-out: rows up to and including the cutoff week are used for
# training, rows from later weeks for evaluation. A row whose week is missing
# satisfies neither comparison and therefore lands in neither subset.
last_train_week = 44
week_col = df["week"]

train_df = df.loc[week_col <= last_train_week]
test_df = df.loc[week_col > last_train_week]

# Feature matrix / target pairs for each side of the split.
X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]

# Row counts of the two subsets, displayed as the cell output.
len(X_train), len(X_test)


(21978, 3998)

### Simple Linear Regression

In [5]:
# Baseline: ordinary least squares on the training weeks (fit returns the
# fitted estimator, so construction and fitting are chained).
linear_model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out weeks.
pred_linear = linear_model.predict(X_test)

# Mean absolute percentage error on the hold-out set (lower is better).
# NOTE(review): scikit-learn divides each absolute error by max(|y_true|, eps),
# so a single zero in y_test inflates the score by many orders of magnitude.
# The recorded value of ~5e13 suggests units_sold contains zeros in the test
# weeks -- confirm, and treat this number with caution if so.
linear_mape = mean_absolute_percentage_error(y_true=y_test, y_pred=pred_linear)
linear_mape


53841538093051.06

### Random Forest

In [6]:
# Random forest of 200 depth-limited trees, seeded for repeatable results and
# trained on all available cores (fit returns the fitted estimator).
rf_model = RandomForestRegressor(
    n_estimators=200, max_depth=10, random_state=42, n_jobs=-1
).fit(X_train, y_train)

# Predictions for the held-out weeks.
pred_rf = rf_model.predict(X_test)

# Mean absolute percentage error on the hold-out set (lower is better).
# NOTE(review): scikit-learn's MAPE divides by max(|y_true|, eps); the
# recorded value of ~5e13 suggests zeros in y_test are dominating the score --
# confirm before relying on it.
rf_mape = mean_absolute_percentage_error(y_true=y_test, y_pred=pred_rf)
rf_mape


52909698865575.11

In [7]:
# One row per model with its hold-out MAPE, for side-by-side comparison.
performance = pd.DataFrame(
    [
        {"model": "Linear Regression", "MAPE": linear_mape},
        {"model": "Random Forest", "MAPE": rf_mape},
    ]
)

performance


Unnamed: 0,model,MAPE
0,Linear Regression,53841540000000.0
1,Random Forest,52909700000000.0


In [8]:
# Persist the comparison table to the tables output folder, omitting the
# DataFrame's row index from the file.
performance_path = OUT_TABLES / "prediction_performance.csv"
performance.to_csv(performance_path, index=False)


In [9]:
# Choose the model with the lower hold-out error and persist it with joblib.
#
# The MAPE values recorded in this notebook are ~5e13. scikit-learn's MAPE
# divides each absolute error by max(|y_true|, eps), so every test row with
# zero units sold contributes a term on the order of 1/eps and swamps all
# other rows. Comparing two such values only tells us which model predicts
# smaller numbers on the zero-sales rows, not which one is more accurate.
#
# When the test target contains zeros, rank the models by mean absolute error
# instead (always well defined). Otherwise MAPE is meaningful and the original
# comparison is kept unchanged.
actual = np.asarray(y_test, dtype=float)

if np.any(actual == 0):
    linear_score = float(np.mean(np.abs(actual - np.asarray(pred_linear))))
    rf_score = float(np.mean(np.abs(actual - np.asarray(pred_rf))))
else:
    linear_score, rf_score = linear_mape, rf_mape

# Ties go to the linear model, as in the original comparison.
best_model = rf_model if rf_score < linear_score else linear_model

dump(best_model, OUT_MODELS / "demand_prediction_model.pkl")

print("Saved best prediction model")


Saved best prediction model
