<a href="https://colab.research.google.com/github/Zuhair0000/Retail_Demand_Prediction/blob/main/retail_demand_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [35]:
import pandas as pd
import numpy as np

# **Load Dataset**

In [36]:
df = pd.read_csv("dataset.csv")
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)

In [37]:
df = df.sort_values("Date")

df["year"] = df["Date"].dt.year
df["month"] = df["Date"].dt.month
df["week"] = df["Date"].dt.isocalendar().week.astype(int)

df = df.drop(columns=["Date"])

In [38]:
df = df.sort_values(["Store", "year", "week"])

df["lag_1"] = df.groupby("Store")["Weekly_Sales"].shift(1)
df["lag_4"] = df.groupby("Store")["Weekly_Sales"].shift(4)
df["rolling_4"] = df.groupby("Store")["Weekly_Sales"].rolling(4).mean().reset_index(0, drop=True)

df = df.dropna()

# **train-test split**

In [39]:
# train_df = df[df["year"] < 2011]
# test_df = df[df["year"] >= 2011]

# split_idx = int(len(df) * 0.8)  # 80% train, 20% test
# train_df = df.iloc[:split_idx]
# test_df = df.iloc[split_idx:]

train_df = df.groupby("Store").apply(
    lambda x : x.iloc[:int(len(x)*0.8)]
).reset_index(drop=True)
test_df = df.groupby("Store").apply(
    lambda x : x.iloc[int(len(x)*0.8):]
).reset_index(drop=True)

  train_df = df.groupby("Store").apply(
  test_df = df.groupby("Store").apply(


In [40]:
X_train = train_df.drop(columns=["Weekly_Sales", "Store"])
y_train = train_df["Weekly_Sales"]

X_test = test_df.drop(columns=["Weekly_Sales", "Store"])
y_test = test_df["Weekly_Sales"]

# **Data Preprocessing**

In [41]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [42]:
X_train.columns

Index(['Holiday_Flag', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
       'year', 'month', 'week', 'lag_1', 'lag_4', 'rolling_4'],
      dtype='object')

In [43]:
numerical_features = ["year", "month", "week",'Holiday_Flag', 'Temperature', 'Fuel_Price', "CPI", "Unemployment", 'lag_1', 'lag_4', 'rolling_4']

In [44]:
preprocessor = ColumnTransformer(transformers=[
    ("num", Pipeline([
        ("imputer", SimpleImputer(strategy='mean')),
        ("scaler", StandardScaler())
    ]), numerical_features)
])

# **Model Training**

In [45]:
from sklearn.linear_model import LinearRegression
lr = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LinearRegression())
])

lr.fit(X_train, y_train)

In [46]:
lr_pred = lr.predict(X_test)

In [47]:
from sklearn.ensemble import RandomForestRegressor
rf = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

rf.fit(X_train, y_train)

In [48]:
rf_pred = rf.predict(X_test)

In [49]:
from xgboost import XGBRegressor
xgb = Pipeline([
    ("preprocessor", preprocessor),
    ("model", XGBRegressor(
        n_estimators=300,
        max_depth=5,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric="rmse",
        random_state=42))
])
xgb.fit(X_train, y_train)

In [50]:
xgb_pred = xgb.predict(X_test)

# **Evaluation**

In [51]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [52]:
def evaluate_model(y_test, y_pred):
  return{
      "MAE": mean_absolute_error(y_test, y_pred),
      "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
      "R2": r2_score(y_test, y_pred),
  }

In [53]:
lr_result = evaluate_model(y_test, lr_pred)
lr_result

{'MAE': 106498.2244891581,
 'RMSE': np.float64(142839.22551517934),
 'R2': 0.568930093964799}

In [54]:
rf_result = evaluate_model(y_test, rf_pred)
rf_result

{'MAE': 104174.44467499977,
 'RMSE': np.float64(148686.79113486002),
 'R2': 0.5329132915038959}

In [55]:
xgb_result = evaluate_model(y_test, xgb_pred)
xgb_result

{'MAE': 77008.73849999999,
 'RMSE': np.float64(93223.83873512012),
 'R2': 0.8163856178250534}