<a href="https://colab.research.google.com/github/Zuhair0000/Retail_Demand_Prediction/blob/main/retail_demand_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [297]:
import pandas as pd
import numpy as np

# **Load Dataset**

In [298]:
df = pd.read_csv("dataset.csv")
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)

In [299]:
df = df.sort_values("Date")

df["year"] = df["Date"].dt.year
df["month"] = df["Date"].dt.month
df["week"] = df["Date"].dt.isocalendar().week.astype(int)

df = df.drop(columns=["Date"])

In [300]:
df = df.sort_values(["Store", "year", "week"])

df["lag_1"] = df.groupby("Store")["Weekly_Sales"].shift(1)
df["lag_4"] = df.groupby("Store")["Weekly_Sales"].shift(4)
df["rolling_4"] = df.groupby("Store")["Weekly_Sales"].rolling(4).mean().reset_index(0, drop=True)

df = df.dropna()

# **train-test split**

In [301]:
# train_df = df[df["year"] < 2011]
# test_df = df[df["year"] >= 2011]

split_idx = int(len(df) * 0.8)  # 80% train, 20% test
train_df = df.iloc[:split_idx]
test_df = df.iloc[split_idx:]

In [302]:
X_train = train_df.drop(columns=["Weekly_Sales", "Store"])
y_train = train_df["Weekly_Sales"]

X_test = test_df.drop(columns=["Weekly_Sales", "Store"])
y_test = test_df["Weekly_Sales"]

# **Data Preprocessing**

In [303]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [304]:
X_train.columns

Index(['Holiday_Flag', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
       'year', 'month', 'week', 'lag_1', 'lag_4', 'rolling_4'],
      dtype='object')

In [305]:
numerical_features = ["year", "month", "week",'Holiday_Flag', 'Temperature', 'Fuel_Price', "CPI", "Unemployment"]

In [306]:
preprocessor = ColumnTransformer(transformers=[
    ("num", Pipeline([
        ("imputer", SimpleImputer(strategy='mean')),
        ("scaler", StandardScaler())
    ]), numerical_features)
])

# **Model Training**

In [307]:
from sklearn.linear_model import LinearRegression
lr = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LinearRegression())
])

lr.fit(X_train, y_train)

In [308]:
lr_pred = lr.predict(X_test)

In [309]:
from sklearn.ensemble import RandomForestRegressor
rf = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

rf.fit(X_train, y_train)

In [310]:
rf_pred = rf.predict(X_test)

In [311]:
from xgboost import XGBRegressor
xgb = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])
xgb.fit(X_train, y_train)

In [312]:
xgb_pred = xgb.predict(X_test)

# **Evaluation**

In [313]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [314]:
def evaluate_model(y_test, y_pred):
  return{
      "MAE": mean_absolute_error(y_test, y_pred),
      "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
      "R2": r2_score(y_test, y_pred),
  }

In [315]:
lr_result = evaluate_model(y_test, lr_pred)
lr_result

{'MAE': 127647.62776025906,
 'RMSE': np.float64(239241.60867061958),
 'R2': -0.20927802030779774}

In [316]:
rf_result = evaluate_model(y_test, rf_pred)
rf_result

{'MAE': 109312.57308499985,
 'RMSE': np.float64(153349.87132573695),
 'R2': 0.5031565564591896}

In [317]:
xgb_result = evaluate_model(y_test, xgb_pred)
xgb_result

{'MAE': 109312.57308499985,
 'RMSE': np.float64(153349.87132573695),
 'R2': 0.5031565564591896}