<a href="https://colab.research.google.com/github/Zuhair0000/Retail_Demand_Prediction/blob/main/retail_demand_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [192]:
import pandas as pd
import numpy as np

# **Load Dataset**

In [193]:
df = pd.read_csv("dataset.csv")
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)
df = df.sort_values("Date")

df["year"] = df["Date"].dt.year
df["month"] = df["Date"].dt.month
df["week"] = df["Date"].dt.isocalendar().week.astype(int)

df = df.drop(columns=["Date"])

In [194]:
df = df.sort_values(["Store", "year", "week"])

df["lag_1"] = df.groupby("Store")["Weekly_Sales"].shift(1)
df["lag_4"] = df.groupby("Store")["Weekly_Sales"].shift(4)
df["rolling_4"] = df.groupby("Store")["Weekly_Sales"].rolling(4).mean().reset_index(0, drop=True)

df = df.dropna()

# **train-test split**

In [195]:
train_df = df[df["year"] < 2011]
test_df = df[df["year"] >= 2011]

In [196]:
X_train = train_df.drop(columns=["Weekly_Sales", "Store"])
y_train = train_df["Weekly_Sales"]

X_test = test_df.drop(columns=["Weekly_Sales", "Store"])
y_test = test_df["Weekly_Sales"]

# **Data Preprocessing**

In [197]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [198]:
X_train.columns

Index(['Holiday_Flag', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
       'year', 'month', 'week', 'lag_1', 'lag_4', 'rolling_4'],
      dtype='object')

In [199]:
numerical_features = ["year", "month", "week",'Holiday_Flag', 'Temperature', 'Fuel_Price', "CPI", "Unemployment"]

In [200]:
preprocessor = ColumnTransformer(transformers=[
    ("num", Pipeline([
        ("imputer", SimpleImputer(strategy='mean')),
        ("scaler", StandardScaler())
    ]), numerical_features)
])

# **Model Training**

In [201]:
from sklearn.linear_model import LinearRegression
lr = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LinearRegression())
])

lr.fit(X_train, y_train)

In [202]:
lr_pred = lr.predict(X_test)

In [203]:
from sklearn.ensemble import RandomForestRegressor
rf = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

rf.fit(X_train, y_train)

In [204]:
rf_pred = rf.predict(X_test)

In [205]:
from xgboost import XGBRegressor
xgb = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])
xgb.fit(X_train, y_train)

In [206]:
xgb_pred = xgb.predict(X_test)

# **Evaluation**

In [207]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [208]:
def evaluate_model(y_test, y_pred):
  return{
      "MAE": mean_absolute_error(y_test, y_pred),
      "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
      "R2": r2_score(y_test, y_pred),
  }

In [209]:
lr_result = evaluate_model(y_test, lr_pred)
lr_result

{'MAE': 272716.86988071515,
 'RMSE': np.float64(370690.3535732414),
 'R2': -4.197030680829534}

In [210]:
rf_result = evaluate_model(y_test, rf_pred)
rf_result

{'MAE': 120761.50017115426,
 'RMSE': np.float64(149113.18368549625),
 'R2': 0.1590604130916463}

In [211]:
xgb_result = evaluate_model(y_test, xgb_pred)
xgb_result

{'MAE': 120761.50017115426,
 'RMSE': np.float64(149113.18368549625),
 'R2': 0.1590604130916463}