In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Location of the CSV file to load; replace the placeholder before running.
filepath = "you dataset path here...!"

# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath)
print(df.head(n=5))

In [None]:
# Number of missing values in each column of the freshly loaded frame.
print(df.isna().sum())

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Parse the day-month-year strings into timestamps, then derive calendar
# features from the parsed column through a single .dt accessor.
df["Date"] = pd.to_datetime(df["Date"], format="%d-%m-%Y")
date_parts = df["Date"].dt

df["Year"] = date_parts.year
df["Month"] = date_parts.month
df["Week"] = date_parts.isocalendar().week
df["Day"] = date_parts.day
df["DayofWeek"] = date_parts.dayofweek

# Flag rows whose DayofWeek is 5 or 6 (pandas counts Monday as 0) with 1, others with 0.
df["isWeekend"] = (df["DayofWeek"] >= 5).astype("int64")

print(df.head())

In [None]:
# Re-count missing values per column now that the date-derived columns exist.
print(df.isna().sum())

In [None]:
# Line plot of Weekly_Sales against Date, drawn through an explicit Axes object.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df["Date"], df["Weekly_Sales"], label="Sales Trend", color="blue")
ax.set_xlabel("Date")
ax.set_ylabel("Weekly_Sales")
ax.set_title("Sales Trend Over Time")
ax.legend()
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max of the sales column, then overwrite the column with its
# min-max scaled values. `scalar` keeps the fitted parameters for later use.
scalar = MinMaxScaler()
scalar.fit(df[["Weekly_Sales"]])
df["Weekly_Sales"] = scalar.transform(df[["Weekly_Sales"]]).ravel()

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the target and the raw Date; the target is
# the sales column.
target_col = "Weekly_Sales"
X = df.drop([target_col, "Date"], axis=1)
y = df[target_col]

# Hold out 20% of the rows for testing, using a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Training Data Shape : ", X_train.shape)
print("Testing Data Shape : ", X_test.shape)

In [None]:
pip install xgboost

In [None]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

# Initialise and train the baseline XGBoost model.
xgb_model = XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=6, random_state=42)
xgb_model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate the model. root_mean_squared_error returns the RMSE (not the MSE),
# so the value is named and labelled accordingly.
# Weekly_Sales was min-max scaled earlier in the notebook, so both errors are
# expressed in scaled units rather than sales units.
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
rmse_xgb = root_mean_squared_error(y_test, y_pred_xgb)
mse_xgb = rmse_xgb  # legacy name kept so any cell still reading `mse_xgb` keeps working

print("XGBoost Results: ")
print(f"MAE: {mae_xgb: .2f}")
print(f"RMSE: {rmse_xgb: .2f}")

In [None]:
# Average of the Weekly_Sales column as currently stored in df.
mean_sales = df["Weekly_Sales"].mean()
print("Mean Sales: ", mean_sales)

In [None]:
# MAPE is the mean of |actual - predicted| / |actual|. The previous expression
# divided the MAE by the mean of the min-max scaled sales column, which is a
# different statistic and is distorted by the offset the scaling removes.
#
# Map actuals and predictions back to sales units with `scalar`, the scaler that
# was fitted on the raw Weekly_Sales column. Run this cell in notebook order:
# `scalar` must still hold that original fit.
y_test_sales = scalar.inverse_transform(y_test.to_numpy().reshape(-1, 1)).ravel()
y_pred_sales = scalar.inverse_transform(y_pred_xgb.reshape(-1, 1)).ravel()

# NOTE(review): the ratio is undefined for an actual value of 0 — assumes the
# test rows contain no zero Weekly_Sales; confirm against the dataset.
mape = (abs(y_test_sales - y_pred_sales) / abs(y_test_sales)).mean() * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape: .2f}%")

In [None]:
pip install optuna

In [None]:
import optuna
from sklearn.model_selection import cross_val_score

#optimization function
def objective(trial):
    """Optuna objective: cross-validated RMSE of an XGBoost model on the training split.

    Args:
        trial: the Optuna trial used to sample one hyperparameter combination.

    Returns:
        The mean RMSE over 5 cross-validation folds of (X_train, y_train);
        lower is better.
    """
    # Hyperparameters suggested by Optuna for this trial.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 500, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
    }

    model = XGBRegressor(**params, random_state=42)

    # Score with cross-validation on the training data only. Scoring each trial
    # on X_test / y_test would select hyperparameters on the same rows that are
    # later used to report the final error, making that error optimistic.
    neg_rmse_scores = cross_val_score(
        model,
        X_train,
        y_train,
        cv=5,
        scoring="neg_root_mean_squared_error",
    )

    # The scorer reports negated RMSE (higher is better); flip the sign back.
    return float(-neg_rmse_scores.mean())

In [None]:
# Seed the sampler so the search proposes the same trials on every run, in line
# with the fixed random_state=42 used for the models and the data split.
sampler = optuna.samplers.TPESampler(seed=42)

# The objective returns an RMSE, so the study minimizes it.
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=30)

#best parameters found
print("Best Hyperparameters: ", study.best_params)

In [None]:
# Refit XGBoost using the best hyperparameters recorded by the Optuna study.
best_params = study.best_params
xgb_optimized = XGBRegressor(random_state=42, **best_params)
xgb_optimized.fit(X_train, y_train)

# Predict on the held-out rows and score the tuned model.
y_pred_xgb_opt = xgb_optimized.predict(X_test)
mae_opt, rmse_opt = (
    metric(y_test, y_pred_xgb_opt)
    for metric in (mean_absolute_error, root_mean_squared_error)
)

# Report both errors.
print("Optimized XGBoost Model Results: ")
print(f"MAE: {mae_opt: .2f}")
print(f"RMSE: {rmse_opt: .2f}")

In [None]:
pip install tensorflow

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Sort chronologically so the windows built from this column follow time order.
df = df.sort_values(by=["Date"])

# Scale the sales column with the scaler created for this section. The earlier
# code created `scaler` but then called the older `scalar` object, which left
# `scaler` unfitted and overwrote the parameters `scalar` had learned from the
# raw sales column.
# Weekly_Sales was already min-max scaled earlier in the notebook, so on a
# top-to-bottom run this leaves the values effectively unchanged.
scaler = MinMaxScaler()
df["Weekly_Sales"] = scaler.fit_transform(df[["Weekly_Sales"]]).ravel()

#function to create sequence
def create_sequence(data, seq_length):
    """Build sliding-window samples for next-step forecasting.

    Each sample is `seq_length` consecutive entries of `data`, and its target
    is the entry immediately following that window.

    Args:
        data: sequence or array of observations, ordered along axis 0.
        seq_length: number of consecutive observations per window (>= 1).

    Returns:
        (X, y) as numpy arrays. X has shape (n, seq_length, ...) and y has
        shape (n, ...), where n = max(len(data) - seq_length, 0). When the
        input is too short to form a window, X is still returned with its
        window axis, shape (0, seq_length, ...), so later reshapes that read
        X.shape[1] keep working.

    Raises:
        ValueError: if seq_length is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}")

    values = np.asarray(data)
    n_samples = max(len(values) - seq_length, 0)

    if n_samples == 0:
        # Not enough observations for a single window: return an empty array
        # that still carries the window axis.
        X = np.empty((0, seq_length) + values.shape[1:], dtype=values.dtype)
    else:
        X = np.stack([values[start:start + seq_length] for start in range(n_samples)])

    # The target of window `start` is the entry at `start + seq_length`, so the
    # targets are simply everything after the first `seq_length` entries.
    y = values[seq_length:].copy()
    return X, y

# Number of past observations fed to the model for each prediction.
SEQ_LENGTH = 30

# Build (window, next value) pairs from the sales column.
X, y = create_sequence(df["Weekly_Sales"].values, SEQ_LENGTH)

# Reshape for the LSTM: (samples, timesteps, features).
X = X.reshape((X.shape[0], X.shape[1], 1))

# Split chronologically: the first 80% of windows train the model and the last
# 20% test it. Consecutive windows overlap in all but one value, so a shuffled
# split would place near-copies of the test windows (and their targets) in the
# training set and scramble the time order of the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

print("Training data shape: ", X_train.shape)
print("Testing data shape: ", X_test.shape)

In [None]:
# Stacked LSTM regressor, assembled layer by layer.
FEATURES = 1  # size of the trailing feature axis of each input window

model = Sequential()
model.add(Input(shape=(SEQ_LENGTH, FEATURES)))
# First LSTM layer emits the full sequence so the second LSTM can consume it.
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))
# Second LSTM layer emits only its final output.
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
# Dense head: a 25-unit hidden layer followed by a single-value output.
model.add(Dense(25))
model.add(Dense(1))

# Train with the Adam optimizer on mean squared error.
model.compile(loss="mse", optimizer="adam")

# Print the layer-by-layer summary.
model.summary()

In [None]:
# Fit for 50 epochs in mini-batches of 16, evaluating the loss on the test
# split after every epoch; the per-epoch record is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=50,
    validation_data=(X_test, y_test),
)

In [None]:
# y_test and the model's predictions are min-max scaled sales values. The
# previous code fitted a fresh scaler on y_train, which is already scaled, so
# inverse_transform could not return sales units; it also mapped the
# predictions through that scaler while the actuals came back unchanged.
#
# Recover the original scaling by fitting on the raw sales column read again
# from the CSV, so this cell does not depend on the state of any earlier scaler.
raw_sales = pd.read_csv(filepath, usecols=["Weekly_Sales"])
scaler = MinMaxScaler()
scaler.fit(raw_sales)

# Make predictions
y_pred_lstm = model.predict(X_test)

# Map actuals and predictions back to sales units with the same scaler.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_rescaled = scaler.inverse_transform(y_pred_lstm)

# Plot actual vs predicted sales
plt.figure(figsize=(10, 5))
plt.plot(y_test_rescaled, label="Actual Sales", color="blue")
plt.plot(y_pred_rescaled, label="Predicted Sales", color="red")
plt.xlabel("Times")
plt.ylabel("Sales")
plt.title("LSTM Sales Forecasting")
plt.legend()
plt.show()

In [None]:
import joblib

# Persist the trained LSTM model twice: as a joblib pickle and in the native
# Keras format.
# NOTE(review): the pickle duplicates the .keras file, and pickle files should
# never be loaded from untrusted sources — confirm the .pkl is still needed.
pickle_path = "sales_forecasting_model.pkl"
keras_path = "sales_forecasting_model.keras"

joblib.dump(model, pickle_path)
model.save(keras_path)

print("Model Saved Successfully")