# Modelling Trials - Deep Learning

In this notebook, we will train various deep learning models using PyTorch. We will compare their performance and select the best model for forecasting sales quantity.


In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import torch
from torch import nn
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

import os

# Switch to the parent directory so that `src` is importable and the relative
# data paths resolve. The guard keeps the cell idempotent: re-running it will not
# climb another directory level once `src/` is visible from the working directory.
# NOTE(review): assumes the notebook lives one level below the project root — confirm.
if not os.path.isdir("src"):
    os.chdir("../")
from src.utils.data_ingestion import download_dataset, extract_dataset, load_data

In [2]:
# Fetch and unpack the raw dataset via the project helpers, then load the training CSV
TRAIN_CSV_PATH = "Data/Dataset/Train.csv"

download_dataset()
extract_dataset()

df = load_data(TRAIN_CSV_PATH)
df.head()

INFO:root:Dataset already exists in the data directory.
INFO:root:Dataset loaded successfully.


Unnamed: 0,Date,Category,Brand,Day_of_Week,Holiday_Indicator,Past_Purchase_Trends,Price,Discount,Competitor_Price,Sales_Quantity
0,26/09/24,Automotive,BrandA,3,0,17.9,462.3,33.96,359.45,227.0
1,26/09/24,Automotive,BrandA,3,0,66.3,321.28,24.76,49.47,370.0
2,26/09/24,Automotive,BrandA,3,0,14.4,74.85,32.19,245.11,299.0
3,26/09/24,Automotive,BrandA,3,0,34.7,469.12,43.68,144.08,426.0
4,22/09/24,Automotive,BrandA,6,1,93.5,221.76,36.79,478.07,470.0


In [3]:
# Count missing values per column to decide how they should be handled
df.isna().sum()

Date                      0
Category                  0
Brand                     0
Day_of_Week               0
Holiday_Indicator         0
Past_Purchase_Trends      0
Price                     0
Discount                  0
Competitor_Price          0
Sales_Quantity          464
dtype: int64

In [4]:
# Only the target column has gaps (see the counts above), so drop exactly those rows.
# Rebinding instead of `inplace=True` leaves the originally loaded frame untouched.
# NOTE(review): removing rows leaves gaps in the row sequence that the windowing step
# later treats as contiguous — confirm this is acceptable.
df = df.dropna(subset=["Sales_Quantity"])

### Data Preprocessing

#### Splitting the data

In [5]:
# Positional 80/20 split: the first rows form the training set, the rest validation.
# NOTE(review): the split follows the existing row order; the preview above shows
# dates that are not in chronological order — confirm the ordering is intended.
TRAIN_FRACTION = 0.8

n_rows = len(df)
train_size = int(n_rows * TRAIN_FRACTION)
val_size = n_rows - train_size

train_data = df.iloc[:train_size]
val_data = df.iloc[train_size:]

# View the shapes
train_data.shape, val_data.shape

((14415, 10), (3604, 10))

#### Creating Preprocessor for Features

In [6]:
# Partition the columns by dtype: numeric columns vs. everything else
# NOTE(review): as the output below shows, `num_columns` contains the target
# `Sales_Quantity` and `col_columns` contains the raw `Date` strings, so both are
# fed to the feature preprocessor — confirm this is intended.
num_columns = df.select_dtypes(include="number").columns.tolist()
col_columns = df.select_dtypes(exclude="number").columns.tolist()

num_columns, col_columns

(['Day_of_Week',
  'Holiday_Indicator',
  'Past_Purchase_Trends',
  'Price',
  'Discount',
  'Competitor_Price',
  'Sales_Quantity'],
 ['Date', 'Category', 'Brand'])

In [7]:
# Numeric columns: standardise with StandardScaler
numerical_transformer = Pipeline([("scaler", StandardScaler())])

# Non-numeric columns: dense one-hot vectors; unknown categories are ignored at transform time
categorical_transformer = Pipeline(
    [("one_hot_encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False))]
)

# Route each column group through its own transformer
feature_preprocessor = ColumnTransformer(
    [
        ("num_transformer", numerical_transformer, num_columns),
        ("cat_transformer", categorical_transformer, col_columns),
    ]
)

In [8]:
# Fit the preprocessor on the training split only, then apply the already-fitted
# transformation to the validation split
train_features = feature_preprocessor.fit_transform(train_data)
val_features = feature_preprocessor.transform(val_data)

#### Target transformer


In [9]:
# Standardise the target with its own dedicated scaler
target_transformer = Pipeline([("target_scaler", StandardScaler())])

# The scaler expects a 2-D column, so reshape the target series to (n_rows, 1)
train_target_column = train_data["Sales_Quantity"].to_numpy().reshape(-1, 1)
val_target_column = val_data["Sales_Quantity"].to_numpy().reshape(-1, 1)

# Fit on the training target only; reuse the fitted scaler for validation,
# then drop the singleton column axis again
train_target = target_transformer.fit_transform(train_target_column).squeeze()
val_target = target_transformer.transform(val_target_column).squeeze()

In [10]:
# Confirm the scaled training target is a flat 1-D array
train_target.shape

(14415,)

In [11]:
# Preview the first few scaled target values
train_target[:5]

array([-0.20829116,  0.80290733,  0.30084375,  1.19890114,  1.51003914])

#### Creating windowed dataset

In [12]:
def create_windowed_dataset(features: np.ndarray, target: np.ndarray, window_size: int = 7, forecast_steps: int = 1):
    """
    Build sliding-window samples for sequence forecasting.

    Sample ``i`` pairs the feature rows ``[i, i + window_size)`` with the target
    values ``[i + window_size, i + window_size + forecast_steps)``, i.e. the
    ``forecast_steps`` values that immediately follow the window.

    Args:
        features (np.ndarray): Input features, indexed by row along axis 0.
        target (np.ndarray): Target values, aligned row-for-row with ``features``.
        window_size (int): Number of consecutive rows in each input window.
        forecast_steps (int): Number of target values to predict after each window.

    Returns:
        X (np.ndarray): Windowed features of shape
            ``(n_samples, window_size, *features.shape[1:])``.
        y (np.ndarray): Windowed targets of shape
            ``(n_samples, forecast_steps, *target.shape[1:])``.
        ``n_samples`` is ``len(features) - window_size - forecast_steps + 1``, or 0
        when the input is too short to form a single window.

    Raises:
        ValueError: If ``window_size`` or ``forecast_steps`` is smaller than 1, or if
            ``features`` and ``target`` have different lengths.
    """
    features = np.asarray(features)
    target = np.asarray(target)

    if window_size < 1 or forecast_steps < 1:
        raise ValueError(
            f"window_size and forecast_steps must be >= 1, got {window_size} and {forecast_steps}"
        )
    if len(features) != len(target):
        raise ValueError(
            f"features and target must have the same length, got {len(features)} and {len(target)}"
        )

    n_samples = len(features) - window_size - forecast_steps + 1
    if n_samples <= 0:
        # Too few rows for even one window: return empty arrays that still carry the
        # expected trailing dimensions, so downstream shape handling keeps working.
        return (
            np.empty((0, window_size) + features.shape[1:], dtype=features.dtype),
            np.empty((0, forecast_steps) + target.shape[1:], dtype=target.dtype),
        )

    X = np.stack([features[start:start + window_size] for start in range(n_samples)])
    y = np.stack(
        [
            target[start + window_size:start + window_size + forecast_steps]
            for start in range(n_samples)
        ]
    )
    return X, y

In [13]:
# Each sample is a 7-row input window paired with the single target value that follows it
WINDOW_SIZE = 7
FORECAST_STEPS = 1

X_train, y_train = create_windowed_dataset(
    np.array(train_features), train_target, window_size=WINDOW_SIZE, forecast_steps=FORECAST_STEPS
)
X_val, y_val = create_windowed_dataset(
    np.array(val_features), val_target, window_size=WINDOW_SIZE, forecast_steps=FORECAST_STEPS
)

In [14]:
# Inspect the shapes of the windowed training inputs and targets
X_train.shape, y_train.shape

((14408, 7, 358), (14408, 1))

#### Creating DataLoaders

In [15]:
# Create Dataset
class SalesDataset(Dataset):
    """Map-style dataset that serves (features, target) pairs as float32 tensors.

    The arrays are converted to tensors once at construction time, so
    ``__getitem__`` only indexes into them instead of building a new tensor on
    every access. Returned items are therefore slices of the stored tensors and
    should be treated as read-only.

    Args:
        features (np.ndarray): Samples indexed along axis 0.
        target (np.ndarray): Targets aligned sample-for-sample with ``features``.

    Raises:
        ValueError: If ``features`` and ``target`` have different lengths.
    """

    def __init__(self, features: np.ndarray, target: np.ndarray):
        super().__init__()
        # __len__ is driven by the features, so a length mismatch would otherwise
        # surface late as an IndexError or silently ignore trailing targets.
        if len(features) != len(target):
            raise ValueError(
                f"features and target must have the same length, got {len(features)} and {len(target)}"
            )
        self.features = torch.as_tensor(features, dtype=torch.float32)
        self.target = torch.as_tensor(target, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx: int):
        """Return the ``(features, target)`` tensors of sample ``idx``."""
        return self.features[idx], self.target[idx]

In [16]:
# Create Datasets and DataLoaders
BATCH_SIZE = 32

train_dataset, val_dataset = SalesDataset(X_train, y_train), SalesDataset(X_val, y_val)

# Both loaders serve batches in the stored sample order (no shuffling)
train_loader, val_loader = (
    DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
    for dataset in (train_dataset, val_dataset)
)

In [17]:
# Peek at the first sample only, to confirm the per-item tensor shapes
for X, y in train_dataset:
    print(X.shape, y.shape)
    break

torch.Size([7, 358]) torch.Size([1])


## Data Preparation for Deep Learning models


## Create a training script for the models


In [18]:
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

def train(model: nn.Module, train_loader: DataLoader, val_loader: DataLoader, loss_fn: nn.Module, optimizer: torch.optim.Optimizer, epochs: int = 10):
    """Train the model and print the training / validation loss after every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` followed by one
    gradient-free evaluation pass over ``val_loader``. The printed figures are the
    square root of the sample-weighted mean loss, i.e. RMSE when ``loss_fn`` is
    ``nn.MSELoss`` with its default mean reduction.

    Args:
        model (nn.Module): Model to train.
        train_loader (DataLoader): Training DataLoader yielding ``(X, y)`` tensor batches.
        val_loader (DataLoader): Validation DataLoader yielding ``(X, y)`` tensor batches.
        loss_fn (nn.Module): Loss function; assumed to return the mean loss of a batch.
        optimizer (torch.optim.Optimizer): Optimizer bound to ``model``'s parameters.
        epochs (int): Number of epochs to train the model.

    Returns:
        None
    """
    # Run every batch on the device the model lives on (CPU when it has no parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in tqdm(range(epochs), desc="Epochs: "):
        train_loss, test_loss = 0.0, 0.0
        train_count, test_count = 0, 0

        model.train()
        ### Training loss
        for X, y in tqdm(train_loader, desc="Training...", leave=False):
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = loss_fn(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so a shorter final batch does not skew the epoch mean.
            batch_len = X.size(0)
            train_loss += loss.item() * batch_len
            train_count += batch_len

        #### Testing time
        model.eval()

        with torch.inference_mode():
            for X, y in tqdm(val_loader, desc="Evaluating...", leave=False):
                X, y = X.to(device), y.to(device)
                outputs = model(X)
                batch_len = X.size(0)
                test_loss += loss_fn(outputs, y).item() * batch_len
                test_count += batch_len

        # An empty loader yields NaN instead of raising ZeroDivisionError.
        train_loss = train_loss / train_count if train_count else float("nan")
        test_loss = test_loss / test_count if test_count else float("nan")

        print(f"Epoch: {epoch + 1}/{epochs} | Train Loss: {np.sqrt(train_loss):.4f} | Test Loss : {np.sqrt(test_loss):.4f}")

### Models


In [19]:
# Model / training settings; the input width is taken from the preprocessed feature matrix
hyperparams = dict(
    input_size=train_features.shape[1],
    hidden_size=128,
    num_layers=2,
    batch_size=32,
    forecast_steps=1,
    num_epochs=10,
)

In [20]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        batch_size: Stored for interface compatibility; ``forward`` does not use it
            and accepts any batch size.
        forecast_steps: Number of values predicted per sample (output width).
    """

    def __init__(self, input_size, hidden_size, num_layers, batch_size, forecast_steps):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.forecast_steps = forecast_steps
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, forecast_steps)

    def forward(self, x):
        """Map a batch-first sequence batch ``x`` to a ``(batch, forecast_steps)`` tensor."""
        # With no initial state given, nn.LSTM starts from zero hidden/cell states
        # created on the input's device and dtype. This avoids the device/dtype
        # mismatch that manually built CPU float32 zero tensors cause on GPU.
        out, _ = self.lstm(x)
        # Only the last time step feeds the head, so apply the activation to that slice alone.
        last_step = self.relu(out[:, -1, :])
        return self.fc(last_step)

In [21]:
# Create the model and train it

# Seed PyTorch's RNG so the weight initialisation is the same on every run
SEED = 42
torch.manual_seed(SEED)

model = LSTMModel(
    input_size=hyperparams['input_size'],
    num_layers=hyperparams['num_layers'],
    hidden_size=hyperparams['hidden_size'],
    batch_size=hyperparams['batch_size'],
    forecast_steps=hyperparams['forecast_steps'],
)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, train_loader, val_loader, loss_fn, optimizer, epochs=hyperparams['num_epochs'])

Epochs:  10%|█         | 1/10 [00:07<01:04,  7.13s/it]

Epoch: 1/10 | Train Loss: 1.0007 | Test Loss : 0.9909


Epochs:  20%|██        | 2/10 [00:17<01:10,  8.86s/it]

Epoch: 2/10 | Train Loss: 0.9988 | Test Loss : 0.9923


Epochs:  30%|███       | 3/10 [00:26<01:03,  9.01s/it]

Epoch: 3/10 | Train Loss: 0.9918 | Test Loss : 0.9936


Epochs:  40%|████      | 4/10 [00:33<00:48,  8.16s/it]

Epoch: 4/10 | Train Loss: 0.9763 | Test Loss : 0.9997


Epochs:  50%|█████     | 5/10 [00:40<00:38,  7.73s/it]

Epoch: 5/10 | Train Loss: 0.9549 | Test Loss : 1.0216


Epochs:  60%|██████    | 6/10 [00:47<00:30,  7.54s/it]

Epoch: 6/10 | Train Loss: 0.9227 | Test Loss : 1.0524


Epochs:  70%|███████   | 7/10 [00:56<00:24,  8.11s/it]

Epoch: 7/10 | Train Loss: 0.8842 | Test Loss : 1.0685


Epochs:  80%|████████  | 8/10 [01:03<00:15,  7.70s/it]

Epoch: 8/10 | Train Loss: 0.8402 | Test Loss : 1.1203


Epochs:  90%|█████████ | 9/10 [01:10<00:07,  7.42s/it]

Epoch: 9/10 | Train Loss: 0.7883 | Test Loss : 1.1663


Epochs: 100%|██████████| 10/10 [01:16<00:00,  7.70s/it]

Epoch: 10/10 | Train Loss: 0.7517 | Test Loss : 1.1876



