# Neural Network trials

In this notebook, we will train various neural networks on the data.

In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torchinfo import summary
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import root_mean_squared_error

# Select the compute device: use CUDA when torch reports it is available, otherwise the CPU.
# The helper functions defined later in the notebook use this as their default `device`.
device = "cuda" if torch.cuda.is_available() else "cpu"

### Import the datasets

In [2]:
# Load the processed training data, parse the TimeStamp column into
# datetimes, and use it as the index, all in one chained expression.
train = (
    pd.read_csv("../Dataset/processed/train.csv")
    .assign(TimeStamp=lambda df: pd.to_datetime(df["TimeStamp"]))
    .set_index("TimeStamp")
)

# Report (rows, columns) and then show the first 5 rows
print(train.shape)
train.head()


(38373, 59)


Unnamed: 0_level_0,Temperature,Rainfall_last_hour,Snowfall_last_hour,Cloud_Cover,Traffic_Vol,Holiday_No,Holiday_Yes,Weather_Airborne particles,Weather_Airborne smoke,Weather_Clear skies,...,Weather_Desc_Strong drizzle,Weather_Desc_Sudden windstorm,Weather_Desc_Torrential downpour,Day_Friday,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-02-10 09:00:00,260.1769,0.693147,0.693147,40,5555.0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2008-02-10 10:00:00,292.7521,0.693147,0.693147,75,4525.0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2008-02-10 11:00:00,293.4369,0.693147,0.693147,90,4772.0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2008-02-10 12:00:00,295.1524,0.693147,0.693147,90,5031.0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2008-02-10 13:00:00,360.6201,0.693147,0.693147,75,4928.0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [3]:
# View the last 5 rows of the training data
train.tail()

Unnamed: 0_level_0,Temperature,Rainfall_last_hour,Snowfall_last_hour,Cloud_Cover,Traffic_Vol,Holiday_No,Holiday_Yes,Weather_Airborne particles,Weather_Airborne smoke,Weather_Clear skies,...,Weather_Desc_Strong drizzle,Weather_Desc_Sudden windstorm,Weather_Desc_Torrential downpour,Day_Friday,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-06-30 19:00:00,796.9329,0.693147,0.693147,90,3252.0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2014-06-30 20:00:00,736.0369,0.693147,0.693147,91,2941.0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2014-06-30 21:00:00,746.9289,0.693147,0.693147,91,2703.0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2014-06-30 22:00:00,672.3649,0.693147,0.693147,75,2762.0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2014-06-30 23:00:00,628.5049,0.693147,0.693147,40,2023.0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [4]:
# Load the processed test data without its Traffic_Vol column, parse the
# TimeStamp column into datetimes, and use it as the index.
test = (
    pd.read_csv("../Dataset/processed/test.csv")
    .drop(columns="Traffic_Vol")
    .assign(TimeStamp=lambda df: pd.to_datetime(df["TimeStamp"]))
    .set_index("TimeStamp")
)

# View the first 5 rows of the test data
test.head()

Unnamed: 0_level_0,Temperature,Rainfall_last_hour,Snowfall_last_hour,Cloud_Cover,Holiday_No,Holiday_Yes,Weather_Airborne particles,Weather_Airborne smoke,Weather_Clear skies,Weather_Cloudy skies,...,Weather_Desc_Strong drizzle,Weather_Desc_Sudden windstorm,Weather_Desc_Torrential downpour,Day_Friday,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-07 00:00:00,24.5,0.693147,0.0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2014-01-07 01:00:00,24.3,0.693147,0.0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2014-01-07 02:00:00,23.6,0.693147,0.0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2014-01-07 03:00:00,23.27,0.693147,0.0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2014-01-07 04:00:00,22.41,0.693147,0.0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [5]:
# View the last 5 rows of the test data
test.tail()

Unnamed: 0_level_0,Temperature,Rainfall_last_hour,Snowfall_last_hour,Cloud_Cover,Holiday_No,Holiday_Yes,Weather_Airborne particles,Weather_Airborne smoke,Weather_Clear skies,Weather_Cloudy skies,...,Weather_Desc_Strong drizzle,Weather_Desc_Sudden windstorm,Weather_Desc_Torrential downpour,Day_Friday,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-09-30 19:00:00,10.7,0.693147,0.0,75,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2014-09-30 20:00:00,9.91,0.693147,0.0,90,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2014-09-30 21:00:00,9.78,0.693147,0.0,90,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2014-09-30 22:00:00,9.64,0.693147,0.0,90,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2014-09-30 23:00:00,9.77,0.693147,0.0,90,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0


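The test set contains 3 months of data from July to September. Note that the index displayed above starts at 2014-01-07 (while the training data shown earlier runs through 2014-06-30), so it is worth verifying that the day and month of the timestamps are being parsed in the intended order.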

### Splitting the data 

In [6]:
# Separate the predictors from the Traffic_Vol target as NumPy arrays
features = train.drop(columns="Traffic_Vol").values
target = train["Traffic_Vol"].values

# Positional (unshuffled) split: the first 80% of rows are used for training,
# the remaining rows for validation
split_size = int(0.8 * (len(train)))
X_train, X_val = features[:split_size], features[split_size:]
y_train, y_val = target[:split_size], target[split_size:]

# Check the shapes of the training and validation arrays
X_train.shape, y_train.shape, X_val.shape, y_val.shape

((30698, 58), (30698,), (7675, 58), (7675,))

## Create Dataset and DataLoaders

In [7]:
# Creating the dataset
class TrafficDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target value.

    Both inputs are converted to ``float32`` tensors once, at construction
    time, so that ``__getitem__`` is a plain index into an existing tensor
    instead of building new tensors for every sample on every epoch.

    Args:
        features: Array-like of per-sample feature rows, indexed along its
            first axis (e.g. a NumPy array).
        target: Array-like of per-sample target values with the same length
            as ``features``.

    Raises:
        ValueError: If ``features`` and ``target`` differ in length.

    Note:
        ``torch.as_tensor`` may share memory with an input that is already
        ``float32``, and ``__getitem__`` returns views into the stored
        tensors; treat returned samples as read-only.
    """

    def __init__(self, features, target):
        # Convert once up front rather than once per sample access.
        self.features = torch.as_tensor(features, dtype=torch.float32)
        self.target = torch.as_tensor(target, dtype=torch.float32)

        # Fail early: a mismatch would otherwise only surface as an index
        # error part-way through an epoch.
        if len(self.features) != len(self.target):
            raise ValueError(
                f"features and target must have the same length, "
                f"got {len(self.features)} and {len(self.target)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx`` as float32 tensors."""
        return self.features[idx], self.target[idx]

In [8]:
# Sanity check: build a TrafficDataset from the training arrays
train_dataset = TrafficDataset(X_train, y_train)

In [9]:
# Inspect the first (features, target) sample returned by the dataset
train_dataset[0]

(tensor([260.1769,   0.6931,   0.6931,  40.0000,   1.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,
           0.0000,   0.0000]),
 tensor(5555.))

In [10]:
# Wrap the training and validation arrays in TrafficDataset objects
train_data, val_data = TrafficDataset(X_train, y_train), TrafficDataset(X_val, y_val)

In [11]:
# Number of samples per mini-batch
BATCH_SIZE = 128

# Batch the training and validation datasets. shuffle=False keeps the rows in
# their stored order for both loaders.
# NOTE(review): an unshuffled *training* loader feeds the optimizer batches of
# neighbouring rows; confirm this is intended rather than shuffle=True.
train_loader, val_loader = (
    DataLoader(train_data, shuffle=False, batch_size=BATCH_SIZE),
    DataLoader(val_data, shuffle=False, batch_size=BATCH_SIZE),
)

## Modelling

### Create the `train_step`, `test_step` and `train` helper functions

In [12]:
# Create the train step function

def train_step(model: nn.Module, dataloader: DataLoader, loss_fn: nn.Module, optimizer: torch.optim.Optimizer, device: str = device):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Network to optimise. Only the batches are moved to ``device``
            here; the model itself must already be on that device.
        dataloader: Yields ``(X, y)`` batches.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The unweighted mean of the per-batch loss values, i.e. a smaller final
        batch counts as much as a full one.
    """
    # Put the model in training mode
    model.train()

    running_loss = 0.0

    # Iterate through the DataLoader
    for X, y in tqdm(dataloader, desc="Training", leave=False):

        # Move the tensors to the target device
        X, y = X.to(device), y.to(device)

        # Forward pass. Align the predictions with the target's shape instead
        # of using a bare .squeeze(): squeeze() turns a size-1 batch into a
        # 0-d tensor and leaves (B, 1) targets mismatched, so the loss would
        # broadcast rather than compare element-wise. reshape() raises if the
        # element counts do not match.
        outputs = model(X).reshape(y.shape)

        # Calculate the loss and accumulate its scalar value
        loss = loss_fn(outputs, y)
        running_loss += loss.item()

        # Clear old gradients, backpropagate, and update the parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss / len(dataloader)
        

In [13]:
# Create the test step function

def test_step(model: nn.Module, dataloader: DataLoader, loss_fn: nn.Module, device: str=device):
    """Evaluate ``model`` over ``dataloader`` without gradients and return the mean batch loss.

    Args:
        model: Network to evaluate. Only the batches are moved to ``device``
            here; the model itself must already be on that device.
        dataloader: Yields ``(X, y)`` batches.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar loss tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The unweighted mean of the per-batch loss values, i.e. a smaller final
        batch counts as much as a full one.
    """
    # Put the model in evaluation mode
    model.eval()

    running_loss = 0.0

    # Inference mode disables gradient tracking for the whole pass
    with torch.inference_mode():

        # Iterate through the dataloader
        for X, y in tqdm(dataloader, desc="Testing", leave=False):

            # Move the tensors to the target device
            X, y = X.to(device), y.to(device)

            # Forward pass. Align the predictions with the target's shape
            # instead of using a bare .squeeze(): squeeze() turns a size-1
            # batch into a 0-d tensor and leaves (B, 1) targets mismatched, so
            # the loss would broadcast rather than compare element-wise.
            outputs = model(X).reshape(y.shape)

            # Calculate the loss and accumulate its scalar value
            loss = loss_fn(outputs, y)
            running_loss += loss.item()

    return running_loss / len(dataloader)

In [16]:
# Now create the optimization loop function

def train(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    loss_fn: nn.Module,
    optimizer: torch.optim.Optimizer,
    epochs: int = 5,
    device: str = device,
):
    """Alternate a training pass and a validation pass for ``epochs`` epochs.

    After each epoch, prints the square root of the mean training loss and of
    the mean validation loss (labelled "Test Loss" in the output). Nothing is
    returned.

    NOTE(review): this definition rebinds the notebook-level name ``train``,
    which the data-loading and splitting cells use for the training DataFrame.
    Once this cell has run, those earlier cells' uses of ``train`` see this
    function until the data is reloaded.

    Args:
        model: Network to optimise.
        train_loader: Batches passed to ``train_step``.
        val_loader: Batches passed to ``test_step``.
        loss_fn: Loss callable forwarded to both step functions.
        optimizer: Optimizer forwarded to ``train_step``.
        epochs: Number of train/validate cycles to run.
        device: Device forwarded to both step functions.
    """
    epoch_bar = tqdm(range(epochs), desc="Epochs")

    for epoch in epoch_bar:
        # One optimisation pass over the training batches
        train_loss = train_step(
            model=model,
            dataloader=train_loader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
        )

        # One gradient-free pass over the validation batches
        test_loss = test_step(
            model=model,
            dataloader=val_loader,
            loss_fn=loss_fn,
            device=device,
        )

        print(f"Epoch: {epoch} | Train Loss: {np.sqrt(train_loss)} | Test Loss: {np.sqrt(test_loss)}")