In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import joblib

# Fix the PyTorch and NumPy seeds for reproducibility
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Preprocessed feature matrices saved as .npy files
X_train_final = np.load("../processed/X_train_final.npy")
X_test_final = np.load("../processed/X_test_final.npy")

# Regression target: the "time_taken_minutes" column of the training labels CSV
y_train_df = pd.read_csv("../processed/y_train.csv")
y_train = y_train_df["time_taken_minutes"].to_numpy()

# Quick sanity check that features and targets were read with the expected shapes
print(f"Shape of training features: {X_train_final.shape}")
print(f"Shape of training target: {y_train.shape}")

Shape of training features: (45593, 25)
Shape of training target: (45593,)


In [5]:
class DeliveryTimeDataset(Dataset):
	"""In-memory dataset of feature rows with optional regression targets.

	Features are stored as a float32 tensor. When targets are supplied they
	are stored as a float32 column tensor (reshaped with ``view(-1, 1)``);
	otherwise ``targets`` is ``None`` and indexing yields features only.
	"""

	def __init__(self, features, targets=None):
		"""Convert the given array-likes to float32 tensors.

		Args:
			features: Array-like of feature rows.
			targets: Optional array-like of target values; omit for
				unlabeled data.
		"""
		self.features = torch.tensor(features, dtype=torch.float32)
		if targets is None:
			self.targets = None
		else:
			target_tensor = torch.tensor(targets, dtype=torch.float32)
			# One target per row, as a column
			self.targets = target_tensor.view(-1, 1)

	def __len__(self):
		"""Return the number of feature rows."""
		return len(self.features)

	def __getitem__(self, idx):
		"""Return ``(features, target)`` when targets exist, else just the features."""
		sample = self.features[idx]
		if self.targets is None:
			return sample
		return sample, self.targets[idx]

In [9]:
# Wrap the training arrays in a Dataset and serve them in shuffled mini-batches
train_dataset = DeliveryTimeDataset(features=X_train_final, targets=y_train)
train_loader = DataLoader(
	dataset=train_dataset,
	batch_size=64,
	shuffle=True,
	num_workers=4,
)

In [10]:
# Pull a single mini-batch from the loader to confirm the tensor shapes
first_batch = next(iter(train_loader))
features_batch, targets_batch = first_batch
print(f"Shape of a feature batch: {features_batch.shape}")
print(f"Shape of a target batch: {targets_batch.shape}")

Shape of a feature batch: torch.Size([64, 25])
Shape of a target batch: torch.Size([64, 1])


In [11]:
class DeliveryTimeModel(nn.Module):
	"""Feedforward regressor: input_dim -> 128 -> 64 -> 32 -> 1.

	Each hidden layer is followed by ReLU; the first two hidden layers are
	additionally followed by dropout with p=0.3. The final layer is a single
	linear unit with no activation.
	"""

	def __init__(self, input_dim):
		"""Build the layer stack.

		Args:
			input_dim: Number of input features per sample.
		"""
		super().__init__()

		layers = [
			# Hidden block 1 (with dropout for regularization)
			nn.Linear(input_dim, 128),
			nn.ReLU(),
			nn.Dropout(0.3),
			# Hidden block 2 (with dropout for regularization)
			nn.Linear(128, 64),
			nn.ReLU(),
			nn.Dropout(0.3),
			# Hidden block 3
			nn.Linear(64, 32),
			nn.ReLU(),
			# Output: 1 neuron for the regression value
			nn.Linear(32, 1),
		]
		self.network = nn.Sequential(*layers)

	def forward(self, x):
		"""Run ``x`` through the sequential stack and return the result."""
		output = self.network(x)
		return output

In [16]:
# The network's input width is the number of feature columns
input_size = X_train_final.shape[1]
model = DeliveryTimeModel(input_dim=input_size)
model = model.to(device)  # nn.Module.to returns the same module, now on `device`
model

DeliveryTimeModel(
  (network): Sequential(
    (0): Linear(in_features=25, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [17]:
# Mean squared error loss for the regression target
criterion = nn.MSELoss()

# Adam over all model parameters, with weight decay as additional regularization
optimizer = torch.optim.Adam(
	model.parameters(),
	lr=0.001,
	weight_decay=0.0005,
)

In [18]:
# Training loop
EPOCHS = 50
for epoch in range(EPOCHS):
	model.train() # Training mode, so the dropout layers are active

	running_loss = 0.0 # Sum of squared-error loss over all samples seen this epoch
	samples_seen = 0
	for features, targets in train_loader:
		features, targets = features.to(device), targets.to(device)

		predictions = model(features)
		loss = criterion(predictions, targets)

		optimizer.zero_grad() # Clear gradients left over from the previous step
		loss.backward() # Backpropagation
		optimizer.step() # Update weights

		# criterion returns the batch mean, so weight it by the batch size;
		# otherwise the smaller final batch counts as much as a full one.
		batch_size = features.size(0)
		running_loss += loss.item() * batch_size
		samples_seen += batch_size

	# Per-sample mean loss for the epoch (guarded against an empty loader)
	epoch_loss = running_loss / max(samples_seen, 1)
	print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {epoch_loss:.4f}")

print("Finished training.")

Epoch [1/50], Loss: 90.7885
Epoch [2/50], Loss: 43.6608
Epoch [3/50], Loss: 40.4026
Epoch [4/50], Loss: 38.4261
Epoch [5/50], Loss: 36.4508
Epoch [6/50], Loss: 34.8755
Epoch [7/50], Loss: 34.0279
Epoch [8/50], Loss: 32.9314
Epoch [9/50], Loss: 32.3844
Epoch [10/50], Loss: 31.9744
Epoch [11/50], Loss: 31.4546
Epoch [12/50], Loss: 30.7494
Epoch [13/50], Loss: 30.4815
Epoch [14/50], Loss: 30.1585
Epoch [15/50], Loss: 29.5894
Epoch [16/50], Loss: 29.4186
Epoch [17/50], Loss: 28.9220
Epoch [18/50], Loss: 28.8814
Epoch [19/50], Loss: 28.5883
Epoch [20/50], Loss: 28.3768
Epoch [21/50], Loss: 28.3908
Epoch [22/50], Loss: 28.4344
Epoch [23/50], Loss: 27.9184
Epoch [24/50], Loss: 28.0651
Epoch [25/50], Loss: 27.6314
Epoch [26/50], Loss: 27.2361
Epoch [27/50], Loss: 27.5281
Epoch [28/50], Loss: 27.1668
Epoch [29/50], Loss: 26.9722
Epoch [30/50], Loss: 26.4862
Epoch [31/50], Loss: 26.0432
Epoch [32/50], Loss: 25.2987
Epoch [33/50], Loss: 24.9296
Epoch [34/50], Loss: 24.6173
Epoch [35/50], Loss: 24

In [19]:
# Evaluation on the training set (train_loader); no held-out targets are used here
model.eval() # Set to evaluation mode (disables dropout)
prediction_batches = []
target_batches = []

with torch.no_grad():
	for features, targets in train_loader:
		# Only the features need to be on the model's device; targets stay on the CPU
		predictions = model(features.to(device))

		# Collect one array per batch rather than one tiny array per sample
		prediction_batches.append(predictions.cpu().numpy())
		target_batches.append(targets.numpy())

# Join the per-batch (batch, 1) arrays and flatten to shape (n_samples,)
all_predictions = np.concatenate(prediction_batches, axis=0).ravel()
all_targets = np.concatenate(target_batches, axis=0).ravel()

rmse = np.sqrt(np.mean((all_predictions - all_targets) ** 2))
print(f"Training RMSE: {rmse:.4f} minutes")


Training RMSE: 4.2415 minutes


In [27]:
# Put the RMSE in context using summary statistics of the target variable
min_time = y_train.min()
max_time = y_train.max()
mean_time = y_train.mean()
median_time = np.median(y_train)
mode = pd.Series(y_train).mode()[0] # First (smallest) value if several modes tie
print(f"Min time: {min_time}, Max time: {max_time}, Mode: {mode}, Mean: {mean_time}, Median: {median_time}")

# The population standard deviation equals the RMSE of always predicting the mean
baseline_rmse = np.std(y_train)
print(f"Standard Deviation (Baseline RMSE): {baseline_rmse:.4f} minutes")

# Relative RMSE reduction versus that constant-mean baseline, in percent
improvement = (baseline_rmse - rmse) / baseline_rmse * 100
print(f"Improvement over baseline: {improvement:.2f}%")

Min time: 10.0, Max time: 54.0, Mode: 26.0, Mean: 26.29460662821047, Median: 26.0
Standard Deviation (Baseline RMSE): 9.3837 minutes
Improvement over baseline: 54.80%


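An improvement of around 55% over the baseline (a training RMSE of about 4.24 minutes versus a target standard deviation of about 9.38 minutes) is quite significant, indicating that the model is effectively capturing patterns in the data to predict delivery times more accurately than a naive approach that always predicts the mean. Note that this RMSE is computed on the training data, so it should be confirmed on held-out data before drawing conclusions about generalization. With that caveat, the result suggests that our feature engineering and model architecture choices are beneficial for this regression task.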

Things learned:
- How to prepare data and a preprocessing pipeline using pandas, numpy and sklearn.
- How to load data with PyTorch DataLoader.
- How to build a feedforward neural network for regression using PyTorch.
- How to train and evaluate the model, including calculating RMSE and comparing it to a baseline.
- How to evaluate time and location data for feature engineering.