In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Settings
# Show every DataFrame column when a frame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# Use seaborn's 'whitegrid' style for the plots drawn below.
sns.set_style('whitegrid')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Packages
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
# Load the modelling table and discard the 'Festival' column.
data = pd.read_csv('../data/modeling-data (1).csv').drop(columns='Festival')

# Fold the 'Semi-Urban' city category into 'Urban'; every other value is kept as is.
data['City'] = data['City'].replace('Semi-Urban', 'Urban')

import scipy.stats as stats

# Names of the non-object columns that remain after removing the five columns
# listed here (four features plus the target 'Time_taken(min)').
ContinuousFeatures = (
    data
    .drop(columns=['Vehicle_condition', 'multiple_deliveries',
                   'OrderTime', 'Delivery_person_Age', 'Time_taken(min)'])
    .select_dtypes(exclude='O')
    .columns
)

# One-hot encode the remaining categorical columns; drop_first removes one
# level per category so the dummy columns are not perfectly collinear.
data = pd.get_dummies(data, drop_first=True)


In [2]:
# Resetting the index so rows are labelled 0..n-1 (the old index is discarded)
data = data.reset_index(drop=True)

# Target is the delivery time column; every other column is a feature.
y = data['Time_taken(min)']
X = data.drop(columns='Time_taken(min)')
from sklearn.metrics import mean_absolute_error, mean_squared_error
def GetMetrics(y, predictions):
    """Return MSE, RMSE and MAE for a set of regression predictions.

    Parameters
    ----------
    y : array-like
        Ground-truth target values.
    predictions : array-like
        Predicted values, aligned with ``y``.

    Returns
    -------
    dict
        Keys ``'MSE'``, ``'RMSE'`` and ``'MAE'`` mapped to the metric values.
    """
    mse = mean_squared_error(y, predictions)
    metrics = {'MSE': mse}
    # RMSE is the square root of the MSE computed above.
    metrics['RMSE'] = np.sqrt(mse)
    metrics['MAE'] = mean_absolute_error(y, predictions)
    return metrics


from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.10,
    random_state=42,
)

from sklearn.preprocessing import MinMaxScaler

# Learn the min/max scaling from the training features only, then apply the
# same transformation to both splits so no test information leaks into it.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_pre = scaler.transform(X_train)
X_test_pre = scaler.transform(X_test)

# Targets are converted from pandas Series to plain NumPy arrays.
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [3]:
# Define Neural Network with adjustments
class Regression_nn_model(nn.Module):
    """Fully connected regression network: num_feature -> 27 -> 16 -> 8 -> 1.

    The first two hidden stages are Linear -> Dropout(p=0.3) -> ReLU, the third
    is Linear -> ReLU, and the final Linear layer emits one value per sample.

    Parameters
    ----------
    num_feature : int
        Number of input features per sample.
    """

    def __init__(self, num_feature):
        super().__init__()

        # Stage 1: num_feature -> 27
        self.layer1 = nn.Linear(num_feature, 27)
        self.dropout1 = nn.Dropout(p=0.3)
        self.layer2 = nn.ReLU()

        # Stage 2: 27 -> 16
        self.layer3 = nn.Linear(27, 16)
        self.dropout2 = nn.Dropout(p=0.3)
        self.layer4 = nn.ReLU()

        # Stage 3: 16 -> 8 (no dropout)
        self.layer5 = nn.Linear(16, 8)
        self.layer6 = nn.ReLU()

        # Output head: 8 -> 1
        self.layer7 = nn.Linear(8, 1)

    def forward(self, x):
        """Run ``x`` through the network and return a tensor whose last dimension is 1."""
        hidden = self.layer2(self.dropout1(self.layer1(x)))
        hidden = self.layer4(self.dropout2(self.layer3(hidden)))
        hidden = self.layer6(self.layer5(hidden))
        return self.layer7(hidden)




In [4]:
import torch
from torch.utils.data import Dataset

class myDataset(Dataset):
    """Dataset that holds features and targets as float32 tensors.

    Parameters
    ----------
    X : array-like
        Feature values; copied into ``self.features``.
    y : array-like
        Target values; copied into ``self.targets``.

    The shapes of both tensors are printed on construction.
    """

    def __init__(self, X, y):
        float32 = torch.float32
        self.features = torch.tensor(X, dtype=float32)
        self.targets = torch.tensor(y, dtype=float32)
        # Shape report, handy for spotting feature/target length mismatches early.
        print(f"Features shape: {self.features.shape}")
        print(f"Targets shape: {self.targets.shape}")

    def __len__(self):
        """Number of samples, taken from the targets tensor."""
        n_samples = len(self.targets)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        sample = self.features[index]
        label = self.targets[index]
        return sample, label

# Wrap the scaled training split and the scaled held-out split as tensor datasets.
# test_ds is needed by the DataLoader cell further down, which builds test_loader from it.
train_ds = myDataset(X_train_pre, y_train)
test_ds = myDataset(X_test_pre, y_test)


: 

: 

In [None]:

# Inspect the container types of the scaled test features and the test targets.
type(X_test_pre), type(y_test)

In [None]:
# Show the shapes of the scaled training features and the training targets.
X_train_pre.shape, y_train.shape

In [None]:
# Display one scaled training row.
# NOTE(review): 4320 is a hard-coded row position, presumably an arbitrary spot check — confirm.
X_train_pre[4320]

In [None]:
# Initialize model, optimizer with weight decay
batch_size = 32

# Create DataLoader instances: training batches are reshuffled every epoch,
# test batches keep their original order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# Seed before the model is built so weight initialisation is repeatable.
torch.manual_seed(1)

# Size the input layer from the training features instead of hard-coding the
# column count, so a change in the encoded feature set cannot break the model.
model = Regression_nn_model(num_feature=train_ds.features.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

num_epochs = 20

# loss_list and train_acc_list are not filled by this cell; they are kept so
# that any later cell referring to them still finds the names defined.
loss_list = []
train_acc_list = []
epoch_loss_list = []  # mean mini-batch training loss, one entry per epoch

# Training loop
for epoch in range(num_epochs):
    model.train()  # enable dropout
    running_loss = 0.0
    for features, targets in train_loader:
        logits = model(features)
        # The model emits (batch, 1) while the targets are (batch,). Without
        # aligning the shapes mse_loss broadcasts to (batch, batch) and
        # optimises the wrong quantity.
        loss = F.mse_loss(logits, targets.reshape_as(logits))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    epoch_loss_list.append(avg_loss)
    print(f"Epoch: {epoch+1:03d}/{num_epochs:03d} | Average Train Loss: {avg_loss:.2f}")



In [None]:
# Evaluate the trained model on the held-out test set

def evaluate_model(model, loader, num_epochs):
    """Compute the per-sample mean squared error of ``model`` over ``loader``.

    The model is switched to evaluation mode and gradients are disabled. The
    model does not change between repetitions, so the loss is computed once
    and repeated ``num_epochs`` times; this keeps the returned list the same
    length as a per-epoch training curve.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate; left in ``eval()`` mode afterwards.
    loader : torch.utils.data.DataLoader
        Yields ``(features, targets)`` batches; ``loader.dataset`` must
        support ``len()``.
    num_epochs : int
        Length of the returned list.

    Returns
    -------
    list of float
        ``num_epochs`` copies of the summed squared error divided by the
        number of samples in ``loader.dataset``. Empty if ``num_epochs <= 0``.

    Raises
    ------
    ZeroDivisionError
        If ``loader.dataset`` is empty.
    """
    if num_epochs <= 0:
        return []

    model.eval()  # disable dropout for evaluation
    total_loss = 0.0

    with torch.no_grad():
        for features, targets in loader:
            logits = model(features)
            # Align targets with the model output (e.g. (batch,) -> (batch, 1));
            # otherwise mse_loss broadcasts to (batch, batch) and inflates the loss.
            loss = F.mse_loss(logits, targets.reshape_as(logits), reduction='sum')
            total_loss += loss.item()

    avg_loss = total_loss / len(loader.dataset)
    return [avg_loss] * num_epochs


test_avg_loss = evaluate_model(model, test_loader, num_epochs)

# NOTE: evaluate_model is called once, after training has finished, so every
# entry of test_avg_loss comes from the same final model and the dashed curve
# is a horizontal line rather than a per-epoch history.
epochs = range(1, num_epochs + 1)

fig, ax = plt.subplots()
ax.plot(epochs, epoch_loss_list, label='Training Loss')
ax.plot(epochs, test_avg_loss, label='Test Loss', linestyle='--')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss over Epochs')
ax.legend()
plt.show()