In [None]:
import pandas as pd

# Load the pickled frame and discard the ORIGIN_STAND column in a single chain.
# NOTE(review): unpickling can execute code stored in the file, so only load
# pickles that come from a trusted source.
df = pd.read_pickle('./train_cleaned.pkl').drop(columns=['ORIGIN_STAND'])
# Preview the first rows of what is left.
print(df.head())

In [None]:
'''
Configure device for training with PyTorch
'''

import torch
import torch.nn as nn
import torchvision.transforms as transforms

# Pick the compute device by preference: CUDA GPU, then Apple MPS, then CPU.
# The MPS backend is looked up defensively because older PyTorch builds do not
# expose `torch.backends.mps` at all.
_mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    device = 'cuda'
elif _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(device)

In [None]:
# Select the model inputs (three feature columns) and the regression target.
X = df.loc[:, ['TAXI_ID', 'TIMESTAMP', 'DAY_TYPE']]
# A list selector keeps the target as a one-column DataFrame rather than a Series.
y = df.loc[:, ['travel_time']]
print(X.shape)

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Fixed seed so the shuffled 70/30 split is identical on every Restart & Run All.
SPLIT_SEED = 42


def _as_float_tensor(array):
    """Copy a NumPy array into a float32 tensor placed on the selected `device`."""
    return torch.tensor(array, dtype=torch.float32, device=device)


# split the data into train and test portions
# train_test_split returns [X_train, X_test, y_train, y_test] in that order,
# so each part can be converted and unpacked in one pass.
X_train, X_test, y_train, y_test = (
    _as_float_tensor(part)
    for part in train_test_split(
        X.to_numpy(),
        y.to_numpy(),
        train_size=0.7,
        shuffle=True,
        random_state=SPLIT_SEED,
    )
)
# NOTE(review): float32 stores integers exactly only up to 2**24. If TAXI_ID or
# TIMESTAMP hold larger raw values they are rounded by this cast; confirm
# whether the cleaned data is already scaled or encoded.

print(X_train.shape)

In [None]:
# Fully connected regressor: 3 inputs -> 84 -> 32 -> 20 -> 8 -> 1 output,
# with a ReLU after every linear layer except the last one.
layer_widths = [3, 84, 32, 20, 8, 1]
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.pop()  # the output layer is left without an activation

# `.to` moves the module's parameters in place and returns the module itself.
model = nn.Sequential(*layers).to(device)

# Count every scalar weight and bias across all layers.
num_param = sum(param.numel() for param in model.parameters())
print("Number of parameters: " + str(num_param))

In [None]:
# Optimisation setup: SGD with momentum, minimising mean squared error.
learning_rate = 0.001
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, momentum=0.9)
loss_fn = nn.MSELoss()

In [None]:
def test(X_test, y_test, model, criterion):
    test_loss = 0.0
    with torch.no_grad():
        for data, labels in zip(X_test, y_test):
            data, labels = data.to(device), labels.to(device)

            y_pred = model(data)
            
            loss = criterion(labels, y_pred)
            test_loss += loss
    return test_loss.cpu().detach().numpy() / len(X_test)

In [None]:
from tqdm import tqdm

# hyperparameters
num_epoch = 10

# Mean training loss of each epoch, in order.
losses = []

# Make sure the model is in training mode before optimising.
model.train()

for epoch in tqdm(range(num_epoch)):
    running_loss = 0.0

    # One optimizer step per sample (batch size 1). X_train / y_train were
    # created on `device` already, so no per-sample transfer is needed.
    for data, labels in zip(X_train, y_train):
        optimizer.zero_grad()

        y_pred = model(data)

        # PyTorch criteria take (input, target): prediction first, label second.
        loss = loss_fn(y_pred, labels)
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python float without an intermediate NumPy array.
        running_loss += loss.item()

    losses.append(running_loss / len(X_train))

print("Finished Training")

print("Testing")
test_loss = test(X_test, y_test, model, loss_fn)
print(f'\n\ntest loss: {test_loss}')