In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
import json
import random
import itertools
import os
import warnings
warnings.filterwarnings('ignore')
import copy

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, GridSearchCV, HalvingGridSearchCV
from sklearn.metrics import accuracy_score, recall_score, precision_score, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer
from sklearn import decomposition
from sklearn.decomposition import PCA


import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter

def fix_random(seed: int) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
fix_random(42)


In [2]:
torch.cuda.empty_cache()

In [3]:
df = pd.read_csv('dataset.csv')

# PyTorch Device
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

print('Using device:', device)

Using device: cuda


In [5]:
X = df.drop('rating', axis=1)
Y = df['rating']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state=42)

X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()
Y_train = Y_train.to_numpy()
Y_val = Y_val.to_numpy()
Y_test = Y_test.to_numpy()

In [6]:
pca = PCA(n_components=0.95)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_val = pca.transform(X_val)
X_test = pca.transform(X_test)

In [6]:
print(f'Number of training samples: {X_train.shape[0]}')
print(f'Number of validation samples: {X_val.shape[0]}')
print(f'Number of testing samples: {X_test.shape[0]}')
print(f'\nNumber of features: {X_train.shape[1]}')

Number of training samples: 9934
Number of validation samples: 1104
Number of testing samples: 2760

Number of features: 1148


In [12]:
print(f'Number of training samples: {X_train.shape[0]}')
print(f'Number of validation samples: {X_val.shape[0]}')
print(f'Number of testing samples: {X_test.shape[0]}')
print(f'\nNumber of features: {X_train.shape[1]}')

val_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(Y_val, dtype=torch.float32)), batch_size=Y_val.shape[0], shuffle=False)
test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(Y_test, dtype=torch.float32)), batch_size=Y_test.shape[0], shuffle=False)

Number of training samples: 9934
Number of validation samples: 1104
Number of testing samples: 2760

Number of features: 1148


In [13]:
def get_model(input_size, hidden_size,dropout_prob=0, depth=1):
    model = nn.Sequential(
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Dropout(dropout_prob)
    )
    for i in range(depth):
        model.append(torch.nn.Linear(hidden_size, hidden_size))
        model.append(torch.nn.ReLU())
        model.append(torch.nn.Dropout(dropout_prob))

    model.append(torch.nn.Linear(hidden_size, 1))
    return torch.nn.Sequential(*model)


In [14]:
# Hyperparameters
hidden_sizes =  [256, 512, 1024]
nums_epochs = [200]
depth = [3, 4, 5]
batch= [8, 16, 32]
learning_rate = [0.01, 0.001]
step_size_lr_decay = [10, 20]
momentum = [0.9]
dropout_prob = 0.2
patience = 10

hyperparameters = itertools.product(hidden_sizes, depth, nums_epochs, batch, learning_rate, step_size_lr_decay, momentum)
n_comb = len(hidden_sizes)*len(depth)*len(nums_epochs)*len(batch)*len(learning_rate)*len(step_size_lr_decay)*len(momentum)
print (f'Number of hyperparameter combinations: {n_comb}')

Number of hyperparameter combinations: 108


In [15]:
#function to evaluate the model
def test_model(model,criterion,test_loader, device):
    model.eval()
    y_pred = torch.tensor([]).to(device)
    y_true = torch.tensor([]).to(device)
    test_loss = 0
    for X, Y in test_loader:
        X = X.to(device)
        Y = Y.to(device)
        Y_hat = model(X)
        loss = criterion(Y_hat, Y.unsqueeze(1))
        test_loss += loss.item()
        y_pred = torch.cat((y_pred, Y_hat.squeeze()))
        y_true = torch.cat((y_true, Y.detach()))
    test_loss /= len(test_loader)
    return test_loss, y_pred.detach().cpu().numpy(), y_true.detach().cpu().numpy()


In [16]:
#function to train the model
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, writer, device, patience, num_epochs):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1000000
    best_epoch = 0
    early_stop_counter = 0
    start = time.time()

    for epoch in range(num_epochs):
        model.train()
        start_epoch = time.time()
        train_loss = 0
        for X, Y in train_loader:
            X = X.to(device)
            Y = Y.to(device)
            optimizer.zero_grad()
            Y_hat = model(X)
            loss = criterion(Y_hat.squeeze(), Y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        writer.add_scalar('Loss/train', train_loss, epoch)
        model.eval()
        val_loss,y_pred,y_true = test_model(model,criterion,val_loader,device)
        writer.add_scalar('Loss/val', val_loss, epoch)
        scheduler.step(val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            best_epoch = epoch
            best_model_wts = copy.deepcopy(model.state_dict())
            early_stop_counter = 0
        else:
            early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f'Early stopping at epoch {epoch}')
            break
        
        print('Epoch [{}/{}] - {:.2f} seconds - val_loss: {:.6f} - patience: {}'.format(epoch+1,
              num_epochs, time.time() - start_epoch, val_loss, early_stop_counter), end='\r')

    print('\nTraining ended after {:.2f} seconds - Best val_loss: {:.6f}'.format(time.time() - start, best_loss))

    model.load_state_dict(best_model_wts)
    return model, best_epoch, best_loss




In [17]:
#grid search loop
best_mse = float('inf')
current_iter = 0
for i, (hidden_size, depth, num_epochs, batch, lr, step_size, momentum) in enumerate(hyperparameters):
    current_iter += 1
    print(f'\nHyperparameter combination {i+1}/{n_comb}')
    print(f'current_iter: {current_iter},hidden_size: {hidden_size}, depth: {depth}, num_epochs: {num_epochs}, batch_size: {batch}, lr: {lr}, step_size: {step_size}, momentum: {momentum}')
    writer = SummaryWriter(f'run/NoPCA/Dnn/hidden_size={hidden_size}, depth={depth}, num_epochs={num_epochs}, batch_size={batch}, lr={lr}, step_size={step_size}, momentum={momentum}')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(Y_train, dtype=torch.float32)), batch_size=batch, shuffle=True)
    model = get_model(X_train.shape[1], hidden_size, dropout_prob, depth).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr,momentum=momentum)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=step_size,gamma=0.1)
    model, best_epoch, best_loss = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, writer, device, patience, num_epochs)
   
    
    print(f'Best epoch: {best_epoch+1} - Best val_loss: {best_loss}')
    print(f'Testing model...')
    test_loss,_,_ = test_model(model,criterion,test_loader, device)
    writer.add_hparams({'hidden_size': hidden_size, 'depth': depth, 'batch': batch,'lr': lr, 'step_size': step_size, 'momentum': momentum}, {'hparam/mse': test_loss})
    if test_loss < best_mse:
        best_mse = test_loss
        best_model = model
        
        history_loss = best_epoch
        history_val_loss = best_loss

        torch.save(model.state_dict(), 'best_model_nopca.pth')
        # save config
        with open('best_model_config_nopca.json', 'w') as f:
            json.dump({'hidden_size': hidden_size, 'depth': depth, 'num_epochs': num_epochs, 'batch': batch,
                       'lr': lr, 'step_size': step_size}, f)   

    writer.flush()
    print(f'Model MSE: {test_loss}, Best MSE: {best_mse}')
    print(f'Hyperparameter combination {i+1}/{n_comb} finished\n')


Hyperparameter combination 1/108
current_iter: 1,hidden_size: 256, depth: 3, num_epochs: 200, batch_size: 8, lr: 0.01, step_size: 10, momentum: 0.9
Early stopping at epoch 31nds - val_loss: 0.005250 - patience: 9

Training ended after 117.95 seconds - Best val_loss: 0.004856
Best epoch: 22 - Best val_loss: 0.004855860490351915
Testing model...
Model MSE: 0.005006830673664808, Best MSE: 0.005006830673664808
Hyperparameter combination 1/108 finished


Hyperparameter combination 2/108
current_iter: 2,hidden_size: 256, depth: 3, num_epochs: 200, batch_size: 8, lr: 0.01, step_size: 20, momentum: 0.9
Early stopping at epoch 31nds - val_loss: 0.005327 - patience: 9

Training ended after 113.17 seconds - Best val_loss: 0.004940
Best epoch: 22 - Best val_loss: 0.004940300714224577
Testing model...
Model MSE: 0.005045547615736723, Best MSE: 0.005006830673664808
Hyperparameter combination 2/108 finished


Hyperparameter combination 3/108
current_iter: 3,hidden_size: 256, depth: 3, num_epochs: 20

In [34]:
#best configuration with PCA
criterion =  torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with open('best_model_config.json', 'r') as f:
        best_model_config = json.load(f)

best_model = get_model(X_train.shape[1], best_model_config['hidden_size'], dropout_prob, best_model_config['depth'])
best_model.load_state_dict(torch.load('best_model.pth'))
best_model.to(device)
best_model.eval()

# evaluate best model
best_mse,y_pred,y_true= test_model(best_model, criterion, test_loader,device)   
print("Best model - MSE: {:.6f}".format(best_mse))
r2 = r2_score(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
print("R2: {:.6f} - MSE: {:.6f}".format(r2, mse))


Best model - MSE: 0.004884
R2: 0.978349 - MSE: 0.004884


In [18]:
#best configuration without PCA
criterion =  torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with open('best_model_config_nopca.json', 'r') as f:
        best_model_config = json.load(f)

best_model = get_model(X_train.shape[1], best_model_config['hidden_size'], dropout_prob, best_model_config['depth'])
best_model.load_state_dict(torch.load('best_model_nopca.pth'))
best_model.to(device)
best_model.eval()

# evaluate best model
best_mse,y_pred,y_true= test_model(best_model, criterion, test_loader,device)   
print("Best model No PCA - MSE: {:.6f}".format(best_mse))
r2 = r2_score(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
print("R2: {:.6f} - MSE: {:.6f}".format(r2, mse))

Best model No PCA - MSE: 0.004440
R2: 0.980314 - MSE: 0.004440
