In [1]:
import torch
import time
import os
import copy
import json
import random
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from pytorch_tabnet.metrics import Metric
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.metrics import r2_score, mean_squared_error

def fix_random(seed: int) -> None:
    """Fix all the possible sources of randomness.

    Args:
        seed: the seed to use. 
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True  # slower

fix_random(42)

In [2]:
df = pd.read_csv('dataset.csv')

# PyTorch Device
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

print('Using device:', device)

Using device: mps


In [3]:
X = df.drop('rating', axis=1)
Y = df['rating']

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state=42)

X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
X_test = X_test.to_numpy()

Y_train = Y_train.to_numpy()
Y_val = Y_val.to_numpy()
Y_test = Y_test.to_numpy()

Y_train = Y_train.reshape(-1, 1)
Y_val = Y_val.reshape(-1, 1)

# PCA
pca = PCA(n_components=0.95)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_val = pca.transform(X_val)
X_test = pca.transform(X_test)

print(f'Number of training samples: {X_train.shape[0]}')
print(f'Number of validation samples: {X_val.shape[0]}')
print(f'Number of testing samples: {X_test.shape[0]}')
print(f'\nNumber of features: {X_train.shape[1]}')

Number of training samples: 9946
Number of validation samples: 1106
Number of testing samples: 2764

Number of features: 552


In [4]:
# hyperparameters
nums_epochs = [200]
batch_sizes = [32, 64, 128]
patience = [10]
n_d_a = [64, 128, 256]
n_shared = [2]
n_indipendent = [2]
n_step = [3,4,5,6,7,8,9,10]
gamma = [1.3]
epsilon = [1e-8]

hyperparameters = itertools.product(n_d_a, n_step,n_indipendent,n_shared, gamma, epsilon,nums_epochs, batch_sizes)
n_comb = len(n_d_a)*len(n_step)*len(n_indipendent)*len(n_shared)*len(gamma)*len(epsilon)*len(nums_epochs)*len(batch_sizes)
print (f'Number of hyperparameter combinations: {n_comb}')

Number of hyperparameter combinations: 72


In [5]:
def get_model(n_d_a, n_step,n_indipendent,n_shared, gamma, epsilon):
    model = TabNetRegressor(
        # n_d: the dimensionality of the output space of the feature transformer network (default 64)
        n_d=n_d_a,
        # n_a: the dimensionality of the output space of the attention network (default 64)
        n_a=n_d_a,
        # n_steps: the number of sequential steps in the attention mechanism (default 3)
        n_steps=n_step,
        # gamma: the scaling factor for the feature transformer network (default 1.3)
        gamma=gamma,
        # optimizerm name of optimizer to use (default Adam)
        optimizer_fn=torch.optim.Adam,
        # n_independent: the number of independent feature transformer networks to use (default 2)
        n_independent=n_indipendent,
        # n_shared: the number of shared feature transformer networks to use (default 2)
        n_shared=n_shared,
        # epsilon: a small value to add to the denominator of the feature importance calculation to avoid division by zero (default 1e-15)
        epsilon=epsilon,
        # seed: the random seed to use for reproducibility (default None)
        seed=42,  
        verbose=1
    )
    return model

In [6]:
current_iter = 0

best_model = None
best_mse = float('inf')
criterion = torch.nn.MSELoss()

for n_d_a, n_step,n_indipendent,n_shared, gamma, epsilon,nums_epochs, batch_sizes in hyperparameters:
    current_iter += 1

    print("\nIterations {}/{} - Hyperparameters:  batch_sizes={}, nums_epochs={}, n_d = {}, n_a={}, n_step={}, n_indipendent={}, n_shared={}, gamma={}, epsilon={}".format(
        current_iter, n_comb, batch_sizes, nums_epochs, n_d_a, n_d_a, n_step, n_indipendent, n_shared, gamma, epsilon ))

    model = get_model(n_d_a, n_step, n_indipendent, n_shared, gamma, epsilon)

    # train
    model.fit(
                X_train=X_train,
                y_train=Y_train,
                eval_set=[(X_val, Y_val)],
                eval_metric=['mse'],
                # patience: the number of epochs to wait without improvement in validation loss before early stopping (default 10)
                patience=10,
                # batch_size: the number of samples per batch (default 1024)
                batch_size=batch_sizes,
                # virtual_batch_size: the number of samples per virtual batch (default 128)
                virtual_batch_size=128,
                # num_workers: the number of worker processes to use for data loading (default 0)
                num_workers=0,
                # drop_last: whether to drop the last incomplete batch if the dataset size is not divisible by the batch size (default False)
                drop_last=False,
                # max_epochs: the maximum number of epochs to train for (default 100)
                max_epochs=nums_epochs,
            )

    # Predict
    preds = model.predict(X_test)

    # Calculate the MSE
    mse = mean_squared_error(Y_test, preds)
    
    if mse < best_mse:
        best_mse = mse
        best_model = model
        # # save model
        # torch.save(best_model, 'best_tab_model.pt')
        # # save config
        # with open('best_tab_model_config.json', 'w') as f:
        #     json.dump({'n_d':n_d, 'n_a':n_a, 'n_step':n_step, 'n_indipendent':n_indipendent, 'n_shared':n_shared, 'gamma':gamma, 'epsilon':epsilon, 'batch_sizes':batch_sizes, 'nums_epochs':nums_epochs }, f)
            
            
    print("Model MSE: {:.6f} - Best MSE: {:.6f}".format(mse, best_mse))
    print("Model R2 Score: {:.6f} - Best R2 Score: {:.6f}".format(r2_score(Y_test, preds), r2_score(Y_test, best_model.predict(X_test))))



Iterations 1/72 - Hyperparameters:  batch_sizes=32, nums_epochs=200, n_d = 64, n_a=64, n_step=3, n_indipendent=2, n_shared=2, gamma=1.3, epsilon=1e-08




epoch 0  | loss: 0.76327 | val_0_mse: 0.37864 |  0:00:13s
epoch 1  | loss: 0.2808  | val_0_mse: 0.23665 |  0:00:27s
epoch 2  | loss: 0.20526 | val_0_mse: 0.10049 |  0:00:42s
epoch 3  | loss: 0.10957 | val_0_mse: 0.05906 |  0:00:55s
epoch 4  | loss: 0.07281 | val_0_mse: 0.06235 |  0:01:09s
epoch 5  | loss: 0.04873 | val_0_mse: 0.01915 |  0:01:22s
epoch 6  | loss: 0.03963 | val_0_mse: 0.01618 |  0:01:35s
epoch 7  | loss: 0.03608 | val_0_mse: 0.01176 |  0:01:48s
epoch 8  | loss: 0.03231 | val_0_mse: 0.01518 |  0:02:01s
epoch 9  | loss: 0.03567 | val_0_mse: 0.01816 |  0:02:17s
epoch 10 | loss: 0.03959 | val_0_mse: 0.01679 |  0:02:30s
epoch 11 | loss: 0.03309 | val_0_mse: 0.0165  |  0:02:44s
epoch 12 | loss: 0.0302  | val_0_mse: 0.01721 |  0:02:58s
epoch 13 | loss: 0.02811 | val_0_mse: 0.01465 |  0:03:11s
epoch 14 | loss: 0.03189 | val_0_mse: 0.05378 |  0:03:25s
epoch 15 | loss: 0.02905 | val_0_mse: 0.0543  |  0:03:38s
epoch 16 | loss: 0.02978 | val_0_mse: 0.0331  |  0:03:52s
epoch 17 | los



Model MSE: 0.012397 - Best MSE: 0.012397
Model R2 Score: 0.944071 - Best R2 Score: 0.944071

Iterations 2/72 - Hyperparameters:  batch_sizes=64, nums_epochs=200, n_d = 64, n_a=64, n_step=3, n_indipendent=2, n_shared=2, gamma=1.3, epsilon=1e-08




epoch 0  | loss: 0.91322 | val_0_mse: 0.44907 |  0:00:09s
epoch 1  | loss: 0.17522 | val_0_mse: 0.20618 |  0:00:18s
epoch 2  | loss: 0.14037 | val_0_mse: 0.10915 |  0:00:28s
epoch 3  | loss: 0.12945 | val_0_mse: 0.11945 |  0:00:38s
epoch 4  | loss: 0.11363 | val_0_mse: 0.1006  |  0:00:47s
epoch 5  | loss: 0.09397 | val_0_mse: 0.06025 |  0:00:56s
epoch 6  | loss: 0.06891 | val_0_mse: 0.04598 |  0:01:06s
epoch 7  | loss: 0.06752 | val_0_mse: 0.08414 |  0:01:16s
epoch 8  | loss: 0.03783 | val_0_mse: 0.02421 |  0:01:25s
epoch 9  | loss: 0.02932 | val_0_mse: 0.01594 |  0:01:35s
epoch 10 | loss: 0.03177 | val_0_mse: 0.0177  |  0:01:44s
epoch 11 | loss: 0.0226  | val_0_mse: 0.02537 |  0:01:54s
epoch 12 | loss: 0.02003 | val_0_mse: 0.02292 |  0:02:03s
epoch 13 | loss: 0.01869 | val_0_mse: 0.01581 |  0:02:13s
epoch 14 | loss: 0.02052 | val_0_mse: 0.01207 |  0:02:22s
epoch 15 | loss: 0.02142 | val_0_mse: 0.01572 |  0:02:32s
epoch 16 | loss: 0.02508 | val_0_mse: 0.00878 |  0:02:43s
epoch 17 | los



Model MSE: 0.008318 - Best MSE: 0.008318
Model R2 Score: 0.962473 - Best R2 Score: 0.962473

Iterations 3/72 - Hyperparameters:  batch_sizes=128, nums_epochs=200, n_d = 64, n_a=64, n_step=3, n_indipendent=2, n_shared=2, gamma=1.3, epsilon=1e-08




epoch 0  | loss: 1.65315 | val_0_mse: 1.25198 |  0:00:05s
epoch 1  | loss: 0.30322 | val_0_mse: 0.36639 |  0:00:11s
epoch 2  | loss: 0.2582  | val_0_mse: 0.3643  |  0:00:16s
epoch 3  | loss: 0.24437 | val_0_mse: 0.15897 |  0:00:22s
epoch 4  | loss: 0.14972 | val_0_mse: 0.10124 |  0:00:27s
epoch 5  | loss: 0.10065 | val_0_mse: 0.11218 |  0:00:33s
epoch 6  | loss: 0.07446 | val_0_mse: 0.06795 |  0:00:38s
epoch 7  | loss: 0.05395 | val_0_mse: 0.03607 |  0:00:44s
epoch 8  | loss: 0.04008 | val_0_mse: 0.04405 |  0:00:50s
epoch 9  | loss: 0.03634 | val_0_mse: 0.02483 |  0:00:56s
epoch 10 | loss: 0.02516 | val_0_mse: 0.02107 |  0:01:01s
epoch 11 | loss: 0.02424 | val_0_mse: 0.01704 |  0:01:09s
epoch 12 | loss: 0.02067 | val_0_mse: 0.01158 |  0:01:15s
epoch 13 | loss: 0.01928 | val_0_mse: 0.02224 |  0:01:20s
epoch 14 | loss: 0.01694 | val_0_mse: 0.01275 |  0:01:26s
epoch 15 | loss: 0.01658 | val_0_mse: 0.02593 |  0:01:31s
epoch 16 | loss: 0.01725 | val_0_mse: 0.02399 |  0:01:37s
epoch 17 | los



Model MSE: 0.007774 - Best MSE: 0.007774
Model R2 Score: 0.964929 - Best R2 Score: 0.964929

Iterations 4/72 - Hyperparameters:  batch_sizes=32, nums_epochs=200, n_d = 64, n_a=64, n_step=4, n_indipendent=2, n_shared=2, gamma=1.3, epsilon=1e-08




epoch 0  | loss: 0.85539 | val_0_mse: 0.3611  |  0:00:18s
epoch 1  | loss: 0.26385 | val_0_mse: 0.21781 |  0:00:39s


In [None]:
# Predict
preds = model.predict(X_test)

# Calculate the RMSE
rmse = mean_squared_error(Y_test, preds)

# Evaluate the model
print(f'R2 Score: {r2_score(Y_test, preds)}')
print(f'RMSE: {rmse}')

In [None]:
# Plot the results with sns
sns.set_style('darkgrid')
sns.set_context('talk')
sns.set_palette('colorblind')

plt.figure(figsize=(10, 6))
plt.scatter(Y_test, preds, s=1)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.title('True Values vs Predictions')
plt.show()