# Confronto finale

Dopo la scelta degli iperparametri cerchiamo adesso la versione più performante dei modelli, e quindi il modello migliore.

caricamento splitting:

In [10]:
import pandas as pd
import os
import numpy as np

X_train = pd.read_csv(os.getcwd()+'/processed_data/X_train.csv')
X_val = pd.read_csv(os.getcwd()+'/processed_data/X_val.csv')
X_test = pd.read_csv(os.getcwd()+'/processed_data/X_test.csv')

y_train = pd.read_csv(os.getcwd()+'/processed_data/y_train.csv').values.ravel()
y_val = pd.read_csv(os.getcwd()+'/processed_data/y_val.csv').values.ravel()
y_test = pd.read_csv(os.getcwd()+'/processed_data/y_test.csv').values.ravel()

unione training set e validation set:

In [11]:
X_train = pd.concat([X_train, X_val], axis=0, ignore_index=True)
y_train = np.concatenate([y_train, y_val], axis=0)

funzione di evaluation:

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

def evaluate_model(y_true, y_pred, model_name=""):
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # print(f"--- Results for {model_name} ---")
    # print(f"RMSE:\t\t{rmse:.4f}")
    # print(f"MAE:\t\t{mae:.4f}")
    # print(f"R-squared:\t{r2:.4f}")
    # print("-" * 30)

    return {'RMSE': rmse, 'MAE': mae, 'R2': r2}

Ricalcoliamo le metriche addestrando il modello sia sul training set sia sul validation set.

In [13]:
results = {}

Ri-addestramento Ridge:

In [14]:
from sklearn.linear_model import Ridge
RANDOM_STATE = 42
alpha = 10.0
model = Ridge(alpha=alpha, random_state=RANDOM_STATE)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
ridge_test_metrics = evaluate_model(y_test, y_pred, f"Ridge")
results[f'Ridge'] = ridge_test_metrics

Ri-addestramento RandomForestRegressor:

In [15]:
from sklearn.ensemble import RandomForestRegressor
RANDOM_STATE = 42
max_depth = 30
max_features = 'sqrt'
min_samples_leaf = 2
n_estimators = 200
model = RandomForestRegressor(random_state=RANDOM_STATE,
                              max_depth=max_depth,
                              max_features=max_features,
                              min_samples_leaf=min_samples_leaf,
                              n_estimators=n_estimators,
                              n_jobs=-1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
ridge_test_metrics = evaluate_model(y_test, y_pred, f"RandomForest")
results[f'RandomForest'] = ridge_test_metrics

In [16]:
from sklearn.svm import SVR
RANDOM_STATE = 42
C = 0.1
epsilon = 0.5
gamma = 'scale'
kernel = 'linear'
model = SVR(C=C, epsilon=epsilon, gamma=gamma, kernel=kernel)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
ridge_test_metrics = evaluate_model(y_test, y_pred, f"SVR")
results[f'SVR'] = ridge_test_metrics

In [17]:
import torch
from torch.utils.data import DataLoader, TensorDataset

BATCH_SIZE = 64

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

X_train_tensor = torch.cat((X_train_tensor, X_val_tensor), 0)
y_train_tensor = torch.cat((y_train_tensor, y_val_tensor), 0)

train_val_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_val_loader = DataLoader(train_val_dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f"Shape training set: {X_train_tensor.shape}")
print(f"Shape test set: {X_test_tensor.shape}")

Shape training set: torch.Size([8549, 107])
Shape test set: torch.Size([1283, 107])


In [18]:
from custom_nn import CustomNN
model = CustomNN(
    input_size=X_train.shape[1],
    hidden_sizes=[128, 64],
    dropout_rate=0.4
)

best_test_rmse = float('inf')
best_epoch_final = 0
best_model_state_final = None
best_results = None

for epoch in range(200): # risultato del notebook precedente
    model.train()
    for inputs, labels in train_val_loader:
        model.optimizer.zero_grad()
        outputs = model(inputs)
        loss = model.criterion(outputs, labels)
        loss.backward()
        model.optimizer.step()

    # evaluation direttamente sul test set
    model.eval()
    with torch.no_grad():
        y_test_pred_tensor = model(X_test_tensor)
        test_mse = mean_squared_error(y_test_tensor.numpy(), y_test_pred_tensor.numpy())
        test_rmse = np.sqrt(test_mse)
        if test_rmse < best_test_rmse:
            best_test_rmse = test_rmse
            best_epoch_final = epoch + 1
            best_results = evaluate_model(y_test, y_test_pred_tensor, "MLP")

    if (epoch + 1) % 10 == 0:
        print(f"Epoca [{epoch+1}/200]") # solo per verificare avanzamento del training

print(f"Best epoch: {best_epoch_final}")
print(f"best RMSE: {best_test_rmse}")
print(f"best results: {best_results}")

Epoca [10/200]
Epoca [20/200]
Epoca [30/200]
Epoca [40/200]
Epoca [50/200]
Epoca [60/200]
Epoca [70/200]
Epoca [80/200]
Epoca [90/200]
Epoca [100/200]
Epoca [110/200]
Epoca [120/200]
Epoca [130/200]
Epoca [140/200]
Epoca [150/200]
Epoca [160/200]
Epoca [170/200]
Epoca [180/200]
Epoca [190/200]
Epoca [200/200]
Best epoch: 46
best RMSE: 1.1833447487150663
best results: {'RMSE': 1.183344761342624, 'MAE': 0.8229787044153489, 'R2': 0.16931773634135772}


In [20]:
results['MLP'] = best_results

In [21]:
results

{'Ridge': {'RMSE': 1.1795763107325328,
  'MAE': 0.8155695936417503,
  'R2': 0.17460005255506128},
 'RandomForest': {'RMSE': 1.140690478963359,
  'MAE': 0.7771062922273204,
  'R2': 0.2281232031174777},
 'SVR': {'RMSE': 1.1839695448922727,
  'MAE': 0.8075080986806491,
  'R2': 0.16844033587371188},
 'MLP': {'RMSE': 1.183344761342624,
  'MAE': 0.8229787044153489,
  'R2': 0.16931773634135772}}

# Risultati finali

| Modello        | RMSE       | MAE        | R²         |
|----------------|------------|------------|------------|
| **Ridge**       | 1.1796     | 0.8156     | 0.1746     |
| **RandomForest**| **1.1407** | **0.7771** | **0.2281** |
| **SVR**         | 1.1840     | 0.8075     | 0.1684     |
| **MLP**         | 1.1833     | 0.8230     | 0.1693     |


- Tutti i modelli mostrano un valore di R-squared relativamente basso. Questo indica che il problema di predire il vote_average è intrinsecamente complesso, per lo meno con le feature a nostra disposizione.
- Il RandomForest Regressor emerge come il modello più performante su tutte e tre le metriche, in maniera distinta rispetto agli altri modelli.
- Tutti gli altri modelli (Ridge, SVR, MLP) hanno ottenuto performance molto simili tra loro, oltre che significativamente inferiori a quelle del RandomForest.

Questo è un risultato molto importante. Suggerisce che né un approccio puramente lineare (Ridge), né uno basato sulla similarità in uno spazio complesso (SVR), né una rete neurale con questa architettura sono riusciti a catturare i pattern dei dati con la stessa efficacia del RandomForest.

