In [1]:
import numpy as np
import pandas as pd
import os
import sys
import re
import random
import torch
from typing import List, Dict, Optional
import matplotlib.pyplot as plt
from tqdm import tqdm
import optuna
from optuna.pruners import MedianPruner
from optuna.exceptions import TrialPruned
import warnings

In [11]:
# for tuning
CONFIG = {
    'num_partitions': 3, # max 34
    'tuning_epoch': 12,
    'checkpoint_interval': 5,
    'eval_interval': 3,
    'num_tuning_trials': 30,
    'seed': 42 # or None
}

In [12]:
sys.path.append('..') 

from src.models.autoencoder import AutoEncoder
from src.models.autoencoder_trainer import *
from src.data.data_utils import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [13]:
data_dir = '../NETFLIX_DATA/partitions/train'
val_dir = '../NETFLIX_DATA/partitions/validation'
checkpoint_dir = '../model_checkpoints'

In [5]:
# retreive training data info
train_partition_files = get_data(data_dir)
print(f"Number of training partitions: {len(train_partition_files)}")
val_partition_files = get_data(val_dir)
print(f"Number of validation partitions: {len(val_partition_files)}")

if CONFIG['seed'] is not None:
    random.seed(CONFIG['seed'])

# testing
sample_train_partitions = random.sample(train_partition_files, CONFIG['num_partitions'])

sample_val_partitions = []
for partition in sample_train_partitions:
  val_partition = partition.copy()
  val_partition['path'] = partition['path'].replace('train', 'validation')
  sample_val_partitions.append(val_partition)


print(f"Train EX: {sample_train_partitions[0]}")
print(f"Val EX: {sample_val_partitions[0]}")

Number of training partitions: 34
Number of validation partitions: 34
Train EX: {'path': '../NETFLIX_DATA/partitions/train/part_1_7.parquet', 'part': 1, 'group': 7}
Val EX: {'path': '../NETFLIX_DATA/partitions/validation/part_1_7.parquet', 'part': 1, 'group': 7}


In [6]:
# build user_map and movie_map
user_map, movie_map = map_id(sample_train_partitions)

Mapping IDs: 100%|██████████| 3/3 [00:00<00:00,  3.61it/s]

Map successful for 406244 users, 1653 movies





In [7]:
# preload user rating profiles

train_user_data = AutoEncoder.load_user_data(partitions=sample_train_partitions, 
                                             user_map=user_map)

validation_user_data = AutoEncoder.load_validation_data(partitions=sample_val_partitions, 
                                                        user_map=user_map,
                                                        movie_map=movie_map)

Loading users' rating profile 1/3: 100%|██████████| 1828517/1828517 [01:02<00:00, 29238.99it/s]
Loading users' rating profile 2/3: 100%|██████████| 2772664/2772664 [01:32<00:00, 29836.29it/s]
Loading users' rating profile 3/3: 100%|██████████| 2776385/2776385 [01:33<00:00, 29643.55it/s]
Loading validation pairs 1/3: 100%|██████████| 176307/176307 [00:05<00:00, 31333.89it/s]
Loading validation pairs 2/3: 100%|██████████| 276488/276488 [00:08<00:00, 31798.67it/s]
Loading validation pairs 3/3: 100%|██████████| 338140/338140 [00:10<00:00, 31777.64it/s]


In [8]:
# optuna objective
def objective(trial):
    params = {
        "num_epochs": CONFIG['tuning_epoch'],
        "batch_size": 512,
        "learning_rate": trial.suggest_float("learning_rate", 0.0001, 0.001, log=True),
        "hidden_dims": trial.suggest_categorical("hidden_dims", 
                                                 [[1024,256,128], [512,256,128], [256,128], [512,128]]),
        "dropout": trial.suggest_float("dropout", 0.3, 0.7),
        "l2_reg": trial.suggest_float("l2_reg", 0.00001, 0.01, log=True),
        "checkpoint_interval": CONFIG['checkpoint_interval'],
        "eval_interval": CONFIG['eval_interval'],
    }

                #[[512,256,128], [256,128], [512,128]]

    try:
        print(params)
        
        model, rmse = train_autoencoder(
            train_partitions=sample_train_partitions,
            user_map=user_map,
            movie_map=movie_map,
            validation_partitions=sample_val_partitions,
            checkpoint_dir=checkpoint_dir,
            trial=trial,
            user_data=train_user_data,
            validation_data=validation_user_data,
            **params
        )

        return float(rmse)
    except optuna.TrialPruned:
        raise # reraise prune error for tuning

In [None]:
# tuning
warnings.filterwarnings("ignore", module="optuna.*")

study = optuna.create_study(
    study_name=f"autoencoder_tuning_{CONFIG['num_partitions']}_samples", 
    direction='minimize',
    
    # prune after 2 trials, after 1 if really bad
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=6, interval_steps=2),
    
    sampler=optuna.samplers.TPESampler(),
    storage=f"sqlite:///optuna_study_{CONFIG['num_partitions']}_samples.db",
    load_if_exists=True
)

study.optimize(objective, n_trials=CONFIG['num_tuning_trials'], timeout=6*3600)

[I 2025-06-13 13:00:24,005] Using an existing study with name 'autoencoder_tuning_3_samples' instead of creating a new one.


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0002172479460451583, 'hidden_dims': [1024, 256, 128], 'dropout': 0.37158451497450784, 'l2_reg': 2.7264522069379994e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.84it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00,  9.96it/s, loss=1.0174] 


Epoch 1 - Average Loss: 1.6307


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=0.9561]


Epoch 2 - Average Loss: 1.0146


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.10it/s, loss=0.9032]


Epoch 3 - Average Loss: 0.9628


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.47it/s]


Validation | Loss: 1.1502, RMSE: 1.0725


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.22it/s, loss=0.9263]


Epoch 4 - Average Loss: 0.9131


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9360]


Epoch 5 - Average Loss: 0.8887
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9089]


Epoch 6 - Average Loss: 0.8698


Evaluating: 100%|██████████| 979/979 [00:27<00:00, 36.01it/s]


Validation | Loss: 0.9961, RMSE: 0.9981


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8378]


Epoch 7 - Average Loss: 0.8574


Epoch 8/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=0.8036]


Epoch 8 - Average Loss: 0.8445


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.8981]


Epoch 9 - Average Loss: 0.8348


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 42.65it/s]


Validation | Loss: 0.9527, RMSE: 0.9760


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.18it/s, loss=0.8478]


Epoch 10 - Average Loss: 0.8240
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=0.8474]


Epoch 11 - Average Loss: 0.8165


Epoch 12/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=0.7537]


Epoch 12 - Average Loss: 0.8115


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.70it/s]


Validation | Loss: 0.9691, RMSE: 0.9844


[I 2025-06-13 13:17:49,797] Trial 22 finished with value: 0.9844337391747607 and parameters: {'learning_rate': 0.0002172479460451583, 'hidden_dims': [1024, 256, 128], 'dropout': 0.37158451497450784, 'l2_reg': 2.7264522069379994e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0001112826969543508, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4810501964465725, 'l2_reg': 0.00025959626016086655, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.05it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=1.4381]


Epoch 1 - Average Loss: 2.3389


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=1.3216]


Epoch 2 - Average Loss: 1.3172


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=1.1410]


Epoch 3 - Average Loss: 1.2269


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 44.06it/s]


Validation | Loss: 1.1399, RMSE: 1.0677


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=1.0441]


Epoch 4 - Average Loss: 1.1628


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.0338]


Epoch 5 - Average Loss: 1.1125
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9697]


Epoch 6 - Average Loss: 1.0746


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.87it/s]


Validation | Loss: 1.0211, RMSE: 1.0105


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.0650]


Epoch 7 - Average Loss: 1.0411


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=1.1647]


Epoch 8 - Average Loss: 1.0114


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=1.0037]


Epoch 9 - Average Loss: 0.9906


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.22it/s]
[I 2025-06-13 13:30:46,370] Trial 23 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.000153554295478224, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4773823017505224, 'l2_reg': 4.5618504500867436e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.31it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=1.2055]


Epoch 1 - Average Loss: 2.0131


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0757]


Epoch 2 - Average Loss: 1.0940


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=1.0011]


Epoch 3 - Average Loss: 1.0424


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.94it/s]


Validation | Loss: 1.0665, RMSE: 1.0327


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9228]


Epoch 4 - Average Loss: 1.0006


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=1.0145]


Epoch 5 - Average Loss: 0.9694
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8879]


Epoch 6 - Average Loss: 0.9376


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.45it/s]


Validation | Loss: 0.9709, RMSE: 0.9853


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8824]


Epoch 7 - Average Loss: 0.9154


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9583]


Epoch 8 - Average Loss: 0.8999


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8768]


Epoch 9 - Average Loss: 0.8888


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.38it/s]


Validation | Loss: 0.9641, RMSE: 0.9819


Epoch 10/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8833]


Epoch 10 - Average Loss: 0.8804
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8850]


Epoch 11 - Average Loss: 0.8724


Epoch 12/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8744]


Epoch 12 - Average Loss: 0.8671


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.43it/s]


Validation | Loss: 0.9583, RMSE: 0.9789


[I 2025-06-13 13:48:09,761] Trial 24 finished with value: 0.9789435394933435 and parameters: {'learning_rate': 0.000153554295478224, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4773823017505224, 'l2_reg': 4.5618504500867436e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00018071632678472342, 'hidden_dims': [1024, 256, 128], 'dropout': 0.44409505556805356, 'l2_reg': 2.3433020923545744e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.04it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.05it/s, loss=1.0407]


Epoch 1 - Average Loss: 1.7433


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1686]


Epoch 2 - Average Loss: 1.0397


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.0300]


Epoch 3 - Average Loss: 0.9893


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.34it/s]


Validation | Loss: 1.0762, RMSE: 1.0374


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9725]


Epoch 4 - Average Loss: 0.9450


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9030]


Epoch 5 - Average Loss: 0.9097
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9905]


Epoch 6 - Average Loss: 0.8874


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.23it/s]


Validation | Loss: 0.9753, RMSE: 0.9876


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=0.8296]


Epoch 7 - Average Loss: 0.8733


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.8553]


Epoch 8 - Average Loss: 0.8623


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9238]


Epoch 9 - Average Loss: 0.8551


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.07it/s]
[I 2025-06-13 14:01:10,147] Trial 25 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0002996329410391738, 'hidden_dims': [1024, 256, 128], 'dropout': 0.5553490166682296, 'l2_reg': 6.126614199359755e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  9.38it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=1.2339]


Epoch 1 - Average Loss: 1.6887


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0705]


Epoch 2 - Average Loss: 1.0928


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9830]


Epoch 3 - Average Loss: 1.0353


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.95it/s]


Validation | Loss: 1.0154, RMSE: 1.0076


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=0.9189]


Epoch 4 - Average Loss: 0.9943


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=1.0026]


Epoch 5 - Average Loss: 0.9655
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8914]


Epoch 6 - Average Loss: 0.9484


Evaluating: 100%|██████████| 979/979 [00:23<00:00, 42.51it/s]


Validation | Loss: 0.9658, RMSE: 0.9828


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9373]


Epoch 7 - Average Loss: 0.9386


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=0.8854]


Epoch 8 - Average Loss: 0.9292


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8501]


Epoch 9 - Average Loss: 0.9217


Evaluating: 100%|██████████| 979/979 [00:26<00:00, 36.59it/s]


Validation | Loss: 0.9660, RMSE: 0.9829


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=0.8811]


Epoch 10 - Average Loss: 0.9166
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.8300]


Epoch 11 - Average Loss: 0.9126


Epoch 12/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=0.8564]


Epoch 12 - Average Loss: 0.9081


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.89it/s]


Validation | Loss: 0.9585, RMSE: 0.9790


[I 2025-06-13 14:18:37,938] Trial 26 finished with value: 0.9790274309613106 and parameters: {'learning_rate': 0.0002996329410391738, 'hidden_dims': [1024, 256, 128], 'dropout': 0.5553490166682296, 'l2_reg': 6.126614199359755e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00013159877098839053, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4913074875309696, 'l2_reg': 2.099658309446873e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  6.81it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=1.0529]


Epoch 1 - Average Loss: 1.9200


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=1.1306]


Epoch 2 - Average Loss: 1.0732


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=0.8913]


Epoch 3 - Average Loss: 1.0200


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.83it/s]


Validation | Loss: 1.1112, RMSE: 1.0541


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.2811]


Epoch 4 - Average Loss: 0.9833


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=1.0391]


Epoch 5 - Average Loss: 0.9534
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.9114]


Epoch 6 - Average Loss: 0.9238


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.40it/s]


Validation | Loss: 0.9893, RMSE: 0.9947


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9144]


Epoch 7 - Average Loss: 0.9021


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.9135]


Epoch 8 - Average Loss: 0.8862


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9506]


Epoch 9 - Average Loss: 0.8737


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.75it/s]


Validation | Loss: 0.9534, RMSE: 0.9764


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.10it/s, loss=0.9304]


Epoch 10 - Average Loss: 0.8651
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8016]


Epoch 11 - Average Loss: 0.8596


Epoch 12/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8358]


Epoch 12 - Average Loss: 0.8521


Evaluating: 100%|██████████| 979/979 [00:25<00:00, 38.40it/s]


Validation | Loss: 0.9488, RMSE: 0.9741


[I 2025-06-13 14:36:00,861] Trial 27 finished with value: 0.9740796629144236 and parameters: {'learning_rate': 0.00013159877098839053, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4913074875309696, 'l2_reg': 2.099658309446873e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00012558323541207408, 'hidden_dims': [1024, 256, 128], 'dropout': 0.5980201978898604, 'l2_reg': 2.0747243597817864e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.58it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=1.1630]


Epoch 1 - Average Loss: 2.1228


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9766]


Epoch 2 - Average Loss: 1.1379


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0551]


Epoch 3 - Average Loss: 1.0666


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.02it/s]


Validation | Loss: 1.1228, RMSE: 1.0596


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1592]


Epoch 4 - Average Loss: 1.0272


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9170]


Epoch 5 - Average Loss: 0.9955
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=0.9631]


Epoch 6 - Average Loss: 0.9710


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.23it/s]


Validation | Loss: 1.0374, RMSE: 1.0185


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.10it/s, loss=0.9273]


Epoch 7 - Average Loss: 0.9554


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.9287]


Epoch 8 - Average Loss: 0.9411


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.8985]


Epoch 9 - Average Loss: 0.9234


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.17it/s]
[I 2025-06-13 14:49:00,507] Trial 28 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00023149341061040393, 'hidden_dims': [512, 256, 128], 'dropout': 0.5055631844143604, 'l2_reg': 0.0001051960544369806, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.31it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=1.1607]


Epoch 1 - Average Loss: 1.9430


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=1.1281]


Epoch 2 - Average Loss: 1.1565


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=1.0648]


Epoch 3 - Average Loss: 1.0943


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.69it/s]


Validation | Loss: 1.0496, RMSE: 1.0245


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.0033]


Epoch 4 - Average Loss: 1.0529


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0397]


Epoch 5 - Average Loss: 1.0177
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.9631]


Epoch 6 - Average Loss: 0.9928


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.82it/s]


Validation | Loss: 1.0044, RMSE: 1.0022


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.8928]


Epoch 7 - Average Loss: 0.9761


Epoch 8/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0035]


Epoch 8 - Average Loss: 0.9608


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=1.0170]


Epoch 9 - Average Loss: 0.9520


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.86it/s]
[I 2025-06-13 15:01:58,266] Trial 29 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00012532144428595163, 'hidden_dims': [512, 128], 'dropout': 0.3780335797439494, 'l2_reg': 2.1254200561000987e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.57it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=1.1577]


Epoch 1 - Average Loss: 2.3247


Epoch 2/12: 100%|██████████| 794/794 [01:17<00:00, 10.27it/s, loss=1.0461]


Epoch 2 - Average Loss: 1.0638


Epoch 3/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=0.9688]


Epoch 3 - Average Loss: 0.9985


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.75it/s]


Validation | Loss: 0.9812, RMSE: 0.9906


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=0.9442]


Epoch 4 - Average Loss: 0.9585


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=0.8870]


Epoch 5 - Average Loss: 0.9321
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:17<00:00, 10.30it/s, loss=0.9125]


Epoch 6 - Average Loss: 0.9080


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.66it/s]


Validation | Loss: 0.9804, RMSE: 0.9901


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=0.7638]


Epoch 7 - Average Loss: 0.8855


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=0.8897]


Epoch 8 - Average Loss: 0.8684


Epoch 9/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=0.8201]


Epoch 9 - Average Loss: 0.8545


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.71it/s]
[I 2025-06-13 15:14:45,846] Trial 30 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00021400685400745407, 'hidden_dims': [256, 128], 'dropout': 0.5375533599666054, 'l2_reg': 1.712971281020433e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.99it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:17<00:00, 10.28it/s, loss=1.1980] 


Epoch 1 - Average Loss: 2.3834


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=1.1711]


Epoch 2 - Average Loss: 1.1919


Epoch 3/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=1.0990]


Epoch 3 - Average Loss: 1.1169


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 46.03it/s]


Validation | Loss: 1.0286, RMSE: 1.0142


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.19it/s, loss=1.0231]


Epoch 4 - Average Loss: 1.0662


Epoch 5/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=1.0157]


Epoch 5 - Average Loss: 1.0243
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=1.0334]


Epoch 6 - Average Loss: 0.9969


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.93it/s]


Validation | Loss: 1.0312, RMSE: 1.0155


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=0.9219]


Epoch 7 - Average Loss: 0.9803


Epoch 8/12: 100%|██████████| 794/794 [01:17<00:00, 10.19it/s, loss=0.9016]


Epoch 8 - Average Loss: 0.9661


Epoch 9/12: 100%|██████████| 794/794 [01:17<00:00, 10.19it/s, loss=1.0393]


Epoch 9 - Average Loss: 0.9571


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 46.04it/s]
[I 2025-06-13 15:27:33,376] Trial 31 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00045682242648570576, 'hidden_dims': [256, 128], 'dropout': 0.6338052397475161, 'l2_reg': 1.666983816049595e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.05it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:17<00:00, 10.31it/s, loss=1.1153]


Epoch 1 - Average Loss: 1.9943


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=1.0738]


Epoch 2 - Average Loss: 1.1620


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=1.1244]


Epoch 3 - Average Loss: 1.0890


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.79it/s]


Validation | Loss: 1.0097, RMSE: 1.0048


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.26it/s, loss=1.0098]


Epoch 4 - Average Loss: 1.0574


Epoch 5/12: 100%|██████████| 794/794 [01:17<00:00, 10.23it/s, loss=0.9870]


Epoch 5 - Average Loss: 1.0347
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:17<00:00, 10.19it/s, loss=1.0769]


Epoch 6 - Average Loss: 1.0210


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.67it/s]


Validation | Loss: 1.0004, RMSE: 1.0002


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=1.0097]


Epoch 7 - Average Loss: 1.0109


Epoch 8/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=0.9425]


Epoch 8 - Average Loss: 1.0026


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=0.9261]


Epoch 9 - Average Loss: 0.9959


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.45it/s]
[I 2025-06-13 15:40:20,973] Trial 32 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0003297431219393315, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4926237093267391, 'l2_reg': 0.0001286464727460365, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.38it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.2027]


Epoch 1 - Average Loss: 1.7868


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=1.1267]


Epoch 2 - Average Loss: 1.1412


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1305]


Epoch 3 - Average Loss: 1.0715


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.08it/s]


Validation | Loss: 1.0314, RMSE: 1.0156


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=1.0520]


Epoch 4 - Average Loss: 1.0224


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9861]


Epoch 5 - Average Loss: 0.9938
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9927]


Epoch 6 - Average Loss: 0.9772


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.25it/s]


Validation | Loss: 0.9786, RMSE: 0.9892


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9531]


Epoch 7 - Average Loss: 0.9655


Epoch 8/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9487]


Epoch 8 - Average Loss: 0.9556


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.0298]


Epoch 9 - Average Loss: 0.9482


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.97it/s]
[I 2025-06-13 15:53:21,281] Trial 33 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00015731587942740307, 'hidden_dims': [1024, 256, 128], 'dropout': 0.45848045296667517, 'l2_reg': 4.2759359636168686e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.15it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.0521]


Epoch 1 - Average Loss: 1.8282


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1264]


Epoch 2 - Average Loss: 1.0746


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.9718]


Epoch 3 - Average Loss: 1.0236


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.25it/s]


Validation | Loss: 1.0640, RMSE: 1.0315


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9717]


Epoch 4 - Average Loss: 0.9836


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=1.0110]


Epoch 5 - Average Loss: 0.9457
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9365]


Epoch 6 - Average Loss: 0.9191


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.45it/s]


Validation | Loss: 0.9695, RMSE: 0.9846


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9208]


Epoch 7 - Average Loss: 0.9016


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.8989]


Epoch 8 - Average Loss: 0.8884


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8400]


Epoch 9 - Average Loss: 0.8787


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.50it/s]


Validation | Loss: 0.9540, RMSE: 0.9767


Epoch 10/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8320]


Epoch 10 - Average Loss: 0.8709
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9262]


Epoch 11 - Average Loss: 0.8639


Epoch 12/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9312]


Epoch 12 - Average Loss: 0.8579


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.31it/s]


Validation | Loss: 0.9597, RMSE: 0.9796


[I 2025-06-13 16:10:44,457] Trial 34 finished with value: 0.9796382123092554 and parameters: {'learning_rate': 0.00015731587942740307, 'hidden_dims': [1024, 256, 128], 'dropout': 0.45848045296667517, 'l2_reg': 4.2759359636168686e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0001701263710901258, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4324151680909759, 'l2_reg': 3.097107668959835e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.61it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:17<00:00, 10.28it/s, loss=1.0272]


Epoch 1 - Average Loss: 1.8158


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9863]


Epoch 2 - Average Loss: 1.0468


Epoch 3/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=0.9681]


Epoch 3 - Average Loss: 0.9958


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.90it/s]


Validation | Loss: 1.1069, RMSE: 1.0521


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.27it/s, loss=0.8685]


Epoch 4 - Average Loss: 0.9520


Epoch 5/12: 100%|██████████| 794/794 [01:15<00:00, 10.55it/s, loss=0.8883]


Epoch 5 - Average Loss: 0.9168
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:15<00:00, 10.45it/s, loss=0.9244]


Epoch 6 - Average Loss: 0.8952


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.16it/s]


Validation | Loss: 0.9609, RMSE: 0.9803


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.17it/s, loss=1.0393]


Epoch 7 - Average Loss: 0.8800


Epoch 8/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.8472]


Epoch 8 - Average Loss: 0.8697


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8123]


Epoch 9 - Average Loss: 0.8602


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.71it/s]


Validation | Loss: 0.9485, RMSE: 0.9739


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.8297]


Epoch 10 - Average Loss: 0.8523
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=0.9038]


Epoch 11 - Average Loss: 0.8452


Epoch 12/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8600]


Epoch 12 - Average Loss: 0.8381


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.90it/s]


Validation | Loss: 0.9515, RMSE: 0.9754


[I 2025-06-13 16:27:55,848] Trial 35 finished with value: 0.9754257792743928 and parameters: {'learning_rate': 0.0001701263710901258, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4324151680909759, 'l2_reg': 3.097107668959835e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0002699031572582006, 'hidden_dims': [1024, 256, 128], 'dropout': 0.42821512558870156, 'l2_reg': 3.367684250098324e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.71it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1135]


Epoch 1 - Average Loss: 1.5913


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9873]


Epoch 2 - Average Loss: 1.0240


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9466]


Epoch 3 - Average Loss: 0.9606


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.39it/s]


Validation | Loss: 1.0298, RMSE: 1.0148


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=0.9471]


Epoch 4 - Average Loss: 0.9212


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8733]


Epoch 5 - Average Loss: 0.8976
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9228]


Epoch 6 - Average Loss: 0.8856


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.45it/s]


Validation | Loss: 0.9744, RMSE: 0.9871


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.05it/s, loss=0.9012]


Epoch 7 - Average Loss: 0.8738


Epoch 8/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8358]


Epoch 8 - Average Loss: 0.8613


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.8280]


Epoch 9 - Average Loss: 0.8538


Evaluating: 100%|██████████| 979/979 [00:24<00:00, 39.56it/s]


Validation | Loss: 0.9603, RMSE: 0.9800


Epoch 10/12: 100%|██████████| 794/794 [01:19<00:00, 10.05it/s, loss=0.8179]


Epoch 10 - Average Loss: 0.8447
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8664]


Epoch 11 - Average Loss: 0.8386


Epoch 12/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8267]


Epoch 12 - Average Loss: 0.8335


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.63it/s]


Validation | Loss: 0.9726, RMSE: 0.9862


[I 2025-06-13 16:45:24,006] Trial 36 finished with value: 0.9862118428526399 and parameters: {'learning_rate': 0.0002699031572582006, 'hidden_dims': [1024, 256, 128], 'dropout': 0.42821512558870156, 'l2_reg': 3.367684250098324e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00018883648607072805, 'hidden_dims': [1024, 256, 128], 'dropout': 0.3323793177427462, 'l2_reg': 5.599022506914161e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.05it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0399]


Epoch 1 - Average Loss: 1.6772


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.05it/s, loss=0.9784]


Epoch 2 - Average Loss: 1.0294


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=0.8637]


Epoch 3 - Average Loss: 0.9753


Evaluating: 100%|██████████| 979/979 [00:26<00:00, 37.32it/s]


Validation | Loss: 1.1155, RMSE: 1.0562


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8926]


Epoch 4 - Average Loss: 0.9297


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8621]


Epoch 5 - Average Loss: 0.9058
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=0.8553]


Epoch 6 - Average Loss: 0.8915


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 43.55it/s]


Validation | Loss: 0.9814, RMSE: 0.9907


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.01it/s, loss=0.8513]


Epoch 7 - Average Loss: 0.8724


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=0.8088]


Epoch 8 - Average Loss: 0.8594


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=0.7998]


Epoch 9 - Average Loss: 0.8454


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.21it/s]
[I 2025-06-13 16:58:28,179] Trial 37 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00020298175135446717, 'hidden_dims': [512, 256, 128], 'dropout': 0.393348009557256, 'l2_reg': 2.8250444289648856e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.71it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.1588]


Epoch 1 - Average Loss: 1.8698


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.0669]


Epoch 2 - Average Loss: 1.0565


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=0.9770]


Epoch 3 - Average Loss: 1.0070


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 46.47it/s]


Validation | Loss: 1.0419, RMSE: 1.0207


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.9630]


Epoch 4 - Average Loss: 0.9636


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.9412]


Epoch 5 - Average Loss: 0.9345
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8335]


Epoch 6 - Average Loss: 0.9146


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.85it/s]


Validation | Loss: 0.9890, RMSE: 0.9945


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.9242]


Epoch 7 - Average Loss: 0.8992


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=0.8779]


Epoch 8 - Average Loss: 0.8855


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.8467]


Epoch 9 - Average Loss: 0.8764


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.70it/s]


Validation | Loss: 0.9607, RMSE: 0.9801


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.8465]


Epoch 10 - Average Loss: 0.8681
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.10it/s, loss=0.8139]


Epoch 11 - Average Loss: 0.8624


Epoch 12/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=0.8194]


Epoch 12 - Average Loss: 0.8548


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 44.19it/s]


Validation | Loss: 0.9532, RMSE: 0.9763


[I 2025-06-13 17:15:44,692] Trial 38 finished with value: 0.9763372049521956 and parameters: {'learning_rate': 0.00020298175135446717, 'hidden_dims': [512, 256, 128], 'dropout': 0.393348009557256, 'l2_reg': 2.8250444289648856e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0001387398146823645, 'hidden_dims': [256, 128], 'dropout': 0.4886629796186397, 'l2_reg': 0.0014535908515170327, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.21it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.11it/s, loss=1.7359]


Epoch 1 - Average Loss: 3.4165


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=1.4923]


Epoch 2 - Average Loss: 1.6315


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=1.3793]


Epoch 3 - Average Loss: 1.4273


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.12it/s]


Validation | Loss: 1.0974, RMSE: 1.0476


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.21it/s, loss=1.2749]


Epoch 4 - Average Loss: 1.3207


Epoch 5/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=1.1476]


Epoch 5 - Average Loss: 1.2625
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=1.2219]


Epoch 6 - Average Loss: 1.2282


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.92it/s]


Validation | Loss: 1.1127, RMSE: 1.0549


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.16it/s, loss=1.1043]


Epoch 7 - Average Loss: 1.2056


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=1.2125]


Epoch 8 - Average Loss: 1.1919


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.13it/s, loss=1.1242]


Epoch 9 - Average Loss: 1.1818


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.84it/s]
[I 2025-06-13 17:28:34,785] Trial 39 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00016709027243426455, 'hidden_dims': [1024, 256, 128], 'dropout': 0.44623699668099726, 'l2_reg': 1.5152644934507585e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  8.31it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=1.1434]


Epoch 1 - Average Loss: 1.7543


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=1.0222]


Epoch 2 - Average Loss: 1.0306


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9576]


Epoch 3 - Average Loss: 0.9823


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.02it/s]


Validation | Loss: 1.0621, RMSE: 1.0306


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8619]


Epoch 4 - Average Loss: 0.9388


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9088]


Epoch 5 - Average Loss: 0.9060
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.8561]


Epoch 6 - Average Loss: 0.8816


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 44.44it/s]


Validation | Loss: 0.9422, RMSE: 0.9707


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=0.9239]


Epoch 7 - Average Loss: 0.8683


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8978]


Epoch 8 - Average Loss: 0.8563


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9076]


Epoch 9 - Average Loss: 0.8471


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.12it/s]


Validation | Loss: 0.9617, RMSE: 0.9807


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.8344]


Epoch 10 - Average Loss: 0.8399
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.8970]


Epoch 11 - Average Loss: 0.8346


Epoch 12/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8497]


Epoch 12 - Average Loss: 0.8288


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.83it/s]


Validation | Loss: 0.9492, RMSE: 0.9743


[I 2025-06-13 17:45:58,456] Trial 40 finished with value: 0.9742597456728256 and parameters: {'learning_rate': 0.00016709027243426455, 'hidden_dims': [1024, 256, 128], 'dropout': 0.44623699668099726, 'l2_reg': 1.5152644934507585e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00011476911582672547, 'hidden_dims': [512, 256, 128], 'dropout': 0.5475254165066274, 'l2_reg': 0.007262138754221973, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.89it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=2.9712]


Epoch 1 - Average Loss: 5.9217


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.09it/s, loss=2.0519]


Epoch 2 - Average Loss: 2.4436


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.8350]


Epoch 3 - Average Loss: 1.8819


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.51it/s]


Validation | Loss: 1.0796, RMSE: 1.0391


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=1.5470]


Epoch 4 - Average Loss: 1.6660


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=1.5853]


Epoch 5 - Average Loss: 1.5659
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.5140]


Epoch 6 - Average Loss: 1.5155


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.79it/s]


Validation | Loss: 1.0626, RMSE: 1.0308


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.5136]


Epoch 7 - Average Loss: 1.4963


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=1.4735]


Epoch 8 - Average Loss: 1.4839


Epoch 9/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=1.4305]


Epoch 9 - Average Loss: 1.4763


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.30it/s]
[I 2025-06-13 17:58:56,345] Trial 41 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00027208992125278986, 'hidden_dims': [512, 128], 'dropout': 0.6964429692162095, 'l2_reg': 1.5584188186452046e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.57it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=1.2889] 


Epoch 1 - Average Loss: 2.1570


Epoch 2/12: 100%|██████████| 794/794 [01:17<00:00, 10.21it/s, loss=1.0849]


Epoch 2 - Average Loss: 1.1791


Epoch 3/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=0.9605]


Epoch 3 - Average Loss: 1.0793


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.88it/s]


Validation | Loss: 1.0139, RMSE: 1.0069


Epoch 4/12: 100%|██████████| 794/794 [01:17<00:00, 10.22it/s, loss=1.0682]


Epoch 4 - Average Loss: 1.0405


Epoch 5/12: 100%|██████████| 794/794 [01:17<00:00, 10.24it/s, loss=0.9696]


Epoch 5 - Average Loss: 1.0183
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.14it/s, loss=0.8956]


Epoch 6 - Average Loss: 0.9999


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.90it/s]


Validation | Loss: 0.9893, RMSE: 0.9946


Epoch 7/12: 100%|██████████| 794/794 [01:17<00:00, 10.20it/s, loss=0.9504]


Epoch 7 - Average Loss: 0.9894


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=1.0058]


Epoch 8 - Average Loss: 0.9800


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9599]


Epoch 9 - Average Loss: 0.9725


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.59it/s]


Validation | Loss: 0.9561, RMSE: 0.9778


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=0.9557]


Epoch 10 - Average Loss: 0.9673
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:17<00:00, 10.22it/s, loss=0.8190]


Epoch 11 - Average Loss: 0.9594


Epoch 12/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=0.9763]


Epoch 12 - Average Loss: 0.9563


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.86it/s]


Validation | Loss: 0.9555, RMSE: 0.9775


[I 2025-06-13 18:16:03,854] Trial 42 finished with value: 0.9775009798624107 and parameters: {'learning_rate': 0.00027208992125278986, 'hidden_dims': [512, 128], 'dropout': 0.6964429692162095, 'l2_reg': 1.5584188186452046e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.0005699070463126421, 'hidden_dims': [1024, 256, 128], 'dropout': 0.5833405626313694, 'l2_reg': 1.0048345841386835e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  9.04it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9347]


Epoch 1 - Average Loss: 1.4235


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9591]


Epoch 2 - Average Loss: 1.0028


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.05it/s, loss=0.9441]


Epoch 3 - Average Loss: 0.9530


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 44.91it/s]


Validation | Loss: 0.9749, RMSE: 0.9874


Epoch 4/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9319]


Epoch 4 - Average Loss: 0.9310


Epoch 5/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8962]


Epoch 5 - Average Loss: 0.9210
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9477]


Epoch 6 - Average Loss: 0.9130


Evaluating: 100%|██████████| 979/979 [00:22<00:00, 42.70it/s]


Validation | Loss: 0.9831, RMSE: 0.9915


Epoch 7/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.8405]


Epoch 7 - Average Loss: 0.9065


Epoch 8/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.9629]


Epoch 8 - Average Loss: 0.9062


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.8175]


Epoch 9 - Average Loss: 0.9000


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.38it/s]
[I 2025-06-13 18:29:05,796] Trial 43 pruned. 


{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00016865928223452418, 'hidden_dims': [1024, 256, 128], 'dropout': 0.44204034493219335, 'l2_reg': 2.776456026150286e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.47it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.12it/s, loss=1.0161]


Epoch 1 - Average Loss: 1.7778


Epoch 2/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=1.0076]


Epoch 2 - Average Loss: 1.0441


Epoch 3/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.9982]


Epoch 3 - Average Loss: 0.9957


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.26it/s]


Validation | Loss: 1.0572, RMSE: 1.0282


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9648]


Epoch 4 - Average Loss: 0.9535


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9790]


Epoch 5 - Average Loss: 0.9179
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8729]


Epoch 6 - Average Loss: 0.8945


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.46it/s]


Validation | Loss: 0.9472, RMSE: 0.9733


Epoch 7/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.8527]


Epoch 7 - Average Loss: 0.8798


Epoch 8/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.9334]


Epoch 8 - Average Loss: 0.8670


Epoch 9/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=0.8851]


Epoch 9 - Average Loss: 0.8608


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.19it/s]


Validation | Loss: 0.9616, RMSE: 0.9806


Epoch 10/12: 100%|██████████| 794/794 [01:18<00:00, 10.07it/s, loss=0.8198]


Epoch 10 - Average Loss: 0.8524
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_10.pth.gz


Epoch 11/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.7799]


Epoch 11 - Average Loss: 0.8459


Epoch 12/12: 100%|██████████| 794/794 [01:19<00:00, 10.03it/s, loss=0.8519]


Epoch 12 - Average Loss: 0.8407


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 46.00it/s]


Validation | Loss: 0.9465, RMSE: 0.9729


[I 2025-06-13 18:46:27,125] Trial 44 finished with value: 0.9728881830125684 and parameters: {'learning_rate': 0.00016865928223452418, 'hidden_dims': [1024, 256, 128], 'dropout': 0.44204034493219335, 'l2_reg': 2.776456026150286e-05}. Best is trial 17 with value: 0.9725434151001778.


Saved final model at ../model_checkpoints/final_model.pth.gz
{'num_epochs': 12, 'batch_size': 512, 'learning_rate': 0.00014205519084350642, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4473418900212535, 'l2_reg': 1.5760781582302715e-05, 'checkpoint_interval': 5, 'eval_interval': 3}
Device: cuda


Calculating global mean: 100%|██████████| 3/3 [00:00<00:00,  7.69it/s]


Global mean rating: 3.582


Epoch 1/12: 100%|██████████| 794/794 [01:18<00:00, 10.08it/s, loss=1.1285]


Epoch 1 - Average Loss: 1.8670


Epoch 2/12: 100%|██████████| 794/794 [01:19<00:00, 10.02it/s, loss=0.9796]


Epoch 2 - Average Loss: 1.0453


Epoch 3/12: 100%|██████████| 794/794 [01:19<00:00, 10.04it/s, loss=0.9647]


Epoch 3 - Average Loss: 0.9961


Evaluating: 100%|██████████| 979/979 [00:21<00:00, 45.22it/s]


Validation | Loss: 1.1337, RMSE: 1.0648


Epoch 4/12: 100%|██████████| 794/794 [01:18<00:00, 10.06it/s, loss=0.9729]


Epoch 4 - Average Loss: 0.9586


Epoch 5/12: 100%|██████████| 794/794 [01:18<00:00, 10.15it/s, loss=0.9242]


Epoch 5 - Average Loss: 0.9245
Saved checkpoint at ../model_checkpoints/checkpoint_epoch_5.pth.gz


Epoch 6/12:  32%|███▏      | 255/794 [00:25<00:53, 10.12it/s, loss=0.9218]

In [9]:
warnings.filterwarnings("ignore", category=UserWarning, module="optuna")

study = optuna.load_study(
    storage="sqlite:///optuna_study_3_samples.db", 
    study_name="autoencoder_tuning_3_samples"       
)

print(f"Best RMSE: {study.best_value:.4f}")
print(f"Best params: {study.best_params}")
print(f"Total trials: {len(study.trials)}")

Best RMSE: 0.9725
Best params: {'learning_rate': 0.00025806243863933984, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4807743363021898, 'l2_reg': 3.332750186337578e-05}
Total trials: 46


In [14]:
# retreive full training data info
train_partition_files = get_data(data_dir)
val_partition_files = get_data(val_dir)

if CONFIG['seed'] is not None:
    random.seed(CONFIG['seed'])

full_train_partitions = random.sample(train_partition_files, 7)

full_val_partitions = []
for partition in full_train_partitions:
  val_partition = partition.copy()
  val_partition['path'] = partition['path'].replace('train', 'validation')
  full_val_partitions.append(val_partition)


print(f"Train EX: {sample_train_partitions[0]}")
print(f"Val EX: {sample_val_partitions[0]}")

Train EX: {'path': '../NETFLIX_DATA/partitions/train/part_1_7.parquet', 'part': 1, 'group': 7}
Val EX: {'path': '../NETFLIX_DATA/partitions/validation/part_1_7.parquet', 'part': 1, 'group': 7}


In [15]:
# user and movie_map from full train data
user_map, movie_map = map_id(full_train_partitions)

Mapping IDs: 100%|██████████| 7/7 [00:01<00:00,  3.90it/s]

Map successful for 431670 users, 3720 movies





In [16]:
# preload user rating profiles

full_train_data = AutoEncoder.load_user_data(partitions=full_train_partitions, 
                                             user_map=user_map)

full_validation_data = AutoEncoder.load_validation_data(partitions=full_val_partitions, 
                                                        user_map=user_map,
                                                        movie_map=movie_map)

Loading users' rating profile 1/7: 100%|██████████| 1828517/1828517 [01:03<00:00, 28687.08it/s]
Loading users' rating profile 2/7: 100%|██████████| 2772664/2772664 [01:35<00:00, 28895.74it/s]
Loading users' rating profile 3/7: 100%|██████████| 2776385/2776385 [01:33<00:00, 29589.30it/s]
Loading users' rating profile 4/7: 100%|██████████| 1511079/1511079 [00:51<00:00, 29553.19it/s]
Loading users' rating profile 5/7: 100%|██████████| 2770394/2770394 [01:34<00:00, 29357.31it/s]
Loading users' rating profile 6/7: 100%|██████████| 2810701/2810701 [01:35<00:00, 29459.90it/s]
Loading users' rating profile 7/7: 100%|██████████| 2754183/2754183 [01:35<00:00, 28882.31it/s]
Loading validation pairs 1/7: 100%|██████████| 176307/176307 [00:05<00:00, 31319.51it/s]
Loading validation pairs 2/7: 100%|██████████| 276488/276488 [00:08<00:00, 31453.44it/s]
Loading validation pairs 3/7: 100%|██████████| 338140/338140 [00:10<00:00, 30907.70it/s]
Loading validation pairs 4/7: 100%|██████████| 157133/157133 

In [18]:
final_save_dir = checkpoint_dir+"/final"

final_params = {
    'num_epochs': 30,
    'checkpoint_interval': 5,
    'eval_interval': float('inf'), # no eval
    'batch_size': 512
}

tuned_model, rmse = train_autoencoder(
    train_partitions=full_train_partitions,
    validation_partitions=full_val_partitions,
    user_map=user_map,
    movie_map=movie_map,
    checkpoint_dir=final_save_dir,
    user_data=full_train_data,
    validation_data=full_validation_data,
    **study.best_params,
    **final_params
)

Device: cuda


Calculating global mean: 100%|██████████| 7/7 [00:00<00:00,  7.38it/s]


Global mean rating: 3.607


Epoch 1/30: 100%|██████████| 844/844 [01:26<00:00,  9.76it/s, loss=1.0275]


Epoch 1 - Average Loss: 1.6219


Epoch 2/30: 100%|██████████| 844/844 [01:25<00:00,  9.82it/s, loss=1.0382]


Epoch 2 - Average Loss: 1.0738


Epoch 3/30: 100%|██████████| 844/844 [01:25<00:00,  9.89it/s, loss=1.1025]


Epoch 3 - Average Loss: 1.0205


Epoch 4/30: 100%|██████████| 844/844 [01:25<00:00,  9.89it/s, loss=0.9885]


Epoch 4 - Average Loss: 0.9777


Epoch 5/30: 100%|██████████| 844/844 [01:25<00:00,  9.87it/s, loss=1.2543]


Epoch 5 - Average Loss: 0.9540
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_5.pth.gz


Epoch 6/30: 100%|██████████| 844/844 [01:25<00:00,  9.90it/s, loss=0.9228]


Epoch 6 - Average Loss: 0.9376


Epoch 7/30: 100%|██████████| 844/844 [01:25<00:00,  9.84it/s, loss=1.0575]


Epoch 7 - Average Loss: 0.9310


Epoch 8/30: 100%|██████████| 844/844 [01:25<00:00,  9.88it/s, loss=0.8478]


Epoch 8 - Average Loss: 0.9214


Epoch 9/30: 100%|██████████| 844/844 [01:25<00:00,  9.87it/s, loss=0.8932]


Epoch 9 - Average Loss: 0.9175


Epoch 10/30: 100%|██████████| 844/844 [01:25<00:00,  9.88it/s, loss=1.0814]


Epoch 10 - Average Loss: 0.9109
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_10.pth.gz


Epoch 11/30: 100%|██████████| 844/844 [01:25<00:00,  9.90it/s, loss=0.8668]


Epoch 11 - Average Loss: 0.9061


Epoch 12/30: 100%|██████████| 844/844 [01:25<00:00,  9.89it/s, loss=0.8995]


Epoch 12 - Average Loss: 0.9040


Epoch 13/30: 100%|██████████| 844/844 [01:24<00:00,  9.95it/s, loss=0.7665]


Epoch 13 - Average Loss: 0.9012


Epoch 14/30: 100%|██████████| 844/844 [01:24<00:00,  9.96it/s, loss=0.8392]


Epoch 14 - Average Loss: 0.8971


Epoch 15/30: 100%|██████████| 844/844 [01:25<00:00,  9.91it/s, loss=0.8536]


Epoch 15 - Average Loss: 0.8958
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_15.pth.gz


Epoch 16/30: 100%|██████████| 844/844 [01:25<00:00,  9.87it/s, loss=0.8124]


Epoch 16 - Average Loss: 0.8944


Epoch 17/30: 100%|██████████| 844/844 [01:25<00:00,  9.87it/s, loss=0.8914]


Epoch 17 - Average Loss: 0.8927


Epoch 18/30: 100%|██████████| 844/844 [01:25<00:00,  9.90it/s, loss=0.8281]


Epoch 18 - Average Loss: 0.8919


Epoch 19/30: 100%|██████████| 844/844 [01:25<00:00,  9.84it/s, loss=0.8959]


Epoch 19 - Average Loss: 0.8907


Epoch 20/30: 100%|██████████| 844/844 [01:26<00:00,  9.79it/s, loss=1.2405]


Epoch 20 - Average Loss: 0.8905
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_20.pth.gz


Epoch 21/30: 100%|██████████| 844/844 [01:25<00:00,  9.83it/s, loss=1.1974]


Epoch 21 - Average Loss: 0.8905


Epoch 22/30: 100%|██████████| 844/844 [01:26<00:00,  9.81it/s, loss=0.8578]


Epoch 22 - Average Loss: 0.8886


Epoch 23/30: 100%|██████████| 844/844 [01:26<00:00,  9.80it/s, loss=0.8499]


Epoch 23 - Average Loss: 0.8856


Epoch 24/30: 100%|██████████| 844/844 [01:25<00:00,  9.84it/s, loss=0.8033]


Epoch 24 - Average Loss: 0.8830


Epoch 25/30: 100%|██████████| 844/844 [01:26<00:00,  9.78it/s, loss=0.9292]


Epoch 25 - Average Loss: 0.8830
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_25.pth.gz


Epoch 26/30: 100%|██████████| 844/844 [01:25<00:00,  9.85it/s, loss=0.8600]


Epoch 26 - Average Loss: 0.8807


Epoch 27/30: 100%|██████████| 844/844 [01:25<00:00,  9.83it/s, loss=0.8422]


Epoch 27 - Average Loss: 0.8772


Epoch 28/30: 100%|██████████| 844/844 [01:25<00:00,  9.89it/s, loss=0.8487]


Epoch 28 - Average Loss: 0.8742


Epoch 29/30: 100%|██████████| 844/844 [01:25<00:00,  9.84it/s, loss=0.8746]


Epoch 29 - Average Loss: 0.8743


Epoch 30/30: 100%|██████████| 844/844 [01:25<00:00,  9.89it/s, loss=0.8070]


Epoch 30 - Average Loss: 0.8751
Saved checkpoint at ../model_checkpoints/final/checkpoint_epoch_30.pth.gz
Saved final model at ../model_checkpoints/final/final_model.pth.gz


UnboundLocalError: cannot access local variable 'val_rmse' where it is not associated with a value

In [19]:
model_path = checkpoint_dir+"/final/final_model.pth.gz"

In [20]:
print(f"Loading model from {model_path}")

# Load the checkpoint
with gzip.open(model_path, 'rb') as f:
    checkpoint = torch.load(f, map_location=device)

# Extract metadata
metadata = checkpoint['metadata']
print(f"Model metadata: {metadata}")

# Initialize model with same architecture
model = AutoEncoder(
    num_movies=len(movie_map),
    hidden_dims=metadata['hidden_dims'],
    dropout=metadata['dropout'],
    l2=metadata.get('l2_reg', 0.001),  # Use l2_reg from metadata if available
    global_mean=checkpoint.get('global_mean', 3.0)  # Try to get from checkpoint
).to(device)

# Load the trained weights
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # Set to evaluation mode

print(f"Model loaded successfully!")
print(f"Architecture: {metadata['hidden_dims']}")
print(f"Dropout: {metadata['dropout']}")
print(f"Learning rate (training): {metadata['learning_rate']}")

Loading model from ../model_checkpoints/final/final_model.pth.gz
Model metadata: {'epochs': 30, 'train_loss': 0.8751115782707224, 'val_loss': None, 'hidden_dims': [1024, 256, 128], 'dropout': 0.4807743363021898, 'learning_rate': 0.00025806243863933984}
Model loaded successfully!
Architecture: [1024, 256, 128]
Dropout: 0.4807743363021898
Learning rate (training): 0.00025806243863933984


In [21]:
# prepare test data
test_path = "../NETFLIX_DATA/test.parquet"
test_df = pd.read_parquet(test_path)
test_data = [(row['user_id'], row['movie_id'], row['rating']) 
             for _, row in tqdm(test_df.iterrows(), 
                               total=len(test_df), 
                               desc="Loading test data")]

Loading test data: 100%|██████████| 1408395/1408395 [00:48<00:00, 28836.62it/s]


In [22]:
test_metrics = evaluate_model(
    model=model,
    validation_data=test_data,        
    training_data=full_train_data, 
    user_map=user_map,               
    movie_map=movie_map,             
    device=device
)

Evaluating: 100%|██████████| 2751/2751 [00:16<00:00, 167.11it/s]


In [23]:
print(f"Evaluated on test data of length {len(test_data)}")
test_metrics

Evaluated on test data of length 1408395


{'loss': 1.5933213663980612,
 'rmse': 1.2622683416762306,
 'mae': 1.0228561228930335,
 'total_predictions': 1408395}