In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna
from optuna.trial import TrialState


  from .autonotebook import tqdm as notebook_tqdm


## Import the movieLens dataset




In [17]:
import pandas as pd

# Rutas de los archivos
movies_path = "ml-1m/movies.dat"
ratings_path = "ml-1m/ratings.dat"
users_path = "ml-1m/users.dat"

# Carga de los datos
# Cargar archivos
users = pd.read_csv("ml-1m/users.dat", sep="::", engine="python", 
                    names=["UserID", "Gender", "Age", "Occupation", "Zip-code"], encoding="latin-1")

movies = pd.read_csv("ml-1m/movies.dat", sep="::", engine="python", 
                     names=["MovieID", "Title", "Genres"], encoding="latin-1")

ratings = pd.read_csv("ml-1m/ratings.dat", sep="::", engine="python", 
                      names=["UserID", "MovieID", "Rating", "Timestamp"], encoding="latin-1")

# Mostrar primeras filas para verificar
print("Ratings:")
print(ratings.head())
print("\nMovies:")
print(movies.head())
print("\nUsers:")
print(users.head())


Ratings:
   UserID  MovieID  Rating  Timestamp
0       1     1193       5  978300760
1       1      661       3  978302109
2       1      914       3  978301968
3       1     3408       4  978300275
4       1     2355       5  978824291

Movies:
   MovieID                               Title                        Genres
0        1                    Toy Story (1995)   Animation|Children's|Comedy
1        2                      Jumanji (1995)  Adventure|Children's|Fantasy
2        3             Grumpier Old Men (1995)                Comedy|Romance
3        4            Waiting to Exhale (1995)                  Comedy|Drama
4        5  Father of the Bride Part II (1995)                        Comedy

Users:
   UserID Gender  Age  Occupation Zip-code
0       1      F    1          10    48067
1       2      M   56          16    70072
2       3      M   25          15    55117
3       4      M   45           7    02460
4       5      M   25          20    55455


In [18]:
# Normalizar ratings de 1-5 a 0-1
ratings["Rating"] = (ratings["Rating"] - 1.0) / 4.0

# Convertir géneros a listas
movies["Genres"] = movies["Genres"].apply(lambda x: x.split("|"))

# Codificar IDs
user2idx = {user_id: idx for idx, user_id in enumerate(users["UserID"].unique())}
movie2idx = {movie_id: idx for idx, movie_id in enumerate(movies["MovieID"].unique())}

ratings["UserID"] = ratings["UserID"].map(user2idx)
ratings["MovieID"] = ratings["MovieID"].map(movie2idx)

num_users = len(user2idx)
num_movies = len(movie2idx)

print(f"Total usuarios: {num_users}, Total películas: {num_movies}")


Total usuarios: 6040, Total películas: 3883


## Dividir en Train / Validation / Test

    Train (70%) → Para entrenar el modelo.

    Validation (15%) → Para ajustar hiperparámetros.

    Test (15%) → Para evaluar el modelo final.

In [19]:
# División: 70% Train, 15% Val, 15% Test
train_data, temp_data = train_test_split(ratings, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

print(f"Tamaño Train: {len(train_data)}, Validación: {len(val_data)}, Test: {len(test_data)}")


Tamaño Train: 700146, Validación: 150031, Test: 150032


## Crear PyTorch Dataset y DataLoader



In [20]:
class MovieLensDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df["UserID"].values, dtype=torch.long)
        self.movies = torch.tensor(df["MovieID"].values, dtype=torch.long)
        self.ratings = torch.tensor(df["Rating"].values, dtype=torch.float32)
    
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.movies[idx], self.ratings[idx]

# Instanciar datasets
train_dataset = MovieLensDataset(train_data)
val_dataset = MovieLensDataset(val_data)
test_dataset = MovieLensDataset(test_data)

# Loaders
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [21]:
class FinalTunedRecommenderNet(nn.Module):
    def __init__(self, num_users, num_movies):
        super(FinalTunedRecommenderNet, self).__init__()
        embedding_dim = 64  # De Optuna

        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.movie_embedding.weight)

        self.fc1 = nn.Linear(embedding_dim * 2, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.1467)

        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.1018)

        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)

    def forward(self, user_ids, movie_ids):
        u = self.user_embedding(user_ids)
        m = self.movie_embedding(movie_ids)
        x = torch.cat([u, m], dim=1)
        x = self.dropout1(F.leaky_relu(self.bn1(self.fc1(x))))
        x = self.dropout2(F.leaky_relu(self.bn2(self.fc2(x))))
        x = F.leaky_relu(self.fc3(x))
        return self.output(x).squeeze()


Entrenar el Modelo


In [22]:
class DeeperRecommenderNet(nn.Module):
    def __init__(self, num_users, num_movies, embedding_dim=128):
        super(DeeperRecommenderNet, self).__init__()

        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.movie_embedding = nn.Embedding(num_movies, embedding_dim)

        # Inicialización Xavier
        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.movie_embedding.weight)

        self.fc1 = nn.Linear(embedding_dim * 2, 512)
        self.dropout1 = nn.Dropout(0.4)

        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(256, 128)
        self.dropout3 = nn.Dropout(0.2)

        self.output = nn.Linear(128, 1)

    def forward(self, user_ids, movie_ids):
        user_vec = self.user_embedding(user_ids)
        movie_vec = self.movie_embedding(movie_ids)

        x = torch.cat([user_vec, movie_vec], dim=1)

        x = F.relu(self.fc1(x))
        x = self.dropout1(x)

        x = F.relu(self.fc2(x))
        x = self.dropout2(x)

        x = F.relu(self.fc3(x))
        x = self.dropout3(x)

        x = self.output(x)
        return x.squeeze()


In [23]:
model = FinalTunedRecommenderNet(num_users, num_movies)

criterion = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.000374,  # De Optuna
    weight_decay=7.49e-6  # De Optuna
)


In [24]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=50, patience=5, clip_value=1.0):
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for users, movies, ratings in train_loader:
            optimizer.zero_grad()
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_value)
            optimizer.step()
            running_loss += loss.item()

        avg_train_loss = running_loss / len(train_loader)

        # Validación
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for users, movies, ratings in val_loader:
                predictions = model(users, movies)
                loss = criterion(predictions, ratings)
                val_loss += loss.item()

        avg_val_loss = val_loss / len(val_loader)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

        # Guardar mejor modelo y optimizer
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, "best_model.pth")
            print("🟢 Mejor modelo guardado.")
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print("🛑 Early stopping: no mejora en validación.")
                break

    print("✅ Entrenamiento finalizado.")
    return model


In [25]:
# Entrenar
trained_model = train_model(model, train_loader, val_loader, criterion, optimizer, epochs=50)


Epoch 1/50 | Train Loss: 0.0571 | Val Loss: 0.0525
🟢 Mejor modelo guardado.
Epoch 2/50 | Train Loss: 0.0509 | Val Loss: 0.0512
🟢 Mejor modelo guardado.
Epoch 3/50 | Train Loss: 0.0488 | Val Loss: 0.0494
🟢 Mejor modelo guardado.
Epoch 4/50 | Train Loss: 0.0470 | Val Loss: 0.0488
🟢 Mejor modelo guardado.
Epoch 5/50 | Train Loss: 0.0452 | Val Loss: 0.0482
🟢 Mejor modelo guardado.
Epoch 6/50 | Train Loss: 0.0436 | Val Loss: 0.0481
🟢 Mejor modelo guardado.
Epoch 7/50 | Train Loss: 0.0422 | Val Loss: 0.0477
🟢 Mejor modelo guardado.
Epoch 8/50 | Train Loss: 0.0408 | Val Loss: 0.0478
Epoch 9/50 | Train Loss: 0.0396 | Val Loss: 0.0480
Epoch 10/50 | Train Loss: 0.0386 | Val Loss: 0.0485
Epoch 11/50 | Train Loss: 0.0377 | Val Loss: 0.0486
Epoch 12/50 | Train Loss: 0.0369 | Val Loss: 0.0491
🛑 Early stopping: no mejora en validación.
✅ Entrenamiento finalizado.


In [36]:
# Crear una nueva instancia y cargar pesos
best_model = FinalTunedRecommenderNet(num_users, num_movies)
best_model.load_state_dict(torch.load("best_model.pth"))
best_model.eval()


RuntimeError: Error(s) in loading state_dict for FinalTunedRecommenderNet:
	Missing key(s) in state_dict: "user_embedding.weight", "movie_embedding.weight", "fc1.weight", "fc1.bias", "bn1.weight", "bn1.bias", "bn1.running_mean", "bn1.running_var", "fc2.weight", "fc2.bias", "bn2.weight", "bn2.bias", "bn2.running_mean", "bn2.running_var", "fc3.weight", "fc3.bias", "output.weight", "output.bias". 
	Unexpected key(s) in state_dict: "model_state_dict", "optimizer_state_dict". 

In [26]:
def evaluate_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for users, movies, ratings in test_loader:
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            test_loss += loss.item()

            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(ratings.cpu().numpy())

    avg_loss = test_loss / len(test_loader)

    # Convertir a numpy arrays
    all_preds = np.clip(np.array(all_preds), 0.0, 1.0)  # clipping
    all_labels = np.array(all_labels)

    # Escala original [1–5]
    preds_original = all_preds * 4 + 1
    labels_original = all_labels * 4 + 1

    mse = mean_squared_error(labels_original, preds_original)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(labels_original, preds_original)

    print(f"🔍 Test MSE (1-5): {mse:.4f}")
    print(f"📉 Test RMSE (1-5): {rmse:.4f}")
    print(f"📊 Test MAE  (1-5): {mae:.4f}")
    
    return mse, rmse, mae


In [28]:
mse, rmse, mae = evaluate_model(model, test_loader, criterion)


🔍 Test MSE (1-5): 0.7844
📉 Test RMSE (1-5): 0.8857
📊 Test MAE  (1-5): 0.6908


In [14]:
def build_model(trial, num_users, num_movies):
    embedding_dim = trial.suggest_categorical("embedding_dim", [32, 64, 128])

    dropout1 = trial.suggest_float("dropout1", 0.0, 0.5)
    dropout2 = trial.suggest_float("dropout2", 0.0, 0.5)

    class TunedNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.user_embedding = nn.Embedding(num_users, embedding_dim)
            self.movie_embedding = nn.Embedding(num_movies, embedding_dim)
            nn.init.xavier_uniform_(self.user_embedding.weight)
            nn.init.xavier_uniform_(self.movie_embedding.weight)

            self.fc1 = nn.Linear(embedding_dim * 2, 256)
            self.bn1 = nn.BatchNorm1d(256)
            self.dropout1 = nn.Dropout(dropout1)

            self.fc2 = nn.Linear(256, 128)
            self.bn2 = nn.BatchNorm1d(128)
            self.dropout2 = nn.Dropout(dropout2)

            self.fc3 = nn.Linear(128, 64)
            self.output = nn.Linear(64, 1)

        def forward(self, user_ids, movie_ids):
            user_vec = self.user_embedding(user_ids)
            movie_vec = self.movie_embedding(movie_ids)
            x = torch.cat([user_vec, movie_vec], dim=1)
            x = self.dropout1(F.leaky_relu(self.bn1(self.fc1(x))))
            x = self.dropout2(F.leaky_relu(self.bn2(self.fc2(x))))
            x = F.leaky_relu(self.fc3(x))
            x = self.output(x)
            return x.squeeze()

    return TunedNet()


In [15]:
def objective(trial):
    model = build_model(trial, num_users, num_movies)

    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-2)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.MSELoss()

    # Entrenamiento simplificado (menos épocas para velocidad)
    for epoch in range(5):  
        model.train()
        for users, movies, ratings in train_loader:
            optimizer.zero_grad()
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

    # Validación final
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for users, movies, ratings in val_loader:
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            val_loss += loss.item()
    return val_loss / len(val_loader)


In [16]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)


[I 2025-04-06 14:19:40,455] A new study created in memory with name: no-name-b6e4b2b4-2a85-4496-bcc9-dad90370b5a2
  lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
  weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-2)
[I 2025-04-06 14:22:14,198] Trial 0 finished with value: 0.05928875238198758 and parameters: {'embedding_dim': 32, 'dropout1': 0.3538981485222143, 'dropout2': 0.46671717254394446, 'lr': 0.003943275259744605, 'weight_decay': 2.884341152524564e-05}. Best is trial 0 with value: 0.05928875238198758.
[I 2025-04-06 14:25:11,145] Trial 1 finished with value: 0.04896031740262075 and parameters: {'embedding_dim': 64, 'dropout1': 0.17618018395729507, 'dropout2': 0.16578724067694084, 'lr': 0.000957316937795701, 'weight_decay': 3.1424899941556134e-06}. Best is trial 1 with value: 0.04896031740262075.
[I 2025-04-06 14:28:07,008] Trial 2 finished with value: 0.05364347833628748 and parameters: {'embedding_dim': 64, 'dropout1': 0.35726815996261263, 'dropout2': 0.22585020

KeyboardInterrupt: 