In [1]:
!pip install -U LibRecommender

Collecting LibRecommender
  Downloading LibRecommender-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (30 kB)
Collecting gensim>=4.0.0 (from LibRecommender)
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy<2.0,>=1.18.5 (from gensim>=4.0.0->LibRecommender)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<1.14.0,>=1.7.0 (from gensim>=4.0.0->LibRecommender)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
Downloading LibRecommender-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━

## NCF

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import NCF
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts with earlier models.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to the model constructor below.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# NCF model configured for the explicit-rating task.
model = NCF(
    task="rating",
    data_info=data_info,
    embed_size=16,
    n_epochs=20,
    lr=0.01,
    batch_size=256,
    use_bn=False  # Batch normalization disabled because of an incompatibility with Keras 3
)

# Train on the training split, evaluating against the validation split.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_ncf.csv", index=False)

print("Archivo de predicciones guardado como predictions_ncf.csv")

Training start time: [35m2025-03-30 09:33:02[0m


train: 100%|██████████| 2440/2440 [00:08<00:00, 299.35it/s]


Epoch 1 elapsed: 8.155s


train: 100%|██████████| 2440/2440 [00:07<00:00, 311.12it/s]


Epoch 2 elapsed: 7.847s


train: 100%|██████████| 2440/2440 [00:07<00:00, 308.87it/s]


Epoch 3 elapsed: 7.904s


train: 100%|██████████| 2440/2440 [00:07<00:00, 326.35it/s]


Epoch 4 elapsed: 7.480s


train: 100%|██████████| 2440/2440 [00:07<00:00, 305.10it/s]


Epoch 5 elapsed: 8.000s


train: 100%|██████████| 2440/2440 [00:08<00:00, 292.92it/s]


Epoch 6 elapsed: 8.333s


train: 100%|██████████| 2440/2440 [00:07<00:00, 326.93it/s]


Epoch 7 elapsed: 7.468s


train: 100%|██████████| 2440/2440 [00:08<00:00, 296.78it/s]


Epoch 8 elapsed: 8.224s


train: 100%|██████████| 2440/2440 [00:07<00:00, 311.09it/s]


Epoch 9 elapsed: 7.847s


train: 100%|██████████| 2440/2440 [00:07<00:00, 330.05it/s]


Epoch 10 elapsed: 7.396s


train: 100%|██████████| 2440/2440 [00:07<00:00, 309.87it/s]


Epoch 11 elapsed: 7.877s


train: 100%|██████████| 2440/2440 [00:07<00:00, 309.00it/s]


Epoch 12 elapsed: 7.900s


train: 100%|██████████| 2440/2440 [00:07<00:00, 329.72it/s]


Epoch 13 elapsed: 7.403s


train: 100%|██████████| 2440/2440 [00:07<00:00, 307.40it/s]


Epoch 14 elapsed: 7.941s


train: 100%|██████████| 2440/2440 [00:08<00:00, 293.24it/s]


Epoch 15 elapsed: 8.327s


train: 100%|██████████| 2440/2440 [00:07<00:00, 323.61it/s]


Epoch 16 elapsed: 7.543s


train: 100%|██████████| 2440/2440 [00:07<00:00, 310.23it/s]


Epoch 17 elapsed: 7.868s


train: 100%|██████████| 2440/2440 [00:07<00:00, 322.47it/s]


Epoch 18 elapsed: 7.573s


train: 100%|██████████| 2440/2440 [00:07<00:00, 317.17it/s]


Epoch 19 elapsed: 7.697s


train: 100%|██████████| 2440/2440 [00:07<00:00, 311.67it/s]


Epoch 20 elapsed: 7.833s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 396

### Hyperparameters

In [1]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import NCF
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import mean_absolute_error

# Reset the TensorFlow/Keras session up front to avoid variable conflicts.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to every model constructor.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Hyperparameter search space for NCF. The full grid has
# 4 * 2 * 4 * 4 * 3 * 3 * 1 = 1152 combinations, each trained from scratch.
# NOTE(review): other cells in this notebook disable batch normalization citing a
# Keras 3 incompatibility, while this grid only tries use_bn=True — confirm.
param_grid = {
    'embed_size': [8, 16, 32, 64],       # Embedding size
    'lr': [0.001, 0.01],            # Learning rate
    'n_epochs': [5, 10, 20, 25],         # Number of epochs
    'reg': [0.0, 0.01, 0.1, 0.5],        # Regularization
    'batch_size': [128, 256, 512],       # Batch size
    'hidden_units': [(128, 64, 32), (256, 128), (512, 256, 128)], # Hidden layers
    'use_bn': [True],             # Batch normalization
}

# Train one NCF configuration and score it with MAE on the validation split.
def evaluate_model(params):
    """Fit an NCF model with the given hyperparameters and return its validation MAE.

    Args:
        params: Mapping providing 'embed_size', 'lr', 'n_epochs', 'reg',
            'batch_size', 'hidden_units' and 'use_bn'.

    Returns:
        The mean absolute error between val_df['rating'] and the predictions
        for the validation (user, item) pairs. If anything raises while
        building, training or scoring, the error is printed and float('inf')
        is returned so the caller's search loop can keep going.
    """
    failed_score = float('inf')
    try:
        # Clear the session so the previous candidate does not conflict with this one.
        tf.keras.backend.clear_session()

        # Build and train the candidate inside a fresh graph for isolation.
        isolated_graph = tf.Graph()
        with isolated_graph.as_default():
            candidate = NCF(
                task="rating",
                data_info=data_info,
                embed_size=params['embed_size'],
                lr=params['lr'],
                n_epochs=params['n_epochs'],
                reg=params['reg'],
                batch_size=params['batch_size'],
                hidden_units=params['hidden_units'],
                use_bn=params['use_bn'],
                seed=42,
            )
            candidate.fit(
                train_data,
                neg_sampling=False,
                eval_data=val_data,
                verbose=0,
            )

            # Predict the held-out (user, item) pairs and compare with the true ratings.
            val_users = val_df['user'].values
            val_items = val_df['item'].values
            val_predictions = candidate.predict(val_users, val_items)
            return mean_absolute_error(val_df['rating'], val_predictions)
    except Exception as e:
        print(f"Error con parámetros {params}: {e}")
        return failed_score

# Grid search: keep the configuration with the lowest validation MAE.
best_score = float('inf')
best_params = None

for params in ParameterGrid(param_grid):
    print(f"Probando parámetros: {params}")
    mae = evaluate_model(params)
    print(f"MAE: {mae}")

    if mae < best_score:
        best_score = mae
        best_params = params

print(f"Mejores hiperparámetros: {best_params} con MAE: {best_score}")

# evaluate_model() returns inf when a configuration fails, so best_params is still
# None if every configuration failed. Stop with an explicit error instead of the
# TypeError that subscripting None would raise below.
if best_params is None:
    raise RuntimeError(
        "Ninguna combinación de hiperparámetros se entrenó correctamente; "
        "revisa los errores anteriores."
    )

# Retrain the final model with the best hyperparameters found.
tf.keras.backend.clear_session()
best_model = NCF(
    task="rating",
    data_info=data_info,
    embed_size=best_params['embed_size'],
    lr=best_params['lr'],
    n_epochs=best_params['n_epochs'],
    reg=best_params['reg'],
    batch_size=best_params['batch_size'],
    hidden_units=best_params['hidden_units'],
    use_bn=best_params['use_bn'],
    seed=42
)
best_model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = best_model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_best_ncf.csv", index=False)

print("Archivo de predicciones guardado como predictions_best_ncf.csv")

2025-03-30 15:30:48.600797: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-30 15:30:58.531170: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-30 15:30:58.531403: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-30 15:31:00.796145: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-30 15:31:04.112415: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-30 15:31:04.162486: I tensorflow/core/platform/cpu_feature_guard.cc:1

KeyboardInterrupt: 

## ALS

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import ALS
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts with earlier models.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to the model constructor below.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Alternative, fully specified ALS configuration (kept commented out for reference):
# model = ALS(
#     task="rating",  # Rating task, since the feedback is explicit ratings
#     data_info=data_info,  # Metadata produced by DatasetPure
#     embed_size=16,
#     n_epochs=10,
#     reg=0.1,  # Regularization parameter
#     use_cg=True,  # Use the conjugate gradient method
#     n_threads=4  # Use multiple threads to speed up training
# )

# Baseline ALS model: library defaults except for the regularization strength.
model = ALS(task="rating",data_info=data_info,reg=0.1)

# Train on the training split, evaluating against the validation split.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_als_baseline.csv", index=False)

# The message now names the file that is actually written above.
print("Archivo de predicciones guardado como predictions_als_baseline.csv")

Training start time: [35m2025-03-30 10:08:42[0m
Epoch 1 elapsed: 0.574s
Epoch 2 elapsed: 0.577s
Epoch 3 elapsed: 0.470s
Epoch 4 elapsed: 0.322s
Epoch 5 elapsed: 0.340s
Epoch 6 elapsed: 0.315s
Epoch 7 elapsed: 0.321s
Epoch 8 elapsed: 0.328s
Epoch 9 elapsed: 0.322s
Epoch 10 elapsed: 0.315s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 29

## DeepFM

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import DeepFM
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts with earlier models.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to the model constructor below.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# DeepFM model configured for the explicit-rating task.
model = DeepFM(
    task="rating",  # Changed from 'ranking' to 'rating' because the feedback is explicit ratings
    data_info=data_info,  # Metadata produced by DatasetPure
    use_bn=False,  # Batch normalization disabled because of an incompatibility with Keras 3
    dropout_rate=None,  # Dropout disabled to avoid a conflict with Keras 3
    seed=42
)

# Train on the training split, evaluating against the validation split.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_deepfm.csv", index=False)

print("Archivo de predicciones guardado como predictions_deepfm.csv")

Instructions for updating:
non-resource variables are not supported in the long term


Training start time: [35m2025-03-30 10:19:27[0m
total params: [33m3,590,954[0m | embedding params: [33m3,576,567[0m | network params: [33m14,387[0m


train: 100%|██████████| 2440/2440 [00:18<00:00, 134.59it/s]


Epoch 1 elapsed: 18.145s


train: 100%|██████████| 2440/2440 [00:10<00:00, 222.82it/s]


Epoch 2 elapsed: 10.954s


train: 100%|██████████| 2440/2440 [00:13<00:00, 180.45it/s]


Epoch 3 elapsed: 13.525s


train: 100%|██████████| 2440/2440 [00:11<00:00, 221.65it/s]


Epoch 4 elapsed: 11.012s


train: 100%|██████████| 2440/2440 [00:13<00:00, 184.92it/s]


Epoch 5 elapsed: 13.198s


train: 100%|██████████| 2440/2440 [00:16<00:00, 148.78it/s]


Epoch 6 elapsed: 16.408s


train: 100%|██████████| 2440/2440 [00:12<00:00, 200.26it/s]


Epoch 7 elapsed: 12.188s


train: 100%|██████████| 2440/2440 [00:17<00:00, 140.45it/s]


Epoch 8 elapsed: 17.378s


train: 100%|██████████| 2440/2440 [00:13<00:00, 180.48it/s]


Epoch 9 elapsed: 13.528s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.17it/s]


Epoch 10 elapsed: 14.259s


train: 100%|██████████| 2440/2440 [00:16<00:00, 147.42it/s]


Epoch 11 elapsed: 16.557s


train: 100%|██████████| 2440/2440 [00:13<00:00, 184.78it/s]


Epoch 12 elapsed: 13.212s


train: 100%|██████████| 2440/2440 [00:12<00:00, 199.15it/s]


Epoch 13 elapsed: 12.256s


train: 100%|██████████| 2440/2440 [00:10<00:00, 223.04it/s]


Epoch 14 elapsed: 10.942s


train: 100%|██████████| 2440/2440 [00:10<00:00, 222.44it/s]


Epoch 15 elapsed: 10.974s


train: 100%|██████████| 2440/2440 [00:10<00:00, 223.06it/s]


Epoch 16 elapsed: 10.943s


train: 100%|██████████| 2440/2440 [00:10<00:00, 223.24it/s]


Epoch 17 elapsed: 10.934s


train: 100%|██████████| 2440/2440 [00:12<00:00, 195.66it/s]


Epoch 18 elapsed: 12.473s


train: 100%|██████████| 2440/2440 [00:10<00:00, 224.32it/s]


Epoch 19 elapsed: 10.883s


train: 100%|██████████| 2440/2440 [00:20<00:00, 120.84it/s]


Epoch 20 elapsed: 20.200s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

### Hyperparameters

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import DeepFM
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import mean_absolute_error

import warnings
# Silence all Python warnings for the remainder of the kernel session.
warnings.filterwarnings("ignore")


# Reset the TensorFlow/Keras session up front to avoid variable conflicts.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to every model constructor.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Extended hyperparameter search space for DeepFM. The full grid has
# 2 * 2 * 6 * 2 * 1 * 2 * 3 * 1 = 288 combinations, each trained from scratch.
# NOTE(review): other cells in this notebook disable batch normalization and dropout
# citing Keras 3 conflicts, while this grid enables both — confirm.
param_grid = {
    'embed_size': [16, 32],          # Embedding size
    'lr': [0.001, 0.01],               # Learning rate
    'n_epochs': [5, 10, 15, 20, 25, 30],            # Number of epochs
    'reg': [0.1, 0.5],           # Regularization
    'batch_size': [128],          # Batch size
    'dropout_rate': [0.3, 0.5],  # Dropout rate
    'hidden_units': [(128, 64, 32), (64, 32, 16), (512, 256, 128)],  # Hidden layers
    'use_bn': [True],                # Batch normalization
}

# Train one DeepFM configuration and score it with MAE on the validation split.
def evaluate_model(params):
    """Fit a DeepFM model with the given hyperparameters and return its validation MAE.

    Args:
        params: Mapping providing 'embed_size', 'lr', 'n_epochs', 'reg',
            'batch_size', 'dropout_rate', 'hidden_units' and 'use_bn'.

    Returns:
        The mean absolute error between val_df['rating'] and the predictions
        for the validation (user, item) pairs. If anything raises while
        building, training or scoring, the error is printed and float('inf')
        is returned so the caller's search loop can keep going.
    """
    failed_score = float('inf')
    try:
        # Clear the session before creating a new model.
        tf.keras.backend.clear_session()

        # Build and train the candidate inside a fresh graph for isolation.
        isolated_graph = tf.Graph()
        with isolated_graph.as_default():
            candidate = DeepFM(
                task="rating",
                data_info=data_info,
                embed_size=params['embed_size'],
                lr=params['lr'],
                n_epochs=params['n_epochs'],
                reg=params['reg'],
                batch_size=params['batch_size'],
                dropout_rate=params['dropout_rate'],
                hidden_units=params['hidden_units'],
                use_bn=params['use_bn'],
                seed=42,
            )
            candidate.fit(
                train_data,
                neg_sampling=False,
                eval_data=val_data,
                verbose=0,
            )

            # Predict the held-out (user, item) pairs and compare with the true ratings.
            val_users = val_df['user'].values
            val_items = val_df['item'].values
            val_predictions = candidate.predict(val_users, val_items)
            return mean_absolute_error(val_df['rating'], val_predictions)
    except Exception as e:
        print(f"Error con parámetros {params}: {e}")
        return failed_score

# Grid search: keep the configuration with the lowest validation MAE.
best_score = float('inf')
best_params = None

for params in ParameterGrid(param_grid):
    print(f"Probando parámetros: {params}")
    mae = evaluate_model(params)
    print(f"MAE: {mae}")

    if mae < best_score:
        best_score = mae
        best_params = params

print(f"Mejores hiperparámetros: {best_params} con MAE: {best_score}")

# evaluate_model() returns inf when a configuration fails, so best_params is still
# None if every configuration failed. Stop with an explicit error instead of the
# TypeError that subscripting None would raise below.
if best_params is None:
    raise RuntimeError(
        "Ninguna combinación de hiperparámetros se entrenó correctamente; "
        "revisa los errores anteriores."
    )

# Retrain the final model with the best hyperparameters found.
tf.keras.backend.clear_session()
best_model = DeepFM(
    task="rating",
    data_info=data_info,
    embed_size=best_params['embed_size'],
    lr=best_params['lr'],
    n_epochs=best_params['n_epochs'],
    reg=best_params['reg'],
    batch_size=best_params['batch_size'],
    dropout_rate=best_params['dropout_rate'],
    hidden_units=best_params['hidden_units'],
    use_bn=best_params['use_bn'],
    seed=42
)
best_model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = best_model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_best_deepfm.csv", index=False)

print("Archivo de predicciones guardado como predictions_best_deepfm.csv")

### Subidas

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import DeepFM
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Configuration logged by the hyperparameter search:
# Probando parámetros: {'batch_size': 128, 'dropout_rate': None, 'embed_size': 8, 'hidden_units': (128, 64, 32), 'lr': 0.001, 'n_epochs': 10, 'reg': 0.01, 'use_bn': False}
# NOTE(review): the model below trains with n_epochs=20 and use_bn=True, which differ
# from the logged configuration (n_epochs=10, use_bn=False) — confirm which is intended.

import warnings
# Silence all Python warnings for the remainder of the kernel session.
warnings.filterwarnings("ignore")

# Reset the TensorFlow/Keras session to avoid variable conflicts with earlier models.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to the model constructor below.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# DeepFM model for the explicit-rating task, with hand-picked hyperparameters.
model = DeepFM(
    task="rating",
    data_info=data_info,
    embed_size=8,
    lr=0.001,
    n_epochs=20,
    reg=0.01,
    batch_size=128,
    dropout_rate=None,
    hidden_units=(128, 64, 32),
    use_bn=True,
    seed=42
)

# Train on the training split, evaluating against the validation split.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_deepfm_prueba_hyper_3.csv", index=False)

# The message now names the file that is actually written above.
print("Archivo de predicciones guardado como predictions_deepfm_prueba_hyper_3.csv")

Training start time: [35m2025-03-30 16:43:41[0m
total params: [33m1,906,330[0m | embedding params: [33m1,893,583[0m | network params: [33m12,747[0m


train: 100%|██████████| 4880/4880 [00:33<00:00, 145.62it/s]


Epoch 1 elapsed: 33.519s


train: 100%|██████████| 4880/4880 [00:34<00:00, 141.33it/s]


Epoch 2 elapsed: 34.533s


train: 100%|██████████| 4880/4880 [00:34<00:00, 143.41it/s]


Epoch 3 elapsed: 34.032s


train: 100%|██████████| 4880/4880 [00:33<00:00, 145.04it/s]


Epoch 4 elapsed: 33.650s


train: 100%|██████████| 4880/4880 [00:34<00:00, 142.88it/s]


Epoch 5 elapsed: 34.161s


train: 100%|██████████| 4880/4880 [00:33<00:00, 143.89it/s]


Epoch 6 elapsed: 33.920s


train: 100%|██████████| 4880/4880 [00:31<00:00, 152.56it/s]


Epoch 7 elapsed: 31.993s


train: 100%|██████████| 4880/4880 [00:31<00:00, 152.94it/s]


Epoch 8 elapsed: 31.912s


train: 100%|██████████| 4880/4880 [00:32<00:00, 150.34it/s]


Epoch 9 elapsed: 32.465s


train: 100%|██████████| 4880/4880 [00:32<00:00, 149.58it/s]


Epoch 10 elapsed: 32.631s


train: 100%|██████████| 4880/4880 [00:32<00:00, 149.34it/s]


Epoch 11 elapsed: 32.683s


train: 100%|██████████| 4880/4880 [00:32<00:00, 148.48it/s]


Epoch 12 elapsed: 32.873s


train: 100%|██████████| 4880/4880 [00:34<00:00, 141.42it/s]


Epoch 13 elapsed: 34.511s


train: 100%|██████████| 4880/4880 [00:33<00:00, 144.19it/s]


Epoch 14 elapsed: 33.850s


train: 100%|██████████| 4880/4880 [00:32<00:00, 150.85it/s]


Epoch 15 elapsed: 32.355s


train: 100%|██████████| 4880/4880 [00:32<00:00, 150.73it/s]


Epoch 16 elapsed: 32.381s


train: 100%|██████████| 4880/4880 [00:32<00:00, 151.26it/s]


Epoch 17 elapsed: 32.267s


train: 100%|██████████| 4880/4880 [00:31<00:00, 153.87it/s]


Epoch 18 elapsed: 31.720s


train: 100%|██████████| 4880/4880 [00:32<00:00, 149.80it/s]


Epoch 19 elapsed: 32.580s


train: 100%|██████████| 4880/4880 [00:32<00:00, 150.14it/s]


Epoch 20 elapsed: 32.508s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## WideDeep

In [None]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import WideDeep
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts with earlier models.
tf.keras.backend.clear_session()

# Load the training ratings and the test (user, item) pairs to be scored.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training rows for validation (fixed seed for reproducibility).
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() returns a new frame here; renaming a column slice with inplace=True
# can trigger pandas' SettingWithCopyWarning.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender datasets; data_info is passed to the model constructor below.
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Wide & Deep model configured for the explicit-rating task.
model = WideDeep(
    task="rating",  # Rating task, since the feedback is explicit ratings
    data_info=data_info,  # Metadata produced by DatasetPure
    use_bn=False,  # Batch normalization disabled because of an incompatibility with Keras 3
    # Dropout is disabled. The cell previously passed False here with a comment
    # saying dropout was being added; None matches the DeepFM and FM cells.
    dropout_rate=None,
    seed=42
)

# Train on the training split, evaluating against the validation split.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Score every (user, item) pair of the test set.
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions to the test frame and write the Kaggle submission (ID, rating).
df_test['rating'] = predictions
df_test[['ID', 'rating']].to_csv("predictions_widedeep.csv", index=False)

print("Archivo de predicciones guardado como predictions_widedeep.csv")

Training start time: [35m2025-03-30 10:32:43[0m
total params: [33m3,590,937[0m | embedding params: [33m3,576,567[0m | network params: [33m14,370[0m


train: 100%|██████████| 2440/2440 [00:17<00:00, 139.21it/s]


Epoch 1 elapsed: 17.531s


train: 100%|██████████| 2440/2440 [00:15<00:00, 161.44it/s]


Epoch 2 elapsed: 15.117s


train: 100%|██████████| 2440/2440 [00:10<00:00, 224.49it/s]


Epoch 3 elapsed: 10.872s


train: 100%|██████████| 2440/2440 [00:09<00:00, 255.21it/s]


Epoch 4 elapsed: 9.563s


train: 100%|██████████| 2440/2440 [00:09<00:00, 245.92it/s]


Epoch 5 elapsed: 9.927s


train: 100%|██████████| 2440/2440 [00:09<00:00, 244.95it/s]


Epoch 6 elapsed: 9.965s


train: 100%|██████████| 2440/2440 [00:09<00:00, 246.87it/s]


Epoch 7 elapsed: 9.887s


train: 100%|██████████| 2440/2440 [00:09<00:00, 254.36it/s]


Epoch 8 elapsed: 9.598s


train: 100%|██████████| 2440/2440 [00:09<00:00, 247.59it/s]


Epoch 9 elapsed: 9.862s


train: 100%|██████████| 2440/2440 [00:09<00:00, 248.27it/s]


Epoch 10 elapsed: 9.833s


train: 100%|██████████| 2440/2440 [00:10<00:00, 237.57it/s]


Epoch 11 elapsed: 10.274s


train: 100%|██████████| 2440/2440 [00:09<00:00, 248.53it/s]


Epoch 12 elapsed: 9.821s


train: 100%|██████████| 2440/2440 [00:09<00:00, 249.37it/s]


Epoch 13 elapsed: 9.788s


train: 100%|██████████| 2440/2440 [00:11<00:00, 205.29it/s]


Epoch 14 elapsed: 11.890s


train: 100%|██████████| 2440/2440 [00:10<00:00, 238.49it/s]


Epoch 15 elapsed: 10.241s


train: 100%|██████████| 2440/2440 [00:15<00:00, 152.70it/s]


Epoch 16 elapsed: 15.982s


train: 100%|██████████| 2440/2440 [00:10<00:00, 239.01it/s]


Epoch 17 elapsed: 10.215s


train: 100%|██████████| 2440/2440 [00:09<00:00, 263.71it/s]


Epoch 18 elapsed: 9.257s


train: 100%|██████████| 2440/2440 [00:09<00:00, 248.10it/s]


Epoch 19 elapsed: 9.839s


train: 100%|██████████| 2440/2440 [00:09<00:00, 247.68it/s]


Epoch 20 elapsed: 9.855s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 396

## FM

In [13]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import FM
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the FM model for the rating task
model = FM(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    use_bn=False,  # disable batch normalization to avoid conflicts with Keras 3
    dropout_rate=None,  # disable dropout to avoid conflicts with Keras 3
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_fm.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")

Training start time: [35m2025-03-30 12:13:36[0m
total params: [33m3,576,361[0m | embedding params: [33m3,576,343[0m | network params: [33m18[0m


train: 100%|██████████| 2440/2440 [00:10<00:00, 225.54it/s]


Epoch 1 elapsed: 10.822s


train: 100%|██████████| 2440/2440 [00:10<00:00, 241.08it/s]


Epoch 2 elapsed: 10.124s


train: 100%|██████████| 2440/2440 [00:10<00:00, 240.53it/s]


Epoch 3 elapsed: 10.147s


train: 100%|██████████| 2440/2440 [00:10<00:00, 239.89it/s]


Epoch 4 elapsed: 10.175s


train: 100%|██████████| 2440/2440 [00:10<00:00, 240.54it/s]


Epoch 5 elapsed: 10.147s


train: 100%|██████████| 2440/2440 [00:10<00:00, 239.63it/s]


Epoch 6 elapsed: 10.187s


train: 100%|██████████| 2440/2440 [00:09<00:00, 260.13it/s]


Epoch 7 elapsed: 9.383s


train: 100%|██████████| 2440/2440 [00:10<00:00, 240.50it/s]


Epoch 8 elapsed: 10.149s


train: 100%|██████████| 2440/2440 [00:10<00:00, 241.22it/s]


Epoch 9 elapsed: 10.118s


train: 100%|██████████| 2440/2440 [00:13<00:00, 181.34it/s]


Epoch 10 elapsed: 13.466s


train: 100%|██████████| 2440/2440 [00:11<00:00, 203.82it/s]


Epoch 11 elapsed: 11.980s


train: 100%|██████████| 2440/2440 [00:10<00:00, 238.51it/s]


Epoch 12 elapsed: 10.233s


train: 100%|██████████| 2440/2440 [00:10<00:00, 238.27it/s]


Epoch 13 elapsed: 10.244s


train: 100%|██████████| 2440/2440 [00:10<00:00, 238.44it/s]


Epoch 14 elapsed: 10.236s


train: 100%|██████████| 2440/2440 [00:09<00:00, 246.89it/s]


Epoch 15 elapsed: 9.887s


train: 100%|██████████| 2440/2440 [00:09<00:00, 248.08it/s]


Epoch 16 elapsed: 9.838s


train: 100%|██████████| 2440/2440 [00:10<00:00, 237.61it/s]


Epoch 17 elapsed: 10.272s


train: 100%|██████████| 2440/2440 [00:11<00:00, 207.04it/s]


Epoch 18 elapsed: 11.789s


train: 100%|██████████| 2440/2440 [00:10<00:00, 237.45it/s]


Epoch 19 elapsed: 10.281s


train: 100%|██████████| 2440/2440 [00:10<00:00, 239.63it/s]


Epoch 20 elapsed: 10.185s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## AutoInt

In [8]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import AutoInt
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the AutoInt model for the rating task
model = AutoInt(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    att_embed_size=(8, 8, 8),
    dropout_rate=None,  # disable dropout to avoid conflicts with Keras 3
    # Residual connection is ENABLED here (the value passed is True).
    # NOTE(review): if the intent was to disable it, pass False instead.
    use_residual=True,
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name: the message now reports the file
# that is actually written (predictions_autoint_baseline.csv).
output_path = "predictions_autoint_baseline.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")

Training start time: [35m2025-03-30 15:52:34[0m


train: 100%|██████████| 2440/2440 [00:41<00:00, 58.88it/s]


Epoch 1 elapsed: 41.445s


train: 100%|██████████| 2440/2440 [00:41<00:00, 59.00it/s]


Epoch 2 elapsed: 41.364s


train: 100%|██████████| 2440/2440 [00:41<00:00, 59.09it/s]


Epoch 3 elapsed: 41.294s


train: 100%|██████████| 2440/2440 [00:41<00:00, 59.07it/s]


Epoch 4 elapsed: 41.312s


train: 100%|██████████| 2440/2440 [00:41<00:00, 59.11it/s]


Epoch 5 elapsed: 41.283s


train: 100%|██████████| 2440/2440 [00:43<00:00, 56.46it/s]


Epoch 6 elapsed: 43.224s


train: 100%|██████████| 2440/2440 [00:42<00:00, 57.50it/s]


Epoch 7 elapsed: 42.442s


train: 100%|██████████| 2440/2440 [00:42<00:00, 57.42it/s]


Epoch 8 elapsed: 42.500s


train: 100%|██████████| 2440/2440 [00:42<00:00, 57.85it/s]


Epoch 9 elapsed: 42.187s


train: 100%|██████████| 2440/2440 [00:41<00:00, 58.30it/s]


Epoch 10 elapsed: 41.855s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## DIN

In [6]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import DIN
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the DIN model for the rating task
model = DIN(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    use_bn=False,  # disable batch normalization to avoid conflicts with Keras 3
    dropout_rate=None,  # disable dropout to avoid conflicts with Keras 3
    use_tf_attention=False,
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_din.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")

Training start time: [35m2025-03-30 10:49:21[0m
total params: [33m3,383,666[0m | embedding params: [33m3,366,210[0m | network params: [33m17,456[0m


train: 100%|██████████| 2440/2440 [00:16<00:00, 145.23it/s]


Epoch 1 elapsed: 16.804s


train: 100%|██████████| 2440/2440 [00:14<00:00, 165.52it/s]


Epoch 2 elapsed: 14.745s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.73it/s]


Epoch 3 elapsed: 14.550s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.63it/s]


Epoch 4 elapsed: 14.560s


train: 100%|██████████| 2440/2440 [00:15<00:00, 157.72it/s]


Epoch 5 elapsed: 15.477s


train: 100%|██████████| 2440/2440 [00:15<00:00, 156.58it/s]


Epoch 6 elapsed: 15.587s


train: 100%|██████████| 2440/2440 [00:14<00:00, 164.29it/s]


Epoch 7 elapsed: 14.859s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.05it/s]


Epoch 8 elapsed: 14.610s


train: 100%|██████████| 2440/2440 [00:15<00:00, 157.55it/s]


Epoch 9 elapsed: 15.491s


train: 100%|██████████| 2440/2440 [00:14<00:00, 166.51it/s]


Epoch 10 elapsed: 14.657s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.17it/s]


Epoch 11 elapsed: 14.599s


train: 100%|██████████| 2440/2440 [00:14<00:00, 166.31it/s]


Epoch 12 elapsed: 14.674s


train: 100%|██████████| 2440/2440 [00:14<00:00, 165.58it/s]


Epoch 13 elapsed: 14.740s


train: 100%|██████████| 2440/2440 [00:15<00:00, 157.87it/s]


Epoch 14 elapsed: 15.462s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.33it/s]


Epoch 15 elapsed: 14.587s


train: 100%|██████████| 2440/2440 [00:14<00:00, 166.98it/s]


Epoch 16 elapsed: 14.616s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.97it/s]


Epoch 17 elapsed: 14.530s


train: 100%|██████████| 2440/2440 [00:15<00:00, 159.86it/s]


Epoch 18 elapsed: 15.267s


train: 100%|██████████| 2440/2440 [00:14<00:00, 168.13it/s]


Epoch 19 elapsed: 14.517s


train: 100%|██████████| 2440/2440 [00:14<00:00, 165.90it/s]


Epoch 20 elapsed: 14.711s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## RNN4Rec

In [7]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import RNN4Rec
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the RNN4Rec model for the rating task
model = RNN4Rec(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    dropout_rate=None,  # disable dropout to avoid conflicts
    use_layer_norm=False,  # disable layer normalization
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_rnn4rec.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")


Object was never used (type <class 'tensorflow.python.framework.ops.Operation'>):
<tf.Operation 'gru_1/Assert/Assert' type=Assert>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/rnn.py", line 682, in _cudnn_gru
    _assert_valid_mask(mask)  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/rnn.py", line 557, in _assert_valid_mask
    tf.Assert(  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/util/traceback_utils.py", line 155, in error_handler
    del filtered_tb  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/util/dispatch.py", line 1260, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped


Training start time: [35m2025-03-30 11:15:23[0m
total params: [33m4,820,415[0m | embedding params: [33m4,818,623[0m | network params: [33m1,792[0m


train: 100%|██████████| 2440/2440 [00:16<00:00, 151.21it/s]


Epoch 1 elapsed: 16.139s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.68it/s]


Epoch 2 elapsed: 14.218s


train: 100%|██████████| 2440/2440 [00:14<00:00, 164.76it/s]


Epoch 3 elapsed: 14.812s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.34it/s]


Epoch 4 elapsed: 14.246s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.29it/s]


Epoch 5 elapsed: 14.249s


train: 100%|██████████| 2440/2440 [00:14<00:00, 169.82it/s]


Epoch 6 elapsed: 14.372s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.62it/s]


Epoch 7 elapsed: 14.560s


train: 100%|██████████| 2440/2440 [00:15<00:00, 160.48it/s]


Epoch 8 elapsed: 15.208s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.80it/s]


Epoch 9 elapsed: 14.208s


train: 100%|██████████| 2440/2440 [00:14<00:00, 172.44it/s]


Epoch 10 elapsed: 14.155s


train: 100%|██████████| 2440/2440 [00:14<00:00, 172.31it/s]


Epoch 11 elapsed: 14.164s


train: 100%|██████████| 2440/2440 [00:14<00:00, 173.14it/s]


Epoch 12 elapsed: 14.096s


train: 100%|██████████| 2440/2440 [00:14<00:00, 164.42it/s]


Epoch 13 elapsed: 14.843s


train: 100%|██████████| 2440/2440 [00:14<00:00, 169.21it/s]


Epoch 14 elapsed: 14.422s


train: 100%|██████████| 2440/2440 [00:14<00:00, 169.25it/s]


Epoch 15 elapsed: 14.423s


train: 100%|██████████| 2440/2440 [00:14<00:00, 169.51it/s]


Epoch 16 elapsed: 14.400s


train: 100%|██████████| 2440/2440 [00:14<00:00, 167.93it/s]


Epoch 17 elapsed: 14.534s


train: 100%|██████████| 2440/2440 [00:14<00:00, 166.27it/s]


Epoch 18 elapsed: 14.679s


train: 100%|██████████| 2440/2440 [00:14<00:00, 171.02it/s]


Epoch 19 elapsed: 14.272s


train: 100%|██████████| 2440/2440 [00:16<00:00, 144.39it/s]


Epoch 20 elapsed: 16.901s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## Caser

In [9]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import Caser
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the Caser model for the rating task
model = Caser(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    dropout_rate=None,  # disable dropout to avoid conflicts
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_caser.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")


Training start time: [35m2025-03-30 16:01:51[0m
total params: [33m8,187,647[0m | embedding params: [33m8,184,503[0m | network params: [33m3,144[0m


train: 100%|██████████| 2440/2440 [01:36<00:00, 25.27it/s]


Epoch 1 elapsed: 96.551s


train: 100%|██████████| 2440/2440 [01:36<00:00, 25.37it/s]


Epoch 2 elapsed: 96.207s


train: 100%|██████████| 2440/2440 [01:35<00:00, 25.44it/s]


Epoch 3 elapsed: 95.927s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.40it/s]


Epoch 4 elapsed: 92.448s


train: 100%|██████████| 2440/2440 [01:33<00:00, 26.16it/s]


Epoch 5 elapsed: 93.293s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.51it/s]


Epoch 6 elapsed: 92.070s


train: 100%|██████████| 2440/2440 [01:33<00:00, 26.23it/s]


Epoch 7 elapsed: 93.042s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.44it/s]


Epoch 8 elapsed: 92.312s


train: 100%|██████████| 2440/2440 [01:31<00:00, 26.68it/s]


Epoch 9 elapsed: 91.481s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.29it/s]


Epoch 10 elapsed: 92.819s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.29it/s]


Epoch 11 elapsed: 92.820s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.39it/s]


Epoch 12 elapsed: 92.461s


train: 100%|██████████| 2440/2440 [01:32<00:00, 26.48it/s]


Epoch 13 elapsed: 92.155s


train: 100%|██████████| 2440/2440 [01:36<00:00, 25.25it/s]


Epoch 14 elapsed: 96.653s


train: 100%|██████████| 2440/2440 [01:36<00:00, 25.29it/s]


Epoch 15 elapsed: 96.503s


train: 100%|██████████| 2440/2440 [01:34<00:00, 25.71it/s]


Epoch 16 elapsed: 94.907s


train: 100%|██████████| 2440/2440 [01:34<00:00, 25.80it/s]


Epoch 17 elapsed: 94.587s


train: 100%|██████████| 2440/2440 [01:33<00:00, 25.99it/s]


Epoch 18 elapsed: 93.900s


train: 100%|██████████| 2440/2440 [01:34<00:00, 25.78it/s]


Epoch 19 elapsed: 94.662s


train: 100%|██████████| 2440/2440 [01:35<00:00, 25.68it/s]


Epoch 20 elapsed: 95.051s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## WaveNet

In [9]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import WaveNet
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the WaveNet model for the rating task.
# The commented-out arguments are optional overrides that are currently not passed,
# so the library's own values apply; uncomment to tune.
model = WaveNet(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    # embed_size=16,
    # n_epochs=20,
    # lr=0.001,
    # batch_size=256,
    dropout_rate=None,  # disable dropout to avoid conflicts
    # n_filters=16,  # number of filters in the CNN layer
    # n_blocks=1,  # number of CNN blocks
    # n_layers_per_block=4,  # layers per block
    # recent_num=10,  # number of recent items in the behaviour sequence
    seed=42
)

# Train the model.
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_wavenet.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")

Training start time: [35m2025-03-30 11:23:58[0m
total params: [33m8,187,119[0m | embedding params: [33m8,184,559[0m | network params: [33m2,560[0m


train: 100%|██████████| 2440/2440 [00:19<00:00, 122.08it/s]


Epoch 1 elapsed: 19.989s


train: 100%|██████████| 2440/2440 [00:17<00:00, 139.88it/s]


Epoch 2 elapsed: 17.446s


train: 100%|██████████| 2440/2440 [00:17<00:00, 139.36it/s]


Epoch 3 elapsed: 17.514s


train: 100%|██████████| 2440/2440 [00:17<00:00, 138.16it/s]


Epoch 4 elapsed: 17.664s


train: 100%|██████████| 2440/2440 [00:17<00:00, 136.46it/s]


Epoch 5 elapsed: 17.884s


train: 100%|██████████| 2440/2440 [00:17<00:00, 138.05it/s]


Epoch 6 elapsed: 17.678s


train: 100%|██████████| 2440/2440 [00:17<00:00, 142.98it/s]


Epoch 7 elapsed: 17.069s


train: 100%|██████████| 2440/2440 [00:17<00:00, 138.76it/s]


Epoch 8 elapsed: 17.588s


train: 100%|██████████| 2440/2440 [00:17<00:00, 141.97it/s]


Epoch 9 elapsed: 17.191s


train: 100%|██████████| 2440/2440 [00:18<00:00, 134.62it/s]


Epoch 10 elapsed: 18.127s


train: 100%|██████████| 2440/2440 [00:17<00:00, 139.32it/s]


Epoch 11 elapsed: 17.517s


train: 100%|██████████| 2440/2440 [00:18<00:00, 134.35it/s]


Epoch 12 elapsed: 18.165s


train: 100%|██████████| 2440/2440 [00:17<00:00, 139.67it/s]


Epoch 13 elapsed: 17.473s


train: 100%|██████████| 2440/2440 [00:17<00:00, 137.48it/s]


Epoch 14 elapsed: 17.750s


train: 100%|██████████| 2440/2440 [00:17<00:00, 142.51it/s]


Epoch 15 elapsed: 17.127s


train: 100%|██████████| 2440/2440 [00:17<00:00, 142.02it/s]


Epoch 16 elapsed: 17.188s


train: 100%|██████████| 2440/2440 [00:17<00:00, 138.62it/s]


Epoch 17 elapsed: 17.604s


train: 100%|██████████| 2440/2440 [00:17<00:00, 142.44it/s]


Epoch 18 elapsed: 17.133s


train: 100%|██████████| 2440/2440 [00:17<00:00, 137.88it/s]


Epoch 19 elapsed: 17.700s


train: 100%|██████████| 2440/2440 [00:17<00:00, 142.78it/s]


Epoch 20 elapsed: 17.093s
[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 39

## UserCF

In [11]:
import pandas as pd
import tensorflow as tf
from libreco.algorithms import UserCF
from libreco.data import DatasetPure
from sklearn.model_selection import train_test_split

# Reset the TensorFlow/Keras session to avoid variable conflicts between model cells
tf.keras.backend.clear_session()

# Load the training and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Hold out 20% of the training data for validation (fixed seed for reproducibility)
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42)

# Keep only the columns LibRecommender needs and rename 'rating' to 'label'.
# rename() without inplace returns a new frame, so the column subsets taken
# from train_df / val_df are never mutated in place.
train_data = train_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
val_data = val_df[['user', 'item', 'rating']].rename(columns={'rating': 'label'})
test_data = df_test[['user', 'item']]

# Build the LibRecommender train / eval datasets
train_data, data_info = DatasetPure.build_trainset(train_data)
val_data = DatasetPure.build_evalset(val_data)

# Initialise the UserCF model for the rating task
model = UserCF(
    task="rating",  # explicit ratings, so this is a rating task
    data_info=data_info,  # metadata produced by DatasetPure.build_trainset
    seed=42
)

# Fit the model (the recorded output shows a user-user similarity matrix being built).
# NOTE(review): eval_data is supplied, but the recorded output of this cell shows no
# validation metrics; check fit()'s `verbose` / `metrics` options if they are wanted.
model.fit(train_data, neg_sampling=False, eval_data=val_data)

# Predict ratings for the test (user, item) pairs
predictions = model.predict(test_data['user'].values, test_data['item'].values)

# Attach the predictions so the submission has the columns Kaggle expects
df_test['rating'] = predictions

# Single source of truth for the file name, so the saved file and the message agree
output_path = "predictions_usercf.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")


Training start time: [35m2025-03-30 11:54:09[0m
Final block size and num: (3065, 21)
sim_matrix elapsed: 11.717s
sim_matrix, shape: (64356, 64356), num_elements: 4706674, density: 0.1136 %


top_k: 100%|██████████| 64356/64356 [00:01<00:00, 44229.34it/s]


[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 396, 397, 399, 400, 401, 403

In [None]:
# Most frequent rating in the training split; used further down as the fallback value.
# NOTE(review): this cell relies on `train_df`, `test_data` and `model` left in the
# kernel by whichever model cell was executed last.
rating_modes = train_df['rating'].mode()
rating_mode = rating_modes[0]

# Score every test (user, item) pair, asking libreco for its 'popular' cold-start
# strategy instead of the default one.
# NOTE(review): confirm in the libreco docs what 'popular' returns for unknown ids.
test_users = test_data['user'].values
test_items = test_data['item'].values
predictions = model.predict(test_users, test_items, cold_start='popular')

def replace_unknown(preds, mode):
    """Return a list copy of ``preds`` with missing entries replaced by ``mode``.

    Args:
        preds: Iterable of predicted values; entries for which ``pd.isna`` is
            true are treated as unknown.
        mode: Value substituted for every unknown entry.

    Returns:
        A new list with the same length and order as ``preds``.
    """
    filled = []
    for value in preds:
        filled.append(mode if pd.isna(value) else value)
    return filled

# Replace any missing predictions with the most frequent training rating
predictions = replace_unknown(predictions, rating_mode)

df_test['rating'] = predictions

# Single source of truth for the file name: the message now reports the file that is
# actually written (it previously announced "predictions_ncf.csv").
# NOTE(review): the name says "ncf", but `model` is whichever model cell ran last in
# the kernel — confirm the predictions really come from NCF before submitting.
output_path = "predictions_ncf_moda.csv"
df_test[['ID', 'rating']].to_csv(output_path, index=False)

print(f"Archivo de predicciones guardado como {output_path}")

[31mDetect 20856 unknown interaction(s), position: [1, 2, 4, 6, 8, 10, 14, 17, 19, 28, 29, 34, 35, 37, 39, 40, 41, 42, 43, 45, 48, 49, 50, 52, 53, 60, 66, 70, 71, 72, 73, 74, 75, 80, 81, 85, 86, 87, 90, 95, 96, 100, 101, 102, 104, 108, 111, 113, 116, 117, 118, 120, 121, 122, 124, 126, 128, 130, 131, 132, 139, 141, 142, 143, 144, 145, 146, 149, 150, 151, 152, 153, 157, 160, 161, 163, 164, 168, 172, 173, 174, 175, 176, 177, 182, 188, 191, 193, 194, 199, 200, 203, 206, 209, 210, 211, 212, 215, 217, 219, 221, 222, 223, 226, 227, 228, 229, 233, 234, 236, 237, 238, 240, 241, 245, 252, 253, 255, 256, 257, 258, 261, 264, 265, 266, 269, 270, 272, 273, 274, 275, 277, 278, 281, 282, 285, 286, 289, 292, 293, 296, 297, 298, 300, 302, 305, 307, 310, 311, 317, 318, 319, 320, 321, 324, 325, 326, 328, 329, 331, 332, 334, 337, 338, 339, 340, 344, 345, 346, 349, 350, 353, 355, 358, 360, 361, 362, 367, 369, 370, 372, 373, 374, 375, 376, 379, 381, 384, 388, 391, 393, 394, 395, 396, 397, 399, 400, 401, 403