# 02 — Modelos de CF Explícito (Neural CF)

Este cuaderno entrena un modelo **Neural Collaborative Filtering** 
(embeddings de usuario e ítem + MLP) sobre `rating_complete` (explícito).

In [1]:
# Importación y rutas
import sys
from pathlib import Path
# Resolve the repository root: when the kernel was started inside the
# "notebooks" folder the root is one level up, otherwise it is the
# current working directory itself.
repo_root = Path().resolve()
if Path.cwd().name == "notebooks":
    repo_root = repo_root.parent

# Put the root first on the import path so project modules can be imported.
sys.path.insert(0, str(repo_root))
print("Repo root:", repo_root)


Repo root: C:\Users\enman\Downloads\COLFONDOS\DMC\anime-recomendation


In [2]:
import polars as pl, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

processed = repo_root / "data" / "processed"
ratings = pl.read_parquet(processed / "rating_complete.parquet")

# Index maps (original user/item id -> contiguous 0-based row index).
# `unique()` makes no promise about output order, so the ids are sorted before
# numbering them. This keeps the id -> index assignment identical across runs,
# which matters because the trained embedding rows are addressed by these
# indices (a model saved in one session must line up with the maps rebuilt in
# another).
# NOTE(review): newer polars releases rename `with_row_count` to
# `with_row_index` — confirm against the pinned polars version.
users = ratings.select("user_id").unique().sort("user_id").with_row_count("uidx")
items = ratings.select("anime_id").unique().sort("anime_id").with_row_count("iidx")

# Replace the original ids by their indices; only the columns used for
# training are kept.
ui = (ratings.join(users, on="user_id").join(items, on="anime_id")
            .select("uidx","iidx","score"))

# Number of distinct users/items = number of embedding rows needed.
n_users = int(users.height)
n_items = int(items.height)
print("n_users, n_items:", n_users, n_items)

# Model inputs (user index, item index) and the regression target.
Xu = ui["uidx"].to_numpy()
Xi = ui["iidx"].to_numpy()
y  = ui["score"].to_numpy().astype("float32")

# Min-max scale the scores to [0, 1] using the observed extremes
# (1..10 -> 0..1 according to the original note); the small epsilon guards
# the denominator.
y_min = y.min()
y_max = y.max()
y_norm = (y - y_min) / (y_max - y_min + 1e-9)

# Random 90/10 hold-out with a fixed seed and no stratification.
(X_train_u, X_test_u,
 X_train_i, X_test_i,
 y_train, y_test) = train_test_split(
    Xu, Xi, y_norm,
    test_size=0.1,
    random_state=42,
    stratify=None,
)

def RecommenderNet(n_users, n_items, embedding_size=64, hidden=(128, 64),
                   dropout=0.2, learning_rate=1e-3):
    """Build and compile a Neural Collaborative Filtering model.

    A user index and an item index are each looked up in their own embedding
    table; the two vectors are concatenated and passed through a stack of
    Dense -> BatchNormalization -> ReLU -> Dropout blocks, ending in a single
    sigmoid unit.

    Args:
        n_users: Number of rows of the user embedding table.
        n_items: Number of rows of the item embedding table.
        embedding_size: Width of both embedding tables.
        hidden: Iterable with the number of units of each hidden Dense layer.
            The default is a tuple (not a list) so it is not a shared mutable
            default argument.
        dropout: Dropout rate applied after every hidden block.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` taking ``[user_idx, item_idx]`` and
        producing one value in [0, 1]; loss is MSE and MAE is tracked.
    """
    u_in = Input(shape=(1,), name="u_in")
    i_in = Input(shape=(1,), name="i_in")

    # One embedding table per entity; Flatten drops the length-1 sequence axis.
    u_emb = Embedding(n_users, embedding_size, name="u_emb")(u_in)
    i_emb = Embedding(n_items, embedding_size, name="i_emb")(i_in)
    u_vec = Flatten()(u_emb)
    i_vec = Flatten()(i_emb)

    x = Concatenate()([u_vec, i_vec])
    for h in hidden:
        x = Dense(h)(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = Dropout(dropout)(x)

    # Sigmoid keeps the prediction in [0, 1].
    out = Dense(1, activation="sigmoid")(x)
    model = Model([u_in, i_in], out)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss="mse", metrics=["mae"])
    return model

# Instantiate the network sized for the indexed users/items and list its layers.
model = RecommenderNet(n_users=n_users, n_items=n_items)
model.summary()


ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Directory where the best checkpoint is written (created if missing).
ckpt = repo_root / "models"
ckpt.mkdir(parents=True, exist_ok=True)

callbacks = [
    # Keep only the weights with the lowest validation loss seen so far.
    ModelCheckpoint(
        filepath=str(ckpt / "ncf_best.keras"),
        monitor="val_loss",
        mode="min",
        save_best_only=True,
    ),
    # Halve the learning rate after 2 epochs without improvement, down to 1e-5.
    ReduceLROnPlateau(factor=0.5, patience=2, min_lr=1e-5),
]

# The held-out split doubles as the validation data during training.
hist = model.fit(
    x=[X_train_u, X_train_i],
    y=y_train,
    validation_data=([X_test_u, X_test_i], y_test),
    epochs=5,
    batch_size=8192,
    callbacks=callbacks,
    verbose=1,
)

# Evaluation: score the held-out pairs and flatten the (n, 1) output to 1-D
# (RMSE/MAE are computed further down in this cell).
y_pred = model.predict(x=[X_test_u, X_test_i], verbose=0).reshape(-1)
import numpy as np
def rmse(a, b):
    """Return the root-mean-square error between ``a`` and ``b``.

    Both inputs are converted to float64 arrays first, so plain Python
    sequences are accepted as well as NumPy arrays, and the squared errors
    are accumulated in double precision regardless of the input dtype.

    Args:
        a: Array-like of reference values.
        b: Array-like of predicted values, broadcastable against ``a``.

    Returns:
        The RMSE as a Python ``float``.
    """
    diff = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return float(np.sqrt(np.mean(diff ** 2)))

# Errors measured on the normalized targets.
rmse_val = rmse(y_test, y_pred)
mae_val = float(np.abs(y_test - y_pred).mean())
print("VAL (norm) RMSE:", rmse_val, "MAE:", mae_val)

# Undo the min-max scaling so the error can be read on the original rating scale.
y_pred_denorm = y_min + (y_max - y_min) * y_pred
y_test_denorm = y_min + (y_max - y_min) * y_test
print("VAL (original) RMSE:", rmse(y_test_denorm, y_pred_denorm))


In [None]:
# Funciones de recomendación para usuario dado
import numpy as np

def recommend_for_user(user_id_original, topk=10):
    """Return the ``topk`` best-scored unseen items for a user, best first.

    Every item index is scored by the model for the given user, items the
    user already has a rating for are dropped, and the highest-scoring
    remaining ones are translated back to their original ``anime_id``.

    Args:
        user_id_original: A ``user_id`` value as it appears in ``users``.
            It must be present; otherwise the lookup of the first matching
            row fails on the empty filter result.
        topk: Maximum number of recommendations to return.

    Returns:
        List of original ``anime_id`` values ordered by descending predicted
        score.
    """
    # Map the original id to its embedding index.
    uidx = int(users.filter(pl.col("user_id")==user_id_original)["uidx"][0])

    # Score every item for this user in one batch.
    ii = np.arange(n_items, dtype=np.int32)
    uu = np.full_like(ii, uidx)
    scores = model.predict([uu, ii], verbose=0).ravel()

    # Drop the items the user has already rated.
    seen = set(ui.filter(pl.col("uidx")==uidx)["iidx"].to_list())
    order = np.argsort(scores)[::-1]
    recs = [int(i) for i in order if i not in seen][:topk]

    # Translate indices back to original ids one by one so the score ranking
    # is kept (filtering the item table with `is_in` would return the rows in
    # table order and lose it).
    id_by_idx = dict(zip(items["iidx"].to_list(), items["anime_id"].to_list()))
    return [id_by_idx[i] for i in recs]

# Smoke test: recommendations for the first user of the index map.
print(
    "Ejemplo recomendaciones para el primer usuario:",
    recommend_for_user(users.get_column("user_id")[0]),
)
