# 05 — Fusión Híbrida y Evaluación

Combina los puntajes de **CF explícito** y la similitud de **contenido** mediante un peso `α`: `s = α · s_CF + (1 − α) · s_contenido`.

In [None]:
import sys
from pathlib import Path
# Locate the repository root: when the working directory is the "notebooks"
# folder the root is its parent, otherwise it is the working directory itself.
if Path.cwd().name == "notebooks":
    repo_root = Path().resolve().parent
else:
    repo_root = Path().resolve()
# Put the root at the front of the module search path.
sys.path.insert(0, str(repo_root))


In [None]:
import polars as pl, numpy as np, pickle
from pathlib import Path
import tensorflow as tf

# Locations of the processed data and of the trained artifacts, both relative
# to the repository root.
processed = repo_root / "data" / "processed"
models_dir = repo_root / "models"

# Full ratings table, plus one row per distinct user / item with a 0-based row
# counter ("uidx" / "iidx") that the scoring functions use as model indices.
# NOTE(review): `unique()` is called without `maintain_order=True`, so the row
# order (and therefore the uidx/iidx assignment) may not be stable between
# runs -- confirm it matches the index mapping the model and the embeddings
# were built with.
ratings = pl.read_parquet(processed / "rating_complete.parquet")
users = ratings.select("user_id").unique().with_row_count("uidx")
items = ratings.select("anime_id").unique().with_row_count("iidx")

# Load the explicit-feedback CF model.
ncf_path = models_dir / "ncf_best.keras"
model = tf.keras.models.load_model(ncf_path)

# Load the content embeddings (may be a dense matrix or a sparse TF-IDF matrix).
# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts produced by this project.
with open(models_dir / "content_embeddings.pkl", "rb") as f:
    content_blob = pickle.load(f)
emb = content_blob["emb"]


In [None]:
# Funciones de scoring
import numpy as np

def scores_cf(uidx):
    """Score every item for one user with the loaded CF model.

    Args:
        uidx: Index of the user (a value of the ``uidx`` column of ``users``).

    Returns:
        The flattened output of ``model.predict`` for the pairs
        ``(uidx, 0), (uidx, 1), ..., (uidx, items.height - 1)``.
    """
    n_items = int(items.height)
    item_indices = np.arange(n_items, dtype=np.int32)
    # Same user index repeated once per item, with the items' dtype and shape.
    user_indices = np.full_like(item_indices, uidx)
    predictions = model.predict([user_indices, item_indices], verbose=0)
    return predictions.ravel()

def top_similar(iidx, topk=50):
    """Return the rows of ``emb`` most cosine-similar to row ``iidx``.

    Handles both a dense NumPy matrix and a SciPy sparse matrix (detected by
    the presence of a ``toarray`` method). Both paths divide the dot products
    by the norm of every row *and* the norm of the query row, so the returned
    values are cosine similarities in either case.

    Args:
        iidx: Row index of the query item in ``emb``.
        topk: Maximum number of neighbours to return.

    Returns:
        Tuple ``(indices, sims)``: row indices ordered by descending
        similarity and the matching similarity values. The query row itself
        is not excluded from the result.
    """
    if hasattr(emb, "toarray"):  # sparse (e.g. TF-IDF stored as CSR)
        # Take the row norms from the sparse data directly; densifying the
        # whole matrix only to compute norms needs rows * cols memory.
        row_norms = np.sqrt(np.asarray(emb.multiply(emb).sum(axis=1)).ravel())
        dots = (emb @ emb[iidx].T).toarray().ravel()
    else:
        row_norms = np.linalg.norm(emb, axis=1)
        dots = emb @ emb[iidx]
    # The small epsilon keeps all-zero rows from dividing by zero.
    sims = dots / (row_norms * row_norms[iidx] + 1e-9)
    order = np.argsort(sims)[::-1][:topk]
    return order, sims[order]

def scores_content(iidx):
    """Build a per-item content-similarity score vector for a seed item.

    Args:
        iidx: Index of the seed item passed to ``top_similar``.

    Returns:
        A float32 array of length ``items.height`` where position ``j`` holds
        the similarity that ``top_similar`` reported for index ``j`` (zero for
        any index it did not return).
    """
    n_items = int(items.height)
    neighbour_idx, neighbour_sims = top_similar(iidx, topk=n_items)
    content_scores = np.zeros(n_items, dtype=np.float32)
    # Scatter the ranked similarities back to their item positions.
    content_scores[neighbour_idx] = neighbour_sims
    return content_scores

# Fusion weight: recommend_hybrid_for_user blends alpha * CF + (1 - alpha) * content.
# NOTE(review): that function declares its own default of 0.65 and no code
# visible here reads this global -- confirm whether it is used elsewhere.
alpha = 0.65


In [None]:
# Ejemplo: recomendaciones híbridas para un usuario
def recommend_hybrid_for_user(user_id_original, topk=10, alpha=0.65):
    """Recommend unseen items for a user by blending CF and content scores.

    Each item's final score is ``alpha * cf + (1 - alpha) * content``. The
    content part is the similarity to one item the user has already rated
    (demo heuristic: the first rated row, not the most-watched item). If the
    user has no rated items the content part is all zeros.

    Args:
        user_id_original: Value of ``user_id`` as stored in ``ratings``.
        topk: Number of recommendations to return.
        alpha: Weight of the CF score; the content score gets ``1 - alpha``.

    Returns:
        List of ``anime_id`` values ordered from best to worst hybrid score,
        excluding every item the user has already rated.

    Note:
        The ``[0]`` lookup fails if ``user_id_original`` is not in ``users``.
    """
    uidx = int(users.filter(pl.col("user_id") == user_id_original)["uidx"][0])
    s_cf = scores_cf(uidx)

    # Filter this user's rows directly instead of joining the full ratings
    # table with `users` (twice) just to filter on uidx afterwards.
    seen_anime = (
        ratings.filter(pl.col("user_id") == user_id_original)["anime_id"].to_list()
    )
    if seen_anime:
        seed_iidx = int(items.filter(pl.col("anime_id") == seen_anime[0])["iidx"][0])
        s_ct = scores_content(seed_iidx)
    else:
        s_ct = np.zeros_like(s_cf)

    # NOTE(review): the two score vectors are blended without rescaling; if the
    # CF model outputs raw ratings it will dominate cosine similarities --
    # confirm the intended scales.
    s = alpha * s_cf + (1 - alpha) * s_ct

    seen_iidx = items.filter(pl.col("anime_id").is_in(seen_anime))["iidx"].to_list()
    order = np.argsort(s)[::-1]
    ranked = [int(i) for i in order[~np.isin(order, seen_iidx)][:topk]]

    # Translate indices back to anime ids while keeping the ranked order; a
    # plain filter on `items` would return them in index order instead.
    picked = items.filter(pl.col("iidx").is_in(ranked))
    iidx_to_anime = dict(zip(picked["iidx"].to_list(), picked["anime_id"].to_list()))
    return [iidx_to_anime[i] for i in ranked]

# Demo call: recommendations for the user stored in the first row of `users`
# (the "0" in the label is that row position, not necessarily user_id 0).
print("Ejemplo híbrido usuario 0:", recommend_hybrid_for_user(users['user_id'][0], topk=10))
