# 03 — Modelos de CF Implícito (ALS/BPR)

Binariza señales desde `animelist.csv` y entrena un modelo implícito (ej.: ALS/BPR).

In [None]:
import sys
from pathlib import Path
# Resolve the repository root: when the working directory is named
# "notebooks", the root is its parent; otherwise it is the working directory.
current_dir = Path().resolve()
running_in_notebooks = Path.cwd().name == "notebooks"
repo_root = current_dir.parent if running_in_notebooks else current_dir

# Put the repository root first on sys.path so it takes precedence on import.
sys.path.insert(0, str(repo_root))


In [None]:
import polars as pl, numpy as np
from pathlib import Path

processed = repo_root / "data" / "processed"
raw = repo_root / "data" / "raw" / "mal2020"

# infer_schema_length=0 makes polars read every column as a string, so ids and
# any numeric status codes arrive as text (e.g. "1", "2", "6").
animelist = pl.read_csv(raw / "animelist.csv", infer_schema_length=0, encoding="utf8-lossy")
animelist = animelist.rename({c: c.lower() for c in animelist.columns})

# Simple implicit signal: watching / plan to watch / completed => 1
status_col = next((c for c in ("my_status", "status") if c in animelist.columns), None)
if status_col is None:
    # No status column found: keep every row as a positive interaction.
    implicit_mask = pl.lit(True)
elif animelist.schema[status_col] == pl.Utf8:
    # String column: accept the text labels as well as the numeric codes in
    # their string form (1 watching, 2 completed, 6 plan_to_watch).
    implicit_mask = animelist[status_col].is_in(
        ["watching", "plan to watch", "completed", "1", "2", "6"]
    )
else:
    # Numeric column (only reachable if the CSV is read with schema inference).
    implicit_mask = animelist[status_col].is_in([1, 2, 6])  # 1 watching, 2 completed, 6 plan_to_watch

# One row per distinct (user, anime) pair that passed the mask.
implicit_ui = animelist.filter(implicit_mask).select(pl.col("user_id"), pl.col("anime_id")).unique()
print("Interacciones implícitas:", implicit_ui.shape)


In [None]:
# Build the sparse user x item interaction matrix
import scipy.sparse as sp

# unique() does not guarantee row order, so sort the ids before numbering them;
# this keeps the id -> index mapping (uidx / iidx) reproducible across runs.
users = implicit_ui.select("user_id").unique().sort("user_id").with_row_count("uidx")
items = implicit_ui.select("anime_id").unique().sort("anime_id").with_row_count("iidx")

# Replace the raw ids with their contiguous row/column indices.
ui = implicit_ui.join(users, on="user_id").join(items, on="anime_id").select("uidx", "iidx")

rows = ui["uidx"].to_numpy()
cols = ui["iidx"].to_numpy()
# Every observed (user, item) pair gets weight 1.0.
data = np.ones_like(rows, dtype=np.float32)
X = sp.csr_matrix((data, (rows, cols)), shape=(int(users.height), int(items.height)))
X


In [None]:
# Train implicit ALS with the 'implicit' package when it is available
def fit_expects_user_items(version):
    """Return True if this 'implicit' version's fit() takes a users x items matrix.

    Releases from 0.5 onward take users x items; earlier releases take
    items x users. An unparseable version string is treated as a modern release.
    """
    import re

    match = re.match(r"(\d+)\.(\d+)", str(version))
    if match is None:
        return True
    return (int(match.group(1)), int(match.group(2))) >= (0, 5)


try:
    import implicit
except ImportError as e:
    print("No se pudo usar 'implicit' (instálalo si quieres ALS real). Se deja X preparado:", e)
else:
    try:
        model = implicit.als.AlternatingLeastSquares(factors=64, regularization=0.05, iterations=10)
        # X is users x items in CSR format; transpose only for old releases
        # whose fit() expects items x users.
        if fit_expects_user_items(getattr(implicit, "__version__", "")):
            model.fit(X.tocsr())
        else:
            model.fit(X.T.tocsr())
        print("ALS implícito entrenado.")
    except Exception as e:
        # Keep the notebook best-effort, but report training failures as such
        # instead of suggesting that the package is missing.
        print("Falló el entrenamiento de ALS con 'implicit'. Se deja X preparado:", e)
