NCF Recommenders Library Movielens

In [None]:
# Neural Collaborative Filtering (NCF) with MovieLens 1M

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

from recommenders.datasets import movielens
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.utils.constants import SEED
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.timer import Timer
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import get_top_k_items

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os
import tempfile

# Load the MovieLens 1M ratings and produce the train/test frames used below.
print("Downloading MovieLens 1M dataset...")

# Ask for the column names explicitly so the schema the rest of the script
# relies on ("userID", "itemID", "rating") is visible here, instead of
# depending on the loader's defaults and an identity rename.
RATING_COLUMNS = ["userID", "itemID", "rating", "timestamp"]
df = movielens.load_pandas_df(size="1m", header=RATING_COLUMNS)

print("Data preview:")
print(df.head())

# Kept as a separate name for any later code that refers to `ratings`.
ratings = df

print("Splitting data into train and test sets...")
# Random 80/20 hold-out over individual interactions, seeded for repeatability.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=SEED)
print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")

# Both frames are written to CSV for the NCF dataset loader, which (per the
# library documentation — confirm for the installed version) expects its
# input files to be ordered by user.
train_df = train_df.sort_values(by="userID")
test_df = test_df.sort_values(by="userID")

# Train NeuMF on the split and report top-K ranking metrics on the hold-out set.
# The train/test frames are staged as CSV files in a temporary directory
# because NCFDataset is constructed from file paths; the directory is deleted
# when the `with` block exits.
with tempfile.TemporaryDirectory() as temp_dir:
    train_path = os.path.join(temp_dir, "train.csv")
    test_path = os.path.join(temp_dir, "test.csv")
    train_df.to_csv(train_path, index=False)
    test_df.to_csv(test_path, index=False)

    print("Converting to NCFDataset...")
    data = NCFDataset(train_file=train_path, test_file=test_path, seed=SEED)

    print("Initializing NCF model...")
    ncf_model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",  # Options: "NeuMF", "GMF", "MLP"
        learning_rate=0.001,
        batch_size=256,
        verbose=10,
        seed=SEED,
    )

    print("Training the model...")
    with Timer() as train_time:
        ncf_model.fit(data)

    print(f"Training took: {train_time}")

    print("Generating top-K predictions...")
    # Single source of truth for the cut-off; `k` is kept as an alias so the
    # ranking step and the metrics can never disagree.
    top_k = 10
    k = top_k

    # Only users and items seen during training can be scored by the model.
    known_users = set(train_df["userID"].unique())
    known_items = set(train_df["itemID"].unique())
    test_users = test_df[test_df.userID.isin(known_users)].userID.unique()
    candidate_items = train_df["itemID"].unique()

    # Items each user already interacted with in the training split. They are
    # excluded from that user's candidates: they cannot appear in the hold-out
    # set, so leaving them in would let them occupy top-K slots and deflate
    # every ranking metric.
    seen_items = train_df.groupby("userID")["itemID"].unique()

    # Score one user at a time and keep only that user's best `top_k` rows, so
    # peak memory is bounded by the catalogue size rather than users x items.
    top_k_frames = []
    for user in test_users:
        unseen = np.setdiff1d(candidate_items, seen_items.loc[user])
        if unseen.size == 0:
            # Nothing left to recommend for a user who has seen every item.
            continue
        scores = ncf_model.predict(
            [user] * unseen.size, unseen.tolist(), is_list=True
        )
        user_preds = pd.DataFrame(
            {"userID": user, "itemID": unseen, "prediction": scores}
        )
        top_k_frames.append(user_preds.nlargest(top_k, "prediction"))

    if not top_k_frames:
        raise ValueError("No test user has unseen candidate items to rank.")

    top_k_preds = pd.concat(top_k_frames, ignore_index=True).astype(
        {"userID": int, "itemID": int, "prediction": float}
    )

    # Ground truth restricted to the users/items the model was able to score.
    test_df_filtered = test_df[
        test_df.userID.isin(test_users) & test_df.itemID.isin(known_items)
    ].copy()
    test_df_filtered = test_df_filtered[["userID", "itemID", "rating"]].astype(
        {"userID": int, "itemID": int, "rating": int}
    )

    print("Evaluating model (Top-K metrics)...")
    ranking_metrics = {
        "MAP": map_at_k,
        "NDCG": ndcg_at_k,
        "Precision": precision_at_k,
        "Recall": recall_at_k,
    }
    for metric_name, metric_fn in ranking_metrics.items():
        metric_value = metric_fn(
            test_df_filtered, top_k_preds, col_prediction="prediction", k=k
        )
        print(f"{metric_name}@{k}: {metric_value:.4f}")





MAP@10: 0.0558

NDCG@10: 0.1354

Precision@10: 0.1248

Recall@10: 0.0728