In [3]:
import os
import math
import random
import numpy as np
import pandas as pd
from recommenders.datasets.python_splitters import python_chrono_split, python_stratified_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from collections import defaultdict

os.environ["HF_HOME"] = "E:/Python Scripts/recsys"
os.environ['HF_DATASETS_CACHE'] = "E:/Python Scripts/recsys/data"
os.environ['TRANSFORMERS_CACHE'] = "E:/Python Scripts/recsys/models"

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# One seed shared by every random number generator the notebook touches.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
DEVICE

device(type='cuda')

In [185]:
# Two review categories used to study cross-domain transfer.
SOURCE_DOMAIN, TARGET_DOMAIN = "Movies_and_TV", "Video_Games"
DOMAINS = [SOURCE_DOMAIN, TARGET_DOMAIN]

# Interaction-count thresholds handed to preprocess_data().
MIN_USER_INTERACTIONS = MIN_ITEM_INTERACTIONS = 10
# Ratings at or above this value are considered positive.
POSITIVE_THRESHOLD = 4.0

# Load the dataset
def load_amazon_reviews(domain:str, max_per_domain:int=100000) -> pd.DataFrame:
    """Read up to ``max_per_domain`` reviews of one category into a DataFrame.

    The ``raw_review_{domain}`` configuration of
    ``McAuley-Lab/Amazon-Reviews-2023`` is loaded and the leading records of
    its ``"full"`` split are kept, in dataset order.

    Args:
        domain: Category name used to build the configuration name.
        max_per_domain: Upper bound on the number of records kept.

    Returns:
        DataFrame with the columns ``user``, ``item``, ``rating``, ``domain``,
        ``verified_purchase`` and ``timestamp`` (one row per kept review).
    """
    reviews = load_dataset(
        "McAuley-Lab/Amazon-Reviews-2023",
        f"raw_review_{domain}",
        trust_remote_code=True,
    )["full"]

    def _leading_records():
        # Yield records until the per-domain cap is reached.
        for position, record in enumerate(reviews):
            if position >= max_per_domain:
                return
            yield record

    def _as_row(record):
        # Keep only the fields the notebook uses, with explicit numeric types.
        return {
            "user": record["user_id"],
            "item": record["parent_asin"],
            "rating": float(record["rating"]),
            "domain": domain,
            "verified_purchase": record["verified_purchase"],
            "timestamp": int(record["timestamp"]),
        }

    return pd.DataFrame([_as_row(record) for record in _leading_records()])

# Load the first 100000 reviews of every configured domain, then merge them
# into a single frame ordered by review timestamp with a fresh 0..n-1 index.
dfs = [load_amazon_reviews(dom, max_per_domain=100000) for dom in DOMAINS]
df = pd.concat(dfs, ignore_index=True).sort_values("timestamp").reset_index(drop=True)

In [186]:
df.head()

Unnamed: 0,user,item,rating,domain,verified_purchase,timestamp
0,AHAYX6YWLK52LPXFSE2QUNMMS44A,783114222,5.0,Movies_and_TV,False,913069725000
1,AG3S4FROO422V5KP7DJCBXVUQLJQ,800185676,5.0,Movies_and_TV,True,914267986000
2,AHVNRIAPM3GVNS3RH3MNIEVSSBNA,6303501281,5.0,Movies_and_TV,False,914297420000
3,AHAYX6YWLK52LPXFSE2QUNMMS44A,6300215571,5.0,Movies_and_TV,True,920170436000
4,AEKPXGAS7MDLNHMCEZMOOQUOYJLA,6304279485,5.0,Movies_and_TV,False,921529732000


In [187]:
def preprocess_data(df, min_user_interactions, min_item_interactions):
    """Filter sparse users/items and attach a per-review trust weight.

    The caller's frame is left untouched; every derived column is written to
    the filtered copy that is returned.

    Filtering is a single pass: interaction counts are taken on the unfiltered
    input, so after rows are dropped a surviving user or item may end up with
    fewer interactions than the requested minimum.

    Args:
        df: Reviews with at least the columns ``user``, ``item``, ``rating``
            and ``verified_purchase``.
        min_user_interactions: Minimum number of rows a user needs in ``df``.
        min_item_interactions: Minimum number of rows an item needs in ``df``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, ``rating`` cast to float32
        and a float32 ``trust_weight`` column (1.0 when ``verified_purchase``
        is truthy, 0.8 otherwise).
    """
    # Filtering interactions
    user_counts = df["user"].value_counts()
    item_counts = df["item"].value_counts()
    active_users = user_counts[user_counts >= min_user_interactions].index
    active_items = item_counts[item_counts >= min_item_interactions].index
    keep = df["user"].isin(active_users) & df["item"].isin(active_items)

    # reset_index returns a new object, so the assignments below never reach
    # back into the caller's frame.
    filtered = df.loc[keep].reset_index(drop=True)

    print(f"Final data length: {filtered.shape[0]}")
    print(f"Unique users: {filtered['user'].nunique()}")
    print(f"Unique items: {filtered['item'].nunique()}")

    # Verified purchases get full weight, unverified ones are down-weighted.
    trust = np.where(filtered["verified_purchase"].astype(bool), 1.0, 0.8)

    # ensure dtypes
    return filtered.assign(
        rating=filtered["rating"].astype(np.float32),
        trust_weight=trust.astype(np.float32),
    )

In [188]:
# Drop users/items below the interaction thresholds and add the trust weights.
processed_df = preprocess_data(df, MIN_USER_INTERACTIONS, MIN_ITEM_INTERACTIONS)

Final data length: 19933
Unique users: 3169
Unique items: 2567


In [189]:
class DataEncoder():
    """Maps raw user/item identifiers to contiguous integer ids.

    Users share one encoder across all domains; items get a separate encoder
    per domain, so item ids restart at 0 in every domain.
    """

    def __init__(self):
        self.user_encoder = LabelEncoder()
        self.item_encoders = {}  # Store encoders for each domain
        self.is_fitted = False

    def label_encoder(self, df):
        """Fit the encoders on ``df`` and add ``user_id`` / ``item_id`` columns.

        The columns are written into ``df`` itself and that same object is
        returned.

        Args:
            df: Frame with the columns ``user``, ``item`` and ``domain``.

        Returns:
            ``df`` with int64 ``user_id`` and ``item_id`` columns.
        """
        # Encode users (shared across domains)
        df["user_id"] = self.user_encoder.fit_transform(df["user"])
        print(f"Encoded {len(self.user_encoder.classes_)} unique users.")

        # Start from a clean slate so a refit never keeps encoders of domains
        # that are absent from the new data.
        self.item_encoders = {}

        # Encode items per domain (items might have same ID in different domains)
        df["item_id"] = -1  # Initialize with -1
        for domain in df["domain"].unique():
            in_domain = df["domain"] == domain
            item_encoder = LabelEncoder()
            df.loc[in_domain, "item_id"] = item_encoder.fit_transform(df.loc[in_domain, "item"])
            self.item_encoders[domain] = item_encoder

            print(f"Encoded {len(item_encoder.classes_)} unique items in domain '{domain}'.")

        # Convert to integer type
        df["user_id"] = df["user_id"].astype(np.int64)
        df["item_id"] = df["item_id"].astype(np.int64)

        self.is_fitted = True
        return df

    def transform_new_data(self, df):
        """Encode new rows with the already fitted encoders.

        Each row's item is encoded with the encoder of that row's domain. The
        columns are written into ``df`` itself and that same object is
        returned.

        Args:
            df: Frame with the columns ``user``, ``item`` and ``domain``.

        Returns:
            ``df`` with int64 ``user_id`` and ``item_id`` columns.

        Raises:
            ValueError: If the encoder has not been fitted, or ``df`` contains
                a domain that no item encoder was fitted for. Unknown users or
                items raise whatever the underlying encoder's ``transform``
                raises.
        """
        if not self.is_fitted:
            raise ValueError("DataEncoder is not fitted. Call label_encoder() first.")

        unknown_domains = set(df["domain"].unique()) - set(self.item_encoders)
        if unknown_domains:
            raise ValueError(
                f"No item encoder fitted for domain(s): {sorted(map(str, unknown_domains))}"
            )

        # Encode users and items
        df["user_id"] = self.user_encoder.transform(df["user"])

        # A dict cannot be indexed with a whole column, so items are encoded
        # one domain at a time with that domain's encoder.
        df["item_id"] = -1
        for domain, item_encoder in self.item_encoders.items():
            in_domain = df["domain"] == domain
            if in_domain.any():
                df.loc[in_domain, "item_id"] = item_encoder.transform(df.loc[in_domain, "item"])

        df["user_id"] = df["user_id"].astype(np.int64)
        df["item_id"] = df["item_id"].astype(np.int64)

        return df

In [190]:
# Fit the id encoders on the filtered reviews. label_encoder writes the new
# user_id / item_id columns into the frame it receives and returns it.
encoder = DataEncoder()
encoded_df = encoder.label_encoder(processed_df)

Encoded 3169 unique users.
Encoded 749 unique items in domain 'Movies_and_TV'.
Encoded 1818 unique items in domain 'Video_Games'.


In [191]:
encoded_df.head()

Unnamed: 0,user,item,rating,domain,verified_purchase,timestamp,trust_weight,user_id,item_id
0,AHRGTIMQO47C2VLJILIDU53BQKSA,B00005ALS0,4.0,Movies_and_TV,True,990492274000,1.0,2986,7
1,AGX4QGDAQZQRII5YUAUAASPRUW3Q,B00000JRSB,5.0,Video_Games,False,994245315000,0.8,2349,4
2,AGX4QGDAQZQRII5YUAUAASPRUW3Q,B00004Y57G,5.0,Video_Games,False,994247742000,0.8,2349,9
3,AFCCY6D7QLYO3SVKCVFPYUXL6HPA,B00005O3VC,5.0,Movies_and_TV,False,998237072000,0.8,1027,9
4,AHVRJMMQMNEWRCZJZ6T5XHMER2PA,B0086VPV86,5.0,Video_Games,False,1002634840000,0.8,3081,472


In [192]:
# mean_rating = encoded_df["rating"].mean()
# encoded_df["rating"] = encoded_df["rating"] - mean_rating

def create_data_splits(df, train_size=0.8):
    """Split interactions into train, validation and test frames.

    ``python_chrono_split`` (per user, on ``timestamp``) separates the training
    part from a hold-out; ``python_stratified_split`` then divides the hold-out
    per user with ratio 0.5 into validation and test. Split sizes and the
    user overlap with the training set are printed.

    Args:
        df: Encoded interactions with ``user_id``, ``item_id`` and ``timestamp``.
        train_size: Ratio passed to the chronological split.

    Returns:
        Tuple ``(train, val, test)`` of DataFrames.
    """
    id_columns = {"col_user": "user_id", "col_item": "item_id"}

    train, holdout = python_chrono_split(
        df, ratio=train_size, filter_by="user", col_timestamp="timestamp", **id_columns
    )
    val, test = python_stratified_split(holdout, ratio=0.5, filter_by="user", **id_columns)

    for label, part in (("Train", train), ("Validation", val), ("Test", test)):
        print(f"{label} set size: {part.shape[0]}")

    train_users = set(train["user_id"])
    for label, part in (("val", val), ("test", test)):
        shared = train_users.intersection(set(part["user_id"]))
        print(f"Common users in train and {label}: {len(shared)}")

    return train, val, test

In [193]:
# Split the encoded interactions with the default train ratio of 0.8.
train_df, val_df, test_df = create_data_splits(encoded_df)

Train set size: 16052
Validation set size: 1286
Test set size: 2595
Common users in train and val: 890
Common users in train and test: 2323


In [194]:
from scipy.sparse import csr_matrix

def create_interaction_matrix(df, n_users=None, n_items=None):
    """Build a sparse user x item rating matrix and print its basic statistics.

    Args:
        df: Interactions with integer ``user_id`` / ``item_id`` columns and a
            ``rating`` column.
        n_users: Number of matrix rows. Defaults to the largest ``user_id``
            plus one, so every id in ``df`` fits even when ids have gaps.
        n_items: Number of matrix columns. Defaults to the largest ``item_id``
            plus one.

    Returns:
        ``scipy.sparse.csr_matrix`` of shape ``(n_users, n_items)`` holding the
        ratings. NOTE(review): repeated (user, item) pairs are presumably
        summed by the CSR constructor - confirm if duplicates can occur.
    """
    # Ids are used directly as row/column indices, so the dimension has to be
    # max id + 1; the number of distinct ids is too small whenever ids have gaps.
    if n_users is None:
        n_users = int(df["user_id"].max()) + 1 if len(df) else 0
    if n_items is None:
        n_items = int(df["item_id"].max()) + 1 if len(df) else 0

    # Create a sparse matrix for interactions
    row = df["user_id"].values
    col = df["item_id"].values
    data = df["rating"].values
    interaction_matrix = csr_matrix((data, (row, col)), shape=(n_users, n_items))

    # Guard the density against a matrix without any cells.
    n_cells = interaction_matrix.shape[0] * interaction_matrix.shape[1]
    density = interaction_matrix.nnz / n_cells if n_cells else 0.0

    print(f"Shape: {interaction_matrix.shape} (users x items)")
    print(f"Non-zero entries: {interaction_matrix.nnz}")
    print(f"Density: {density:.4f}")

    return interaction_matrix

In [195]:
encoded_df["user_id"].max() + 1

np.int64(3169)

In [196]:
encoded_df["user_id"].nunique()

3169

In [197]:
# Sanity check: the number of distinct movie item ids in the training split.
# This can be smaller than max id + 1 (checked in the next cell) when some
# items never occur in the training rows.
domain_df = train_df[train_df["domain"] == "Movies_and_TV"]
domain_df["item_id"].nunique()

747

In [198]:
domain_df["item_id"].max() + 1

np.int64(749)

In [199]:
# Build one training interaction matrix per domain, keyed by domain name.
domains = encoded_df["domain"].unique()
interaction_matrices = {}

for domain in domains:
    print(f"\n{domain} domain interaction matrix:")
    domain_df = train_df[train_df["domain"] == domain]

    # Rows cover every encoded user (users are shared across domains), while
    # the column count follows the largest item id seen in this domain's
    # training rows.
    # NOTE(review): if the highest item id of a domain only occurs in val/test,
    # this matrix is narrower than the domain's item vocabulary - confirm that
    # is intended.
    n_users = encoded_df["user_id"].max() + 1
    n_items = domain_df["item_id"].max() + 1
    interaction_matrix = create_interaction_matrix(domain_df, n_users, n_items)
    interaction_matrices[domain] = interaction_matrix


Movies_and_TV domain interaction matrix:
Shape: (3169, 749) (users x items)
Non-zero entries: 5613
Density: 0.0024

Video_Games domain interaction matrix:
Shape: (3169, 1818) (users x items)
Non-zero entries: 10142
Density: 0.0018


In [200]:
class SimpleMFDataset(Dataset):
    """Tensor-backed dataset of (user id, item id, rating, trust weight) rows.

    Expects a frame with the columns ``user_id``, ``item_id``, ``rating`` and
    ``trust_weight``; ids are stored as int64 tensors, the other two as float32.
    """

    def __init__(self, df):
        def _column(name, dtype):
            # Copy one DataFrame column into a tensor of the requested dtype.
            return torch.tensor(df[name].values, dtype=dtype)

        self.user_ids = _column("user_id", torch.long)
        self.item_ids = _column("item_id", torch.long)
        self.ratings = _column("rating", torch.float32)
        self.weights = _column("trust_weight", torch.float32)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        columns = (self.user_ids, self.item_ids, self.ratings, self.weights)
        return tuple(column[idx] for column in columns)

In [201]:
class SimpleMatrixFactorization(nn.Module):
    """Biased matrix factorisation.

    The score of a (user, item) pair is the dot product of their embeddings
    plus a user bias, an item bias and one global bias term.
    """

    def __init__(self, n_users, n_items, embedding_dim=64):
        super().__init__()
        # Modules are created and initialised in a fixed order so that a given
        # random seed always produces the same starting weights.
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        self.global_bias = nn.Parameter(torch.zeros(1))

        # Small random factors, all bias terms start at zero.
        for factors in (self.user_embedding, self.item_embedding):
            nn.init.normal_(factors.weight, std=0.05)
        for bias in (self.user_bias, self.item_bias):
            nn.init.zeros_(bias.weight)

    def forward(self, user_ids, item_ids):
        """Return one predicted score per (user, item) pair of the batch."""
        user_vecs = self.user_embedding(user_ids)
        item_vecs = self.item_embedding(item_ids)
        interaction = torch.sum(user_vecs * item_vecs, dim=1, keepdim=True)
        scores = interaction + self.user_bias(user_ids) + self.item_bias(item_ids) + self.global_bias
        # Drop the trailing singleton dimension kept by the sum above.
        return scores.squeeze(1)

In [202]:
class PyTorchMFModel:
    """Training and inference wrapper around ``SimpleMatrixFactorization``.

    Args:
        n_embeddings: Size of the user/item embedding vectors.
        n_epochs: Number of passes over the training data.
        lr: Initial Adam learning rate.
        weight_decay: Adam weight decay.
        batch_size: Batch size used for training, validation and prediction.
    """

    def __init__(self,
                 n_embeddings=64,
                 n_epochs=10,
                 lr=0.005,
                 weight_decay=1e-5,
                 batch_size=1024):
        self.n_factors = n_embeddings
        self.n_epochs = n_epochs
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.model = None

    def train(self, train_df, val_df, n_users, n_items):
        """Fit the model and keep the weights of the best validation epoch.

        The loss is a trust-weighted mean squared error; the reported RMSE
        values are unweighted. The learning rate is halved when the validation
        RMSE stops improving.

        Args:
            train_df: Training rows (``user_id``, ``item_id``, ``rating``,
                ``trust_weight``).
            val_df: Validation rows with the same columns.
            n_users: Number of rows of the user embedding tables.
            n_items: Number of rows of the item embedding tables.
        """
        print(f"\n🚀 Training PyTorch Matrix Factorization Model...")
        print(f"   Device: {DEVICE}")
        print(f"   Factors: {self.n_factors}, Epochs: {self.n_epochs}, LR: {self.lr}")

        train_loader = DataLoader(SimpleMFDataset(train_df), batch_size=self.batch_size, shuffle=True, drop_last=False)
        val_loader   = DataLoader(SimpleMFDataset(val_df),   batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.model = SimpleMatrixFactorization(n_users, n_items, self.n_factors).to(DEVICE)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-5
        )

        best_val_rmse, best_state = float("inf"), None

        for epoch in range(self.n_epochs):
            # ---- train ----
            self.model.train()
            se_sum, denom = 0.0, 0.0
            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}")
            for users, items, ratings, weights in pbar:
                users = users.to(DEVICE); items = items.to(DEVICE)
                ratings = ratings.to(DEVICE); weights = weights.to(DEVICE)

                optimizer.zero_grad()
                preds = self.model(users, items)

                # sample-weighted MSE on true rating
                se = (preds - ratings) ** 2
                loss = (weights * se).sum() / (weights.sum() + 1e-8)
                loss.backward()
                optimizer.step()

                se_sum += se.detach().sum().item()
                denom += ratings.numel()
                pbar.set_postfix({"Train RMSE": f"{math.sqrt(se_sum/denom):.4f}"})

            train_rmse = math.sqrt(se_sum / max(1, denom))

            # ---- validate ----
            self.model.eval()
            with torch.no_grad():
                se_sum, denom = 0.0, 0.0
                for users, items, ratings, _ in val_loader:
                    users = users.to(DEVICE); items = items.to(DEVICE); ratings = ratings.to(DEVICE)
                    preds = self.model(users, items)
                    se_sum += torch.sum((preds - ratings) ** 2).item()
                    denom += ratings.numel()
            val_rmse = math.sqrt(se_sum / max(1, denom))
            scheduler.step(val_rmse)

            print(f"Epoch {epoch+1}/{self.n_epochs} - Train RMSE: {train_rmse:.4f}, Val RMSE: {val_rmse:.4f}")

            # track best
            if val_rmse < best_val_rmse - 1e-4:
                best_val_rmse = val_rmse
                # clone() is essential: on a CPU run .cpu() hands back the live
                # parameter tensor, and later optimizer steps would silently
                # overwrite the "best" snapshot.
                best_state = {
                    k: v.detach().cpu().clone() for k, v in self.model.state_dict().items()
                }

        if best_state is not None:
            self.model.load_state_dict({k: v.to(DEVICE) for k, v in best_state.items()})
        print("✅ Training complete! Best Val RMSE:", f"{best_val_rmse:.4f}")

    def _model_device(self):
        """Device the model's parameters live on (CPU when it has none)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device("cpu")

    def predict_dataframe(self, df):
        """Predict a score for every row of ``df``.

        df must contain columns: user_id, item_id

        Returns:
            1-D numpy array with one prediction per row; an empty float32
            array when ``df`` has no rows.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")
        self.model.eval()

        # np.concatenate rejects an empty list, so answer empty input directly.
        if len(df) == 0:
            return np.empty(0, dtype=np.float32)

        # Follow the model's own device instead of the notebook-level global so
        # prediction keeps working if the model has been moved.
        device = self._model_device()
        users = torch.tensor(df["user_id"].values, dtype=torch.long, device=device)
        items = torch.tensor(df["item_id"].values, dtype=torch.long, device=device)

        preds = []
        with torch.no_grad():
            for start in range(0, len(df), self.batch_size):
                stop = start + self.batch_size
                batch_scores = self.model(users[start:stop], items[start:stop])
                preds.append(batch_scores.detach().cpu().numpy())
        return np.concatenate(preds)

In [203]:
def dcg_at_k(relevances, k):
    """Discounted cumulative gain of the first ``k`` entries.

    ``relevances`` holds the true gains in predicted rank order; the gain at
    rank ``i`` (1-based) is divided by ``log2(i + 1)``.
    """
    gains = np.asarray(relevances)[:k]
    n_ranked = gains.size
    if n_ranked:
        rank_weights = 1.0 / np.log2(np.arange(2, n_ranked + 2))
        return float((gains * rank_weights).sum())
    return 0.0

def ndcg_at_k(predicted_items, true_rel_map, k):
    """Normalised DCG of a ranking, truncated at ``k``.

    predicted_items: item ids ordered by predicted score, best first.
    true_rel_map   : dict {item_id -> gain}; items missing from it count as 0.

    Returns the DCG of the ranking divided by the DCG of the ideal ordering of
    all gains in ``true_rel_map``, or 0.0 when that ideal DCG is not positive.
    """
    # Best achievable ordering: every known gain, largest first.
    best_order = sorted(true_rel_map.values(), reverse=True)
    ideal_dcg = dcg_at_k(best_order, k)
    if not ideal_dcg > 0:
        return 0.0

    # Gains actually collected by the predicted ranking.
    ranked_gains = [true_rel_map.get(item, 0.0) for item in predicted_items[:k]]
    return dcg_at_k(ranked_gains, k) / ideal_dcg

def evaluate_ranking_metrics(
    model,
    test_df,
    all_data_df,
    domain: str,
    k=10,
    rating_threshold=4.0,
    n_neg_samples=100,
    rng_seed=42,
    graded=False  # <- set True if you want graded gains
):
    """
    Domain-aware Precision@k, Recall@k, MAP@k, NDCG@k with sampled negatives.

    For every test user with at least one relevant test item, the user's test
    items plus up to ``n_neg_samples`` items the user never interacted with in
    ``all_data_df`` are scored by the model, and the top ``k`` are evaluated.

    - Binary relevance by default: rating >= threshold -> 1, else 0
    - Graded relevance (optional): gain = max(rating - (threshold - 1), 0)
      e.g., threshold=4.0 => 4★->1, 5★->2, else 0

    Args:
        model: Object exposing ``predict_dataframe(df)`` for frames with
            ``user_id`` / ``item_id`` columns.
        test_df: Held-out interactions (``domain``, ``user_id``, ``item_id``,
            ``rating``).
        all_data_df: Interactions used to define the domain's item pool and
            each user's already-seen items.
        domain: Domain to evaluate; other rows are ignored.
        k: Ranking cut-off.
        rating_threshold: Minimum rating counted as relevant.
        n_neg_samples: Maximum number of unseen items sampled per user.
        rng_seed: Seed of the negative sampler.
        graded: Use graded instead of binary gains.

    Returns:
        Dict with the keys ``precision_at_{k}``, ``recall_at_{k}``,
        ``map_at_{k}`` and ``ndcg_at_{k}``, averaged over evaluated users
        (all 0.0 when nobody could be evaluated).
    """
    print(f"\n📊 Evaluating Ranking Metrics @k={k} for domain = {domain}")
    rs = np.random.RandomState(rng_seed)

    # filter to target domain
    test_dom = test_df[test_df["domain"] == domain].copy()
    hist_dom = all_data_df[all_data_df["domain"] == domain].copy()

    if test_dom.empty:
        print("No test rows for this domain. Skipping.")
        return {
            f'precision_at_{k}': 0.0,
            f'recall_at_{k}': 0.0,
            f'map_at_{k}': 0.0,
            f'ndcg_at_{k}': 0.0
        }

    # candidate pool in this domain
    domain_items = np.unique(hist_dom["item_id"].values)
    # Built once: the pool does not depend on the user, so there is no reason
    # to rebuild this set inside the per-user loop.
    domain_item_set = set(domain_items)
    # seen items per user in this domain
    seen_by_user = hist_dom.groupby("user_id")["item_id"].apply(set)

    precisions, recalls, aps, ndcgs = [], [], [], []

    for user_id, g in tqdm(test_dom.groupby("user_id"), desc="Calculating Ranking Metrics"):
        # --- build relevance map for this user's test items ---
        if graded:
            # graded gains: only >= threshold contribute, and graded by how far above threshold
            rel_map = {
                iid: max(float(r) - (rating_threshold - 1.0), 0.0)
                for iid, r in zip(g["item_id"].values, g["rating"].values)
            }
        else:
            # binary gains
            rel_map = {
                iid: 1.0 if (float(r) >= rating_threshold) else 0.0
                for iid, r in zip(g["item_id"].values, g["rating"].values)
            }
        # positive (nonzero) items
        rel_items = {iid for iid, gain in rel_map.items() if gain > 0.0}
        if not rel_items:
            continue  # skip users with no relevant items in test

        seen = seen_by_user.get(user_id, set())
        negatives = np.array(list(domain_item_set - seen))
        if negatives.size == 0:
            continue

        m = min(n_neg_samples, negatives.size)
        neg_samples = rs.choice(negatives, size=m, replace=False)

        # candidate set = ALL of the user's test items (relevant or not)
        # followed by the sampled negatives
        items_to_rank = np.array(list(rel_map.keys()))
        items_to_rank = np.concatenate([items_to_rank, neg_samples])

        pred_df = pd.DataFrame({"user_id": user_id, "item_id": items_to_rank})
        pred_df["score"] = model.predict_dataframe(pred_df)
        pred_df = pred_df.sort_values("score", ascending=False)
        top_k = pred_df["item_id"].values[:k]

        # --- Precision@k / Recall@k ---
        hit_set = set(top_k).intersection(rel_items)
        precisions.append(len(hit_set) / k)
        recalls.append(len(hit_set) / len(rel_items))

        # --- MAP@k ---
        # Average precision is normalised by the number of relevant test items.
        ap, hits = 0.0, 0
        for rank, item in enumerate(top_k, start=1):
            if item in rel_items:
                hits += 1
                ap += hits / rank
        aps.append(ap / len(rel_items))

        # --- NDCG@k ---
        ndcgs.append(ndcg_at_k(top_k.tolist(), rel_map, k))

    out = {
        f"precision_at_{k}": float(np.mean(precisions)) if precisions else 0.0,
        f"recall_at_{k}":    float(np.mean(recalls))    if recalls    else 0.0,
        f"map_at_{k}":       float(np.mean(aps))        if aps        else 0.0,
        f"ndcg_at_{k}":      float(np.mean(ndcgs))      if ndcgs      else 0.0,
    }

    print(f"\n--- Ranking Evaluation Results (domain={domain}, k={k}) ---")
    print(f"   Precision@{k}: {out[f'precision_at_{k}']:.4f}")
    print(f"   Recall@{k}:    {out[f'recall_at_{k}']:.4f}")
    print(f"   MAP@{k}:       {out[f'map_at_{k}']:.4f}")
    print(f"   NDCG@{k}:      {out[f'ndcg_at_{k}']:.4f}")
    print("-----------------------------------------")
    return out

In [213]:
# --- Training the PyTorch Model ---
print("\nTraining PyTorch Matrix Factorization on Movies domain:")
# Restrict every split to the Movies_and_TV rows.
movies_train = train_df[train_df["domain"] == "Movies_and_TV"]
movies_val   = val_df[val_df["domain"]   == "Movies_and_TV"]
movies_test  = test_df[test_df["domain"] == "Movies_and_TV"]  # use test set for final eval

# Embedding table sizes come from the full encoded data (max id + 1), so every
# user id and every movie item id of any split has a row in the tables.
n_total_users   = encoded_df["user_id"].max() + 1
n_movies_items  = encoded_df[encoded_df["domain"] == "Movies_and_TV"]["item_id"].max() + 1

mf_torch_model = PyTorchMFModel(n_embeddings=8, n_epochs=50, lr=0.05, weight_decay=5e-3, batch_size=1024)
mf_torch_model.train(movies_train, movies_val, n_total_users, n_movies_items)


Training PyTorch Matrix Factorization on Movies domain:

🚀 Training PyTorch Matrix Factorization Model...
   Device: cuda
   Factors: 8, Epochs: 50, LR: 0.05


Epoch 1/50: 100%|██████████| 6/6 [00:00<00:00, 46.67it/s, Train RMSE=4.2143]


Epoch 1/50 - Train RMSE: 4.2143, Val RMSE: 3.7021


Epoch 2/50: 100%|██████████| 6/6 [00:00<00:00, 48.50it/s, Train RMSE=3.5086]


Epoch 2/50 - Train RMSE: 3.5086, Val RMSE: 3.0322


Epoch 3/50: 100%|██████████| 6/6 [00:00<00:00, 15.81it/s, Train RMSE=2.8652]


Epoch 3/50 - Train RMSE: 2.8652, Val RMSE: 2.4357


Epoch 4/50: 100%|██████████| 6/6 [00:00<00:00, 47.46it/s, Train RMSE=2.3070]


Epoch 4/50 - Train RMSE: 2.3070, Val RMSE: 1.9514


Epoch 5/50: 100%|██████████| 6/6 [00:00<00:00, 55.46it/s, Train RMSE=1.8786]


Epoch 5/50 - Train RMSE: 1.8786, Val RMSE: 1.6484


Epoch 6/50: 100%|██████████| 6/6 [00:00<00:00, 55.01it/s, Train RMSE=1.6232]


Epoch 6/50 - Train RMSE: 1.6232, Val RMSE: 1.4958


Epoch 7/50: 100%|██████████| 6/6 [00:00<00:00, 58.67it/s, Train RMSE=1.4910]


Epoch 7/50 - Train RMSE: 1.4910, Val RMSE: 1.4179


Epoch 8/50: 100%|██████████| 6/6 [00:00<00:00, 55.16it/s, Train RMSE=1.4170]


Epoch 8/50 - Train RMSE: 1.4170, Val RMSE: 1.3687


Epoch 9/50: 100%|██████████| 6/6 [00:00<00:00, 47.09it/s, Train RMSE=1.3629]


Epoch 9/50 - Train RMSE: 1.3629, Val RMSE: 1.3343


Epoch 10/50: 100%|██████████| 6/6 [00:00<00:00, 56.38it/s, Train RMSE=1.3214]


Epoch 10/50 - Train RMSE: 1.3214, Val RMSE: 1.3099


Epoch 11/50: 100%|██████████| 6/6 [00:00<00:00, 56.58it/s, Train RMSE=1.2882]


Epoch 11/50 - Train RMSE: 1.2882, Val RMSE: 1.2907


Epoch 12/50: 100%|██████████| 6/6 [00:00<00:00, 42.59it/s, Train RMSE=1.2577]


Epoch 12/50 - Train RMSE: 1.2577, Val RMSE: 1.2728


Epoch 13/50: 100%|██████████| 6/6 [00:00<00:00, 60.22it/s, Train RMSE=1.2268]


Epoch 13/50 - Train RMSE: 1.2268, Val RMSE: 1.2497


Epoch 14/50: 100%|██████████| 6/6 [00:00<00:00, 53.57it/s, Train RMSE=1.1927]


Epoch 14/50 - Train RMSE: 1.1927, Val RMSE: 1.2262


Epoch 15/50: 100%|██████████| 6/6 [00:00<00:00, 53.08it/s, Train RMSE=1.1592]


Epoch 15/50 - Train RMSE: 1.1592, Val RMSE: 1.2017


Epoch 16/50: 100%|██████████| 6/6 [00:00<00:00, 48.01it/s, Train RMSE=1.1265]


Epoch 16/50 - Train RMSE: 1.1265, Val RMSE: 1.1776


Epoch 17/50: 100%|██████████| 6/6 [00:00<00:00, 17.27it/s, Train RMSE=1.0965]


Epoch 17/50 - Train RMSE: 1.0965, Val RMSE: 1.1556


Epoch 18/50: 100%|██████████| 6/6 [00:00<00:00, 41.00it/s, Train RMSE=1.0695]


Epoch 18/50 - Train RMSE: 1.0695, Val RMSE: 1.1361


Epoch 19/50: 100%|██████████| 6/6 [00:00<00:00, 54.42it/s, Train RMSE=1.0462]


Epoch 19/50 - Train RMSE: 1.0462, Val RMSE: 1.1212


Epoch 20/50: 100%|██████████| 6/6 [00:00<00:00, 57.78it/s, Train RMSE=1.0258]


Epoch 20/50 - Train RMSE: 1.0258, Val RMSE: 1.1067


Epoch 21/50: 100%|██████████| 6/6 [00:00<00:00, 54.51it/s, Train RMSE=1.0085]


Epoch 21/50 - Train RMSE: 1.0085, Val RMSE: 1.0943


Epoch 22/50: 100%|██████████| 6/6 [00:00<00:00, 57.54it/s, Train RMSE=0.9933]


Epoch 22/50 - Train RMSE: 0.9933, Val RMSE: 1.0839


Epoch 23/50: 100%|██████████| 6/6 [00:00<00:00, 57.30it/s, Train RMSE=0.9798]


Epoch 23/50 - Train RMSE: 0.9798, Val RMSE: 1.0758


Epoch 24/50: 100%|██████████| 6/6 [00:00<00:00, 57.39it/s, Train RMSE=0.9683]


Epoch 24/50 - Train RMSE: 0.9683, Val RMSE: 1.0673


Epoch 25/50: 100%|██████████| 6/6 [00:00<00:00, 60.62it/s, Train RMSE=0.9581]


Epoch 25/50 - Train RMSE: 0.9581, Val RMSE: 1.0614


Epoch 26/50: 100%|██████████| 6/6 [00:00<00:00, 49.67it/s, Train RMSE=0.9495]


Epoch 26/50 - Train RMSE: 0.9495, Val RMSE: 1.0554


Epoch 27/50: 100%|██████████| 6/6 [00:00<00:00, 56.28it/s, Train RMSE=0.9417]


Epoch 27/50 - Train RMSE: 0.9417, Val RMSE: 1.0506


Epoch 28/50: 100%|██████████| 6/6 [00:00<00:00, 55.06it/s, Train RMSE=0.9360]


Epoch 28/50 - Train RMSE: 0.9360, Val RMSE: 1.0469


Epoch 29/50: 100%|██████████| 6/6 [00:00<00:00, 55.38it/s, Train RMSE=0.9315]


Epoch 29/50 - Train RMSE: 0.9315, Val RMSE: 1.0450


Epoch 30/50: 100%|██████████| 6/6 [00:00<00:00, 50.33it/s, Train RMSE=0.9279]


Epoch 30/50 - Train RMSE: 0.9279, Val RMSE: 1.0445


Epoch 31/50: 100%|██████████| 6/6 [00:00<00:00, 16.78it/s, Train RMSE=0.9248]


Epoch 31/50 - Train RMSE: 0.9248, Val RMSE: 1.0455


Epoch 32/50: 100%|██████████| 6/6 [00:00<00:00, 41.49it/s, Train RMSE=0.9220]


Epoch 32/50 - Train RMSE: 0.9220, Val RMSE: 1.0433


Epoch 33/50: 100%|██████████| 6/6 [00:00<00:00, 51.57it/s, Train RMSE=0.9198]


Epoch 33/50 - Train RMSE: 0.9198, Val RMSE: 1.0413


Epoch 34/50: 100%|██████████| 6/6 [00:00<00:00, 55.41it/s, Train RMSE=0.9183]


Epoch 34/50 - Train RMSE: 0.9183, Val RMSE: 1.0402


Epoch 35/50: 100%|██████████| 6/6 [00:00<00:00, 51.57it/s, Train RMSE=0.9169]


Epoch 35/50 - Train RMSE: 0.9169, Val RMSE: 1.0386


Epoch 36/50: 100%|██████████| 6/6 [00:00<00:00, 56.08it/s, Train RMSE=0.9159]


Epoch 36/50 - Train RMSE: 0.9159, Val RMSE: 1.0400


Epoch 37/50: 100%|██████████| 6/6 [00:00<00:00, 52.31it/s, Train RMSE=0.9152]


Epoch 37/50 - Train RMSE: 0.9152, Val RMSE: 1.0417


Epoch 38/50: 100%|██████████| 6/6 [00:00<00:00, 56.72it/s, Train RMSE=0.9143]


Epoch 38/50 - Train RMSE: 0.9143, Val RMSE: 1.0424


Epoch 39/50: 100%|██████████| 6/6 [00:00<00:00, 56.80it/s, Train RMSE=0.9116]


Epoch 39/50 - Train RMSE: 0.9116, Val RMSE: 1.0420


Epoch 40/50: 100%|██████████| 6/6 [00:00<00:00, 54.32it/s, Train RMSE=0.9113]


Epoch 40/50 - Train RMSE: 0.9113, Val RMSE: 1.0420


Epoch 41/50: 100%|██████████| 6/6 [00:00<00:00, 49.56it/s, Train RMSE=0.9114]


Epoch 41/50 - Train RMSE: 0.9114, Val RMSE: 1.0415


Epoch 42/50: 100%|██████████| 6/6 [00:00<00:00, 44.85it/s, Train RMSE=0.9102]


Epoch 42/50 - Train RMSE: 0.9102, Val RMSE: 1.0418


Epoch 43/50: 100%|██████████| 6/6 [00:00<00:00, 52.24it/s, Train RMSE=0.9102]


Epoch 43/50 - Train RMSE: 0.9102, Val RMSE: 1.0415


Epoch 44/50: 100%|██████████| 6/6 [00:00<00:00, 36.15it/s, Train RMSE=0.9101]


Epoch 44/50 - Train RMSE: 0.9101, Val RMSE: 1.0416


Epoch 45/50: 100%|██████████| 6/6 [00:00<00:00, 17.01it/s, Train RMSE=0.9094]


Epoch 45/50 - Train RMSE: 0.9094, Val RMSE: 1.0416


Epoch 46/50: 100%|██████████| 6/6 [00:00<00:00, 51.56it/s, Train RMSE=0.9094]


Epoch 46/50 - Train RMSE: 0.9094, Val RMSE: 1.0416


Epoch 47/50: 100%|██████████| 6/6 [00:00<00:00, 54.37it/s, Train RMSE=0.9093]


Epoch 47/50 - Train RMSE: 0.9093, Val RMSE: 1.0413


Epoch 48/50: 100%|██████████| 6/6 [00:00<00:00, 54.74it/s, Train RMSE=0.9090]


Epoch 48/50 - Train RMSE: 0.9090, Val RMSE: 1.0413


Epoch 49/50: 100%|██████████| 6/6 [00:00<00:00, 52.75it/s, Train RMSE=0.9090]


Epoch 49/50 - Train RMSE: 0.9090, Val RMSE: 1.0414


Epoch 50/50: 100%|██████████| 6/6 [00:00<00:00, 47.68it/s, Train RMSE=0.9089]

Epoch 50/50 - Train RMSE: 0.9089, Val RMSE: 1.0413
✅ Training complete! Best Val RMSE: 1.0386





In [214]:
# Rank each test user's held-out movies against up to 100 sampled unseen movies.
# all_data_df is the full encoded frame, so items a user interacted with in any
# split are excluded from that user's negatives.
ranking_results = evaluate_ranking_metrics(
    model=mf_torch_model,
    test_df=movies_test,
    all_data_df=encoded_df,
    domain="Movies_and_TV",
    k=10,
    rating_threshold=POSITIVE_THRESHOLD,  # 4.0
    n_neg_samples=100
)


📊 Evaluating Ranking Metrics @k=10 for domain = Movies_and_TV


Calculating Ranking Metrics: 100%|██████████| 910/910 [00:02<00:00, 419.87it/s]


--- Ranking Evaluation Results (domain=Movies_and_TV, k=10) ---
   Precision@10: 0.0148
   Recall@10:    0.1404
   MAP@10:       0.0617
   NDCG@10:      0.0805
-----------------------------------------





In [215]:
import math
import torch
import torch.nn as nn

class NeuMF(nn.Module):
    """Neural matrix factorisation with a GMF branch and an MLP branch.

    The GMF branch multiplies user and item embeddings element-wise; the MLP
    branch feeds the concatenated user/item embeddings through a stack of
    Linear -> ReLU -> Dropout layers. Both outputs are concatenated and mapped
    to a single score. With ``use_bias`` the score additionally receives a user
    bias, an item bias and the non-trainable ``global_mean``.
    """

    def __init__(
        self,
        n_users: int,
        n_items: int,
        factors_gmf: int = 32,
        factors_mlp: int = 32,
        mlp_layers=(128, 64, 32),
        dropout: float = 0.2,
        use_bias: bool = True,
    ):
        super().__init__()
        # Modules are created and initialised in a fixed order so that a given
        # random seed always produces the same starting weights.

        # Separate embedding tables for the two branches.
        self.gmf_user = nn.Embedding(n_users, factors_gmf)
        self.gmf_item = nn.Embedding(n_items, factors_gmf)
        self.mlp_user = nn.Embedding(n_users, factors_mlp)
        self.mlp_item = nn.Embedding(n_items, factors_mlp)

        # MLP tower: consecutive widths, starting at the concatenated embeddings.
        widths = [factors_mlp * 2, *mlp_layers]
        tower = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            tower.extend((nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)))
        self.mlp = nn.Sequential(*tower)

        # Prediction layer over [GMF ⊕ MLP]; with no hidden layers the MLP
        # branch passes the concatenated embeddings through unchanged.
        self.pred = nn.Linear(factors_gmf + widths[-1], 1)

        # Optional biases + global mean (helpful for ratings)
        self.use_bias = use_bias
        self.user_bias = nn.Embedding(n_users, 1) if use_bias else None
        self.item_bias = nn.Embedding(n_items, 1) if use_bias else None
        self.global_mean = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Weight initialisation.
        for table in (self.gmf_user, self.gmf_item, self.mlp_user, self.mlp_item):
            nn.init.normal_(table.weight, std=0.01)
        linear_layers = [layer for layer in self.mlp if isinstance(layer, nn.Linear)]
        for layer in linear_layers + [self.pred]:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)
        if self.use_bias:
            for bias_table in (self.user_bias, self.item_bias):
                nn.init.zeros_(bias_table.weight)

    def forward(self, users, items):
        """Return one predicted score per (user, item) pair of the batch."""
        gmf_vec = self.gmf_user(users) * self.gmf_item(items)  # elementwise product
        mlp_vec = self.mlp(torch.cat([self.mlp_user(users), self.mlp_item(items)], dim=1))
        score = self.pred(torch.cat([gmf_vec, mlp_vec], dim=1)).squeeze(1)
        if not self.use_bias:
            return score
        return score + self.user_bias(users).squeeze(1) + self.item_bias(items).squeeze(1) + self.global_mean

In [216]:
from torch.utils.data import DataLoader

class PyTorchNeuMFModel:
    """Training / inference wrapper around the ``NeuMF`` network for rating regression.

    Hyper-parameters are stored at construction time; the network itself is only
    built inside :meth:`train`, once the number of users and items is known.
    """

    def __init__(
        self,
        factors_gmf=32,
        factors_mlp=32,
        mlp_layers=(128, 64, 32),
        dropout=0.2,
        use_bias=True,
        n_epochs=30,
        lr=1e-3,
        weight_decay=1e-5,
        batch_size=1024,
        max_grad_norm=5.0,
        patience=4,  # early stopping: max consecutive epochs without val-RMSE improvement
    ):
        """Store hyper-parameters; no model is created yet.

        Args:
            factors_gmf: embedding size forwarded to ``NeuMF`` for the GMF branch.
            factors_mlp: embedding size forwarded to ``NeuMF`` for the MLP branch.
            mlp_layers: hidden layer widths forwarded to ``NeuMF``.
            dropout: dropout probability forwarded to ``NeuMF``.
            use_bias: forwarded to ``NeuMF``; enables bias / global-mean terms.
            n_epochs: maximum number of training epochs.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.
            batch_size: batch size used for training, validation and prediction.
            max_grad_norm: gradient-norm clipping threshold; ``None`` disables clipping.
            patience: number of consecutive non-improving epochs tolerated before
                early stopping. (The LR scheduler uses its own fixed patience.)
        """
        self.factors_gmf = factors_gmf
        self.factors_mlp = factors_mlp
        self.mlp_layers = mlp_layers
        self.dropout = dropout
        self.use_bias = use_bias
        self.n_epochs = n_epochs
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.max_grad_norm = max_grad_norm
        self.patience = patience
        self.model = None

    @staticmethod
    def _snapshot_state(model):
        """Return an independent CPU copy of ``model.state_dict()``.

        The explicit ``clone()`` matters: for a model that already lives on the
        CPU, ``.cpu()`` hands back the live parameter tensor, so a snapshot taken
        without cloning would silently follow every later optimizer step.
        """
        return {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    def train(self, train_df, val_df, n_users, n_items):
        """Fit a fresh ``NeuMF`` model and keep the weights of the best validation epoch.

        Optimises a sample-weighted MSE with Adam, halves the learning rate when
        validation RMSE plateaus (ReduceLROnPlateau, patience 2, floor 1e-5) and
        stops early after ``self.patience`` epochs without an improvement of at
        least 1e-4 in validation RMSE. The logged train RMSE is unweighted and is
        accumulated from the training-mode forward passes.

        Args:
            train_df: training interactions; must expose a ``"rating"`` column and
                be accepted by ``SimpleMFDataset`` (batches unpack as
                users, items, ratings, weights).
            val_df: validation interactions in the same format.
            n_users: number of user indices passed to ``NeuMF``.
            n_items: number of item indices passed to ``NeuMF``.
        """
        print("\n🚀 Training NeuMF (ratings regression with sample-weighted MSE)")
        print(f"   Device: {DEVICE} | GMF:{self.factors_gmf} | MLP:{self.factors_mlp} "
              f"| Layers:{self.mlp_layers} | Dropout:{self.dropout}")

        train_loader = DataLoader(SimpleMFDataset(train_df), batch_size=self.batch_size, shuffle=True)
        val_loader   = DataLoader(SimpleMFDataset(val_df),   batch_size=self.batch_size, shuffle=False)

        self.model = NeuMF(
            n_users, n_items,
            factors_gmf=self.factors_gmf,
            factors_mlp=self.factors_mlp,
            mlp_layers=self.mlp_layers,
            dropout=self.dropout,
            use_bias=self.use_bias
        ).to(DEVICE)

        # set global mean from train ratings (non-trainable offset of the model)
        with torch.no_grad():
            self.model.global_mean.fill_(float(train_df["rating"].mean()))

        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-5
        )

        best_val_rmse = float("inf")
        best_state = None
        bad_epochs = 0

        for epoch in range(self.n_epochs):
            # -------- train --------
            self.model.train()
            se_sum, denom = 0.0, 0.0
            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}")
            for users, items, ratings, weights in pbar:
                users = users.to(DEVICE); items = items.to(DEVICE)
                ratings = ratings.to(DEVICE); weights = weights.to(DEVICE)

                optimizer.zero_grad()
                preds = self.model(users, items)

                # Weighted mean of squared errors; epsilon guards an all-zero weight batch.
                se = (preds - ratings) ** 2
                loss = (weights * se).sum() / (weights.sum() + 1e-8)
                loss.backward()

                if self.max_grad_norm is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)

                optimizer.step()

                # Running RMSE for the progress bar is unweighted on purpose,
                # so it is comparable with the validation RMSE below.
                se_sum += se.detach().sum().item()
                denom += ratings.numel()
                pbar.set_postfix({"Train RMSE": f"{math.sqrt(se_sum/denom):.4f}"})

            train_rmse = math.sqrt(se_sum / max(1, denom))

            # -------- validate --------
            self.model.eval()
            with torch.no_grad():
                se_sum, denom = 0.0, 0.0
                for users, items, ratings, _ in val_loader:
                    users = users.to(DEVICE); items = items.to(DEVICE); ratings = ratings.to(DEVICE)
                    preds = self.model(users, items)
                    se_sum += torch.sum((preds - ratings) ** 2).item()
                    denom += ratings.numel()
            val_rmse = math.sqrt(se_sum / max(1, denom))
            scheduler.step(val_rmse)

            print(f"Epoch {epoch+1}/{self.n_epochs} - Train RMSE: {train_rmse:.4f} | Val RMSE: {val_rmse:.4f}")

            # early-stop restore: remember the best epoch, count the bad ones
            if val_rmse < best_val_rmse - 1e-4:
                best_val_rmse = val_rmse
                # Must be a real copy, otherwise later epochs overwrite it in place.
                best_state = self._snapshot_state(self.model)
                bad_epochs = 0
            else:
                bad_epochs += 1
                if bad_epochs >= self.patience:
                    print(f"Early stopping at epoch {epoch+1}. Best Val RMSE: {best_val_rmse:.4f}")
                    break

        if best_state is not None:
            self.model.load_state_dict({k: v.to(DEVICE) for k, v in best_state.items()})
        print("✅ Training complete! Best Val RMSE:", f"{best_val_rmse:.4f}")

    def predict_dataframe(self, df):
        """Predict a rating for every row of ``df``.

        Args:
            df: frame with integer ``user_id`` and ``item_id`` columns.

        Returns:
            1-D NumPy array of predictions, in the row order of ``df``.
            An empty frame yields an empty array.

        Raises:
            ValueError: if :meth:`train` has not been called yet.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")
        self.model.eval()

        # np.concatenate rejects an empty list, so short-circuit the empty frame.
        if len(df) == 0:
            return np.empty(0, dtype=np.float32)

        users = torch.tensor(df["user_id"].values, dtype=torch.long, device=DEVICE)
        items = torch.tensor(df["item_id"].values, dtype=torch.long, device=DEVICE)

        preds = []
        with torch.no_grad():
            # Score in batch_size chunks to bound peak memory.
            for i in range(0, len(df), self.batch_size):
                pu = users[i:i+self.batch_size]
                pi = items[i:i+self.batch_size]
                p = self.model(pu, pi)
                preds.append(p.detach().cpu().numpy())
        return np.concatenate(preds)

In [226]:
print("\nTraining NeuMF on Movies domain:")

# Keep only the movie-domain rows of each split.
movies_train = train_df.loc[train_df["domain"].eq("Movies_and_TV")]
movies_val = val_df.loc[val_df["domain"].eq("Movies_and_TV")]
movies_test = test_df.loc[test_df["domain"].eq("Movies_and_TV")]

# Embedding-table sizes: highest encoded id + 1 (users over all data,
# items over the movie domain only).
n_total_users = encoded_df["user_id"].max() + 1
n_movies_items = encoded_df.loc[encoded_df["domain"].eq("Movies_and_TV"), "item_id"].max() + 1

neumf_model = PyTorchNeuMFModel(
    factors_gmf=16,
    factors_mlp=16,
    mlp_layers=(64, 32),
    dropout=0.3,
    use_bias=True,
    n_epochs=50,
    lr=5e-3,
    weight_decay=1e-4,
    batch_size=1024,
    max_grad_norm=5.0,
    patience=5,
)
neumf_model.train(movies_train, movies_val, n_users=n_total_users, n_items=n_movies_items)


Training NeuMF on Movies domain:

🚀 Training NeuMF (ratings regression with sample-weighted MSE)
   Device: cuda | GMF:16 | MLP:16 | Layers:(64, 32) | Dropout:0.3


Epoch 1/50: 100%|██████████| 6/6 [00:00<00:00, 31.90it/s, Train RMSE=1.0685]


Epoch 1/50 - Train RMSE: 1.0685 | Val RMSE: 1.0925


Epoch 2/50: 100%|██████████| 6/6 [00:00<00:00, 35.33it/s, Train RMSE=1.0167]


Epoch 2/50 - Train RMSE: 1.0167 | Val RMSE: 1.0479


Epoch 3/50: 100%|██████████| 6/6 [00:00<00:00, 36.43it/s, Train RMSE=0.8932]


Epoch 3/50 - Train RMSE: 0.8932 | Val RMSE: 1.0571


Epoch 4/50: 100%|██████████| 6/6 [00:00<00:00, 38.96it/s, Train RMSE=0.7898]


Epoch 4/50 - Train RMSE: 0.7898 | Val RMSE: 1.0990


Epoch 5/50: 100%|██████████| 6/6 [00:00<00:00, 37.70it/s, Train RMSE=0.7241]


Epoch 5/50 - Train RMSE: 0.7241 | Val RMSE: 1.0497


Epoch 6/50: 100%|██████████| 6/6 [00:00<00:00, 41.46it/s, Train RMSE=0.6882]


Epoch 6/50 - Train RMSE: 0.6882 | Val RMSE: 1.0574


Epoch 7/50: 100%|██████████| 6/6 [00:00<00:00, 35.50it/s, Train RMSE=0.6685]

Epoch 7/50 - Train RMSE: 0.6685 | Val RMSE: 1.0710
Early stopping at epoch 7. Best Val RMSE: 1.0479
✅ Training complete! Best Val RMSE: 1.0479





In [227]:
# Ranking evaluation of the trained NeuMF model on the movie-domain test
# split, using the same domain-aware protocol as the other models.
neumf_results = evaluate_ranking_metrics(
    model=neumf_model,
    test_df=movies_test,
    all_data_df=encoded_df,
    domain="Movies_and_TV",
    # metric settings
    k=10,
    graded=False,
    rating_threshold=POSITIVE_THRESHOLD,  # 4.0
    # negative sampling
    n_neg_samples=100,
    rng_seed=42,
)
print(neumf_results)


📊 Evaluating Ranking Metrics @k=10 for domain = Movies_and_TV


Calculating Ranking Metrics: 100%|██████████| 910/910 [00:02<00:00, 397.12it/s]


--- Ranking Evaluation Results (domain=Movies_and_TV, k=10) ---
   Precision@10: 0.0130
   Recall@10:    0.1246
   MAP@10:       0.0432
   NDCG@10:      0.0627
-----------------------------------------
{'precision_at_10': 0.013025780189959296, 'recall_at_10': 0.1246042514699231, 'map_at_10': 0.0432475070534772, 'ndcg_at_10': 0.06274462438909985}



