In [19]:
import os
import math
import random
import numpy as np
import pandas as pd
from recommenders.datasets.python_splitters import python_chrono_split, python_stratified_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from collections import defaultdict

# os.environ["HF_HOME"] = "E:/Python Scripts/recsys"
# os.environ['HF_DATASETS_CACHE'] = "E:/Python Scripts/recsys/data"
# os.environ['TRANSFORMERS_CACHE'] = "E:/Python Scripts/recsys/models"

os.environ["HF_HOME"] = "D:/Python Projects/recommendation_system"
os.environ['HF_DATASETS_CACHE'] = "D:/Python Projects/recommendation_system/recsys/data"
os.environ['TRANSFORMERS_CACHE'] = "D:/Python Projects/recommendation_system/recsys/models"

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

## Load Amazon Reviews Dataset
This dataset contains user reviews for various products across different domains. We will focus on two domains:
- Movies and TV
- Video Games

The dataset is available on the Hugging Face Hub as `McAuley-Lab/Amazon-Reviews-2023`.

In [3]:
# Select 2 categories to highlight cross-domain transfer
SOURCE_DOMAIN = "Movies_and_TV"
TARGET_DOMAIN = "Video_Games"
DOMAINS = [SOURCE_DOMAIN, TARGET_DOMAIN]

MIN_USER_INTERACTIONS = 10
MIN_ITEM_INTERACTIONS = 10
POSITIVE_THRESHOLD = 4.0  # Ratings >= 4.0 are considered positive

# Load the dataset
def load_amazon_reviews(domain:str, max_per_domain:int=100000) -> pd.DataFrame:
    dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023",
                           f"raw_review_{domain}",
                           trust_remote_code=True)
    rows = []
    for i, r in enumerate(dataset["full"]):
        if i >= max_per_domain:
            break
        rows.append({
            "user": r["user_id"],
            "item": r["parent_asin"],
            "rating": float(r["rating"]),
            "domain": domain,
            "verified_purchase": r["verified_purchase"],
            "timestamp": int(r["timestamp"])
        })

    return pd.DataFrame(rows)

# Sample 100k reviews per domain for model development
dfs = [load_amazon_reviews(dom, max_per_domain=300000) for dom in DOMAINS]
df = pd.concat(dfs, ignore_index=True).sort_values("timestamp").reset_index(drop=True)

In [4]:
df.head()

Unnamed: 0,user,item,rating,domain,verified_purchase,timestamp
0,AHTBKI7WSESAHC6QY55GAQ777MXQ,6304333560,5.0,Movies_and_TV,False,899940586000
1,AHTBKI7WSESAHC6QY55GAQ777MXQ,6302737931,4.0,Movies_and_TV,False,899941515000
2,AHAYX6YWLK52LPXFSE2QUNMMS44A,783114222,5.0,Movies_and_TV,False,913069725000
3,AG3S4FROO422V5KP7DJCBXVUQLJQ,800185676,5.0,Movies_and_TV,True,914267986000
4,AHVNRIAPM3GVNS3RH3MNIEVSSBNA,6303501281,5.0,Movies_and_TV,False,914297420000


## Preprocess the data (Use ratings as continuous values)
- Convert `verified_purchase` to a trust weight (1.0 for verified, 0.8 for unverified)
- Filter users and items based on minimum interactions
- Encode users and items
- Split the dataset into training, validation, and test sets

In [5]:
def preprocess_data(df, min_user_interactions, min_item_interactions):
    df["trust_weight"] = df["verified_purchase"].apply(lambda x: 1.0 if x else 0.8)

    # Filtering interactions
    user_counts = df["user"].value_counts()
    item_counts = df["item"].value_counts()
    active_users = user_counts[user_counts >= min_user_interactions].index
    active_items = item_counts[item_counts >= min_item_interactions].index
    df = df[df["user"].isin(active_users) & df["item"].isin(active_items)].reset_index(drop=True)

    print(f"Final data length: {df.shape[0]}")
    print(f"Unique users: {df['user'].nunique()}")
    print(f"Unique items: {df['item'].nunique()}")

    # ensure dtypes
    df["rating"] = df["rating"].astype(np.float32)
    df["trust_weight"] = df["trust_weight"].astype(np.float32)
    return df

In [6]:
processed_df = preprocess_data(df, MIN_USER_INTERACTIONS, MIN_ITEM_INTERACTIONS)

Final data length: 100891
Unique users: 10159
Unique items: 9590


In [7]:
class DataEncoder():
    def __init__(self):
        self.user_encoder = LabelEncoder()
        self.item_encoders = {}  # Store encoders for each domain
        self.is_fitted = False

    def label_encoder(self, df):
        # Encode users (shared across domains)
        df["user_id"] = self.user_encoder.fit_transform(df["user"])
        print(f"Encoded {len(self.user_encoder.classes_)} unique users.")

        domains = df["domain"].unique()

        # Encode items per domain (items might have same ID in different domains)
        df["item_id"] = -1  # Initialize with -1
        for domain in domains:
            domain_data = df[df["domain"] == domain]
            item_encoder = LabelEncoder()
            encoded_items = item_encoder.fit_transform(domain_data["item"])
            df.loc[df["domain"] == domain, "item_id"] = encoded_items
            self.item_encoders[domain] = item_encoder

            print(f"Encoded {len(item_encoder.classes_)} unique items in domain '{domain}'.")

        # Convert to integer type
        df["user_id"] = df["user_id"].astype(np.int64)
        df["item_id"] = df["item_id"].astype(np.int64)

        self.is_fitted = True
        return df

    def transform_new_data(self, df):
        # Transform new data using existing encoders. Useful for handling new reviews in production.
        if not self.is_fitted:
            raise ValueError("DataEncoder is not fitted. Call label_encoder() first.")

        # Encode users and items
        df["user_id"] = self.user_encoder.transform(df["user"])
        df["item_id"] = self.item_encoders[df["domain"]].transform(df["item"])

        return df

In [8]:
encoder = DataEncoder()
encoded_df = encoder.label_encoder(processed_df)

Encoded 10159 unique users.
Encoded 4239 unique items in domain 'Movies_and_TV'.
Encoded 5351 unique items in domain 'Video_Games'.


In [9]:
encoded_df.head()

Unnamed: 0,user,item,rating,domain,verified_purchase,timestamp,trust_weight,user_id,item_id
0,AFIMCCRTD3PWOJY7BAOJMA5C7I4A,B00003CXC3,5.0,Movies_and_TV,False,957887792000,0.8,3782,55
1,AFJE7EQZIEFJJOMZGQVAO4AQBHYA,B00000K2R4,5.0,Video_Games,False,964125246000,0.8,3842,18
2,AHC3WRQ2PVG3GVNBNK7ATK7YELLA,B001E91OQA,5.0,Video_Games,False,966276400000,0.8,8338,373
3,AHM2XKCUWJ4GZQNGDGNEVEN7FJYQ,6305313687,4.0,Movies_and_TV,False,968303405000,0.8,9090,27
4,AHVRJMMQMNEWRCZJZ6T5XHMER2PA,B00000K2R4,5.0,Video_Games,False,971974663000,0.8,9860,18


In [10]:
# mean_rating = encoded_df["rating"].mean()
# encoded_df["rating"] = encoded_df["rating"] - mean_rating

def create_data_splits(df, train_size=0.8):
    train, temp = python_chrono_split(
        df, ratio=train_size, filter_by="user",
        col_user="user_id", col_item="item_id", col_timestamp="timestamp"
    )

    val, test = python_stratified_split(
        temp, ratio=0.5, filter_by="user",
        col_user="user_id", col_item="item_id"
    )

    print(f"Train set size: {train.shape[0]}")
    print(f"Validation set size: {val.shape[0]}")
    print(f"Test set size: {test.shape[0]}")
    print(f"Common users in train and val: {len(set(train['user_id']).intersection(set(val['user_id'])))}")
    print(f"Common users in train and test: {len(set(train['user_id']).intersection(set(test['user_id'])))}")

    return train, val, test

In [11]:
train_df, val_df, test_df = create_data_splits(encoded_df)

Train set size: 80760
Validation set size: 8633
Test set size: 11498
Common users in train and val: 5377
Common users in train and test: 9136


### Checking the interaction between users and items

In [12]:
from scipy.sparse import csr_matrix

def create_interaction_matrix(df, n_users=None, n_items=None):
    if n_users is None:
        n_users = df["user_id"].nunique()
    if n_items is None:
        n_items = df["item_id"].nunique()

    # Create a sparse matrix for interactions
    row = df["user_id"].values
    col = df["item_id"].values
    data = df["rating"].values
    interaction_matrix = csr_matrix((data, (row, col)), shape=(n_users, n_items))
    density = interaction_matrix.nnz / (interaction_matrix.shape[0] * interaction_matrix.shape[1])

    print(f"Shape: {interaction_matrix.shape} (users x items)")
    print(f"Non-zero entries: {interaction_matrix.nnz}")
    print(f"Density: {density:.4f}")

    return interaction_matrix

In [13]:
# See interaction matrices for each domain to understand user-item interactions
domains = encoded_df["domain"].unique()
interaction_matrices = {}

for domain in domains:
    print(f"\n{domain} domain interaction matrix:")
    domain_df = train_df[train_df["domain"] == domain]

    n_users = encoded_df["user_id"].max() + 1
    n_items = domain_df["item_id"].max() + 1
    interaction_matrix = create_interaction_matrix(domain_df, n_users, n_items)
    interaction_matrices[domain] = interaction_matrix


Movies_and_TV domain interaction matrix:
Shape: (10159, 4239) (users x items)
Non-zero entries: 41981
Density: 0.0010

Video_Games domain interaction matrix:
Shape: (10159, 5351) (users x items)
Non-zero entries: 37629
Density: 0.0007


## Simple Matrix Factorization Model
- Creating dataset class for PyTorch
- Implementing a simple matrix factorization model using PyTorch

In [14]:
class SimpleMFDataset(Dataset):
    def __init__(self, df):
        self.user_ids = torch.tensor(df["user_id"].values, dtype=torch.long)
        self.item_ids = torch.tensor(df["item_id"].values, dtype=torch.long)
        self.ratings = torch.tensor(df["rating"].values, dtype=torch.float32)
        self.weights = torch.tensor(df["trust_weight"].values, dtype=torch.float32)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.item_ids[idx], self.ratings[idx], self.weights[idx]

In [15]:
class SimpleMatrixFactorization(nn.Module):
    def __init__(self, n_users, n_items, embedding_dim):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        self.global_bias = nn.Parameter(torch.zeros(1))

        # init
        nn.init.normal_(self.user_embedding.weight, std=0.05)
        nn.init.normal_(self.item_embedding.weight, std=0.05)
        nn.init.zeros_(self.user_bias.weight)
        nn.init.zeros_(self.item_bias.weight)

    def forward(self, user_ids, item_ids):
        user_embeds = self.user_embedding(user_ids)
        item_embeds = self.item_embedding(item_ids)
        dot_product = (user_embeds * item_embeds).sum(dim=1, keepdim=True)
        out = dot_product + self.user_bias(user_ids) + self.item_bias(item_ids) + self.global_bias
        return out.squeeze(1)

    # def forward(self, user_ids, item_ids):
    #     user_embeds = self.user_embedding(user_ids)
    #     item_embeds = self.item_embedding(item_ids)
    #     dot_product = (user_embeds * item_embeds).sum(dim=1)
    #     return dot_product  # No bias terms for simplicity

## Training the Simple Matrix Factorization Model
- Using sample-weighted MSE loss
- Implementing early stopping with ReduceLROnPlateau scheduler
- Evaluating the model on validation set
- Predicting ratings for a given user-item pair
- Evaluating ranking metrics like Precision@k, Recall@k, MAP@k, NDCG@k

In [16]:
class MFModel:
    def __init__(self,
                 n_embeddings=32,
                 n_epochs=10,
                 lr=0.001,
                 weight_decay=1e-5,
                 batch_size=1024,
                 device="cpu"):

        self.n_embeddings = n_embeddings
        self.n_epochs = n_epochs
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.model = None
        self.device = device

    def train(self, train_df, val_df, n_users, n_items):
        print(f"\n Training PyTorch Matrix Factorization Model...")
        print(f"   Device: {self.device}")
        print(f"   Factors: {self.n_embeddings}, Epochs: {self.n_epochs}, LR: {self.lr}")

        train_loader = DataLoader(SimpleMFDataset(train_df), batch_size=self.batch_size, shuffle=True, drop_last=False)
        val_loader   = DataLoader(SimpleMFDataset(val_df),   batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.model = SimpleMatrixFactorization(n_users, n_items, self.n_embeddings).to(self.device)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-5
        )

        best_val_rmse, best_state = float("inf"), None

        for epoch in range(self.n_epochs):
            # ---- train ----
            self.model.train()
            se_sum, denom = 0.0, 0.0
            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}")
            for users, items, ratings, weights in pbar:
                users = users.to(self.device)
                items = items.to(self.device)
                ratings = ratings.to(self.device)
                weights = weights.to(self.device)

                optimizer.zero_grad()
                preds = self.model(users, items)

                # sample-weighted MSE on true rating
                se = (preds - ratings) ** 2
                loss = (weights * se).sum() / (weights.sum() + 1e-8)
                loss.backward()
                optimizer.step()

                se_sum += se.detach().sum().item()
                denom += ratings.numel()
                pbar.set_postfix({"Train RMSE": f"{math.sqrt(se_sum/denom):.4f}"})

            train_rmse = math.sqrt(se_sum / max(1, denom))

            # ---- validate ----
            self.model.eval()
            with torch.no_grad():
                se_sum, denom = 0.0, 0.0
                for users, items, ratings, _ in val_loader:
                    users = users.to(self.device)
                    items = items.to(self.device)
                    ratings = ratings.to(self.device)
                    preds = self.model(users, items)
                    se_sum += torch.sum((preds - ratings) ** 2).item()
                    denom += ratings.numel()
            val_rmse = math.sqrt(se_sum / max(1, denom))
            scheduler.step(val_rmse)

            print(f"Epoch {epoch+1}/{self.n_epochs} - Train RMSE: {train_rmse:.4f}, Val RMSE: {val_rmse:.4f}")

            # track best
            if val_rmse < best_val_rmse - 1e-4:
                best_val_rmse = val_rmse
                best_state = {k: v.cpu() for k, v in self.model.state_dict().items()}

        if best_state is not None:
            self.model.load_state_dict({k: v.to(self.device) for k, v in best_state.items()})
        print("  Training complete. Best Val RMSE:", f"{best_val_rmse:.4f}")

    def predict_dataframe(self, df):
        """df must contain columns: user_id, item_id"""
        if self.model is None:
            raise ValueError("Model not trained yet!")
        self.model.eval()

        users = torch.tensor(df["user_id"].values, dtype=torch.long, device=self.device)
        items = torch.tensor(df["item_id"].values, dtype=torch.long, device=self.device)

        preds = []
        with torch.no_grad():
            for i in range(0, len(df), self.batch_size):
                pu = users[i:i+self.batch_size]
                pi = items[i:i+self.batch_size]
                p = self.model(pu, pi)
                preds.append(p.detach().cpu().numpy())
        return np.concatenate(preds)

In [17]:
def dcg_at_k(relevances, k):
    """relevances: list/array of true gains ordered by the *predicted* rank."""
    r = np.asarray(relevances)[:k]
    if r.size == 0:
        return 0.0
    discounts = 1.0 / np.log2(np.arange(2, r.size + 2))
    return float(np.sum(r * discounts))

def ndcg_at_k(predicted_items, true_rel_map, k):
    """
    predicted_items: list of item_ids sorted by predicted score (desc).
    true_rel_map   : dict {item_id -> gain}. Items not present => 0 gain.
    """
    # gains at predicted order
    gains = [true_rel_map.get(i, 0.0) for i in predicted_items[:k]]
    dcg = dcg_at_k(gains, k)

    # ideal gains: sort all candidate items by their true gain desc
    ideal_gains = sorted(true_rel_map.values(), reverse=True)
    idcg = dcg_at_k(ideal_gains, k)
    return (dcg / idcg) if idcg > 0 else 0.0

def evaluate_ranking_metrics(
    model,
    test_df,
    all_data_df,
    domain: str,
    k=10,
    rating_threshold=4.0,
    n_neg_samples=100,
    rng_seed=42,
    graded=False  # <- set True if you want graded gains
):
    """
    Domain-aware Precision@k, Recall@k, MAP@k, NDCG@k.
    - Binary relevance by default: rating >= threshold -> 1, else 0
    - Graded relevance (optional): gain = max(rating - (threshold - 1), 0)
      e.g., threshold=4.0 => 4★->1, 5★->2, else 0
    """
    print(f"\n  Evaluating Ranking Metrics @k={k} for domain = {domain}")
    rs = np.random.RandomState(rng_seed)

    # filter to target domain
    test_dom = test_df[test_df["domain"] == domain].copy()
    hist_dom = all_data_df[all_data_df["domain"] == domain].copy()

    if test_dom.empty:
        print("No test rows for this domain. Skipping.")
        return {
            f'precision_at_{k}': 0.0,
            f'recall_at_{k}': 0.0,
            f'map_at_{k}': 0.0,
            f'ndcg_at_{k}': 0.0
        }

    # candidate pool in this domain
    domain_items = np.unique(hist_dom["item_id"].values)
    # seen items per user in this domain
    seen_by_user = hist_dom.groupby("user_id")["item_id"].apply(set)

    precisions, recalls, aps, ndcgs = [], [], [], []

    for user_id, g in tqdm(test_dom.groupby("user_id"), desc="Calculating Ranking Metrics"):
        # --- build relevance map for this user's test items ---
        if graded:
            # graded gains: only >= threshold contribute, and graded by how far above threshold
            rel_map = {
                iid: max(float(r) - (rating_threshold - 1.0), 0.0)
                for iid, r in zip(g["item_id"].values, g["rating"].values)
            }
        else:
            # binary gains
            rel_map = {
                iid: 1.0 if (float(r) >= rating_threshold) else 0.0
                for iid, r in zip(g["item_id"].values, g["rating"].values)
            }
        # positive (nonzero) items
        rel_items = {iid for iid, gain in rel_map.items() if gain > 0.0}
        if not rel_items:
            continue  # skip users with no relevant items in test

        seen = seen_by_user.get(user_id, set())
        negatives = np.array(list(set(domain_items) - seen))
        if negatives.size == 0:
            continue

        m = min(n_neg_samples, negatives.size)
        neg_samples = rs.choice(negatives, size=m, replace=False)

        # candidate set = relevant test items + sampled negatives
        items_to_rank = np.array(list(rel_map.keys()))
        items_to_rank = np.concatenate([items_to_rank, neg_samples])

        pred_df = pd.DataFrame({"user_id": user_id, "item_id": items_to_rank})
        pred_df["score"] = model.predict_dataframe(pred_df)
        pred_df = pred_df.sort_values("score", ascending=False)
        top_k = pred_df["item_id"].values[:k]

        # --- Precision@k / Recall@k ---
        hit_set = set(top_k).intersection(rel_items)
        precisions.append(len(hit_set) / k)
        recalls.append(len(hit_set) / len(rel_items))

        # --- MAP@k ---
        ap, hits = 0.0, 0
        for rank, item in enumerate(top_k, start=1):
            if item in rel_items:
                hits += 1
                ap += hits / rank
        aps.append(ap / len(rel_items))

        # --- NDCG@k ---
        ndcgs.append(ndcg_at_k(top_k.tolist(), rel_map, k))

    out = {
        f"precision_at_{k}": float(np.mean(precisions)) if precisions else 0.0,
        f"recall_at_{k}": float(np.mean(recalls)) if recalls else 0.0,
        f"map_at_{k}": float(np.mean(aps)) if aps else 0.0,
        f"ndcg_at_{k}": float(np.mean(ndcgs)) if ndcgs else 0.0,
    }

    print(f"\n--- Ranking Evaluation Results (domain={domain}, k={k}) ---")
    print(f"   Precision@{k}: {out[f'precision_at_{k}']:.4f}")
    print(f"   Recall@{k}:    {out[f'recall_at_{k}']:.4f}")
    print(f"   MAP@{k}:       {out[f'map_at_{k}']:.4f}")
    print(f"   NDCG@{k}:      {out[f'ndcg_at_{k}']:.4f}")
    print("-----------------------------------------")
    return out

In [20]:
# --- Training the PyTorch Model ---
print("\nTraining PyTorch Matrix Factorization on Movies domain:")
movies_train = train_df[train_df["domain"] == "Movies_and_TV"]
movies_val = val_df[val_df["domain"] == "Movies_and_TV"]
movies_test = test_df[test_df["domain"] == "Movies_and_TV"]

n_total_users = encoded_df["user_id"].max() + 1
n_movies_items = encoded_df[encoded_df["domain"] == "Movies_and_TV"]["item_id"].max() + 1

mf_torch_model = MFModel(
    n_embeddings=8,
    n_epochs=50,
    lr=0.05,
    weight_decay=5e-3,
    batch_size=1024,
    device=DEVICE
)

mf_torch_model.train(movies_train, movies_val, n_total_users, n_movies_items)


Training PyTorch Matrix Factorization on Movies domain:

 Training PyTorch Matrix Factorization Model...
   Device: cuda
   Factors: 8, Epochs: 50, LR: 0.05


Epoch 1/50: 100%|██████████| 42/42 [00:01<00:00, 29.83it/s, Train RMSE=3.1449]


Epoch 1/50 - Train RMSE: 3.1449, Val RMSE: 2.0323


Epoch 2/50: 100%|██████████| 42/42 [00:00<00:00, 43.46it/s, Train RMSE=1.7336]


Epoch 2/50 - Train RMSE: 1.7336, Val RMSE: 1.4117


Epoch 3/50: 100%|██████████| 42/42 [00:00<00:00, 51.16it/s, Train RMSE=1.2476]


Epoch 3/50 - Train RMSE: 1.2476, Val RMSE: 1.2007


Epoch 4/50: 100%|██████████| 42/42 [00:00<00:00, 43.80it/s, Train RMSE=1.1124]


Epoch 4/50 - Train RMSE: 1.1124, Val RMSE: 1.1742


Epoch 5/50: 100%|██████████| 42/42 [00:00<00:00, 47.20it/s, Train RMSE=1.0938]


Epoch 5/50 - Train RMSE: 1.0938, Val RMSE: 1.1746


Epoch 6/50: 100%|██████████| 42/42 [00:00<00:00, 45.61it/s, Train RMSE=1.0926]


Epoch 6/50 - Train RMSE: 1.0926, Val RMSE: 1.1766


Epoch 7/50: 100%|██████████| 42/42 [00:01<00:00, 39.85it/s, Train RMSE=1.0921]


Epoch 7/50 - Train RMSE: 1.0921, Val RMSE: 1.1763


Epoch 8/50: 100%|██████████| 42/42 [00:01<00:00, 39.92it/s, Train RMSE=1.0897]


Epoch 8/50 - Train RMSE: 1.0897, Val RMSE: 1.1744


Epoch 9/50: 100%|██████████| 42/42 [00:00<00:00, 45.38it/s, Train RMSE=1.0878]


Epoch 9/50 - Train RMSE: 1.0878, Val RMSE: 1.1733


Epoch 10/50: 100%|██████████| 42/42 [00:01<00:00, 39.40it/s, Train RMSE=1.0890]


Epoch 10/50 - Train RMSE: 1.0890, Val RMSE: 1.1733


Epoch 11/50: 100%|██████████| 42/42 [00:00<00:00, 45.20it/s, Train RMSE=1.0881]


Epoch 11/50 - Train RMSE: 1.0881, Val RMSE: 1.1732


Epoch 12/50: 100%|██████████| 42/42 [00:01<00:00, 39.64it/s, Train RMSE=1.0892]


Epoch 12/50 - Train RMSE: 1.0892, Val RMSE: 1.1747


Epoch 13/50: 100%|██████████| 42/42 [00:01<00:00, 39.71it/s, Train RMSE=1.0883]


Epoch 13/50 - Train RMSE: 1.0883, Val RMSE: 1.1731


Epoch 14/50: 100%|██████████| 42/42 [00:00<00:00, 45.41it/s, Train RMSE=1.0891]


Epoch 14/50 - Train RMSE: 1.0891, Val RMSE: 1.1742


Epoch 15/50: 100%|██████████| 42/42 [00:00<00:00, 44.91it/s, Train RMSE=1.0835]


Epoch 15/50 - Train RMSE: 1.0835, Val RMSE: 1.1732


Epoch 16/50: 100%|██████████| 42/42 [00:01<00:00, 39.49it/s, Train RMSE=1.0842]


Epoch 16/50 - Train RMSE: 1.0842, Val RMSE: 1.1739


Epoch 17/50: 100%|██████████| 42/42 [00:01<00:00, 39.52it/s, Train RMSE=1.0845]


Epoch 17/50 - Train RMSE: 1.0845, Val RMSE: 1.1738


Epoch 18/50: 100%|██████████| 42/42 [00:00<00:00, 45.69it/s, Train RMSE=1.0808]


Epoch 18/50 - Train RMSE: 1.0808, Val RMSE: 1.1741


Epoch 19/50: 100%|██████████| 42/42 [00:01<00:00, 39.71it/s, Train RMSE=1.0807]


Epoch 19/50 - Train RMSE: 1.0807, Val RMSE: 1.1741


Epoch 20/50: 100%|██████████| 42/42 [00:00<00:00, 46.19it/s, Train RMSE=1.0809]


Epoch 20/50 - Train RMSE: 1.0809, Val RMSE: 1.1742


Epoch 21/50: 100%|██████████| 42/42 [00:00<00:00, 46.78it/s, Train RMSE=1.0786]


Epoch 21/50 - Train RMSE: 1.0786, Val RMSE: 1.1740


Epoch 22/50: 100%|██████████| 42/42 [00:01<00:00, 40.46it/s, Train RMSE=1.0786]


Epoch 22/50 - Train RMSE: 1.0786, Val RMSE: 1.1739


Epoch 23/50: 100%|██████████| 42/42 [00:01<00:00, 40.10it/s, Train RMSE=1.0787]


Epoch 23/50 - Train RMSE: 1.0787, Val RMSE: 1.1740


Epoch 24/50: 100%|██████████| 42/42 [00:00<00:00, 45.28it/s, Train RMSE=1.0773]


Epoch 24/50 - Train RMSE: 1.0773, Val RMSE: 1.1739


Epoch 25/50: 100%|██████████| 42/42 [00:01<00:00, 39.62it/s, Train RMSE=1.0774]


Epoch 25/50 - Train RMSE: 1.0774, Val RMSE: 1.1739


Epoch 26/50: 100%|██████████| 42/42 [00:00<00:00, 46.77it/s, Train RMSE=1.0774]


Epoch 26/50 - Train RMSE: 1.0774, Val RMSE: 1.1740


Epoch 27/50: 100%|██████████| 42/42 [00:01<00:00, 40.09it/s, Train RMSE=1.0766]


Epoch 27/50 - Train RMSE: 1.0766, Val RMSE: 1.1741


Epoch 28/50: 100%|██████████| 42/42 [00:01<00:00, 39.63it/s, Train RMSE=1.0767]


Epoch 28/50 - Train RMSE: 1.0767, Val RMSE: 1.1739


Epoch 29/50: 100%|██████████| 42/42 [00:00<00:00, 45.29it/s, Train RMSE=1.0767]


Epoch 29/50 - Train RMSE: 1.0767, Val RMSE: 1.1739


Epoch 30/50: 100%|██████████| 42/42 [00:00<00:00, 45.54it/s, Train RMSE=1.0763]


Epoch 30/50 - Train RMSE: 1.0763, Val RMSE: 1.1740


Epoch 31/50: 100%|██████████| 42/42 [00:01<00:00, 39.76it/s, Train RMSE=1.0763]


Epoch 31/50 - Train RMSE: 1.0763, Val RMSE: 1.1740


Epoch 32/50: 100%|██████████| 42/42 [00:01<00:00, 39.76it/s, Train RMSE=1.0763]


Epoch 32/50 - Train RMSE: 1.0763, Val RMSE: 1.1740


Epoch 33/50: 100%|██████████| 42/42 [00:00<00:00, 45.51it/s, Train RMSE=1.0761]


Epoch 33/50 - Train RMSE: 1.0761, Val RMSE: 1.1740


Epoch 34/50: 100%|██████████| 42/42 [00:01<00:00, 38.42it/s, Train RMSE=1.0761]


Epoch 34/50 - Train RMSE: 1.0761, Val RMSE: 1.1740


Epoch 35/50: 100%|██████████| 42/42 [00:00<00:00, 50.31it/s, Train RMSE=1.0761]


Epoch 35/50 - Train RMSE: 1.0761, Val RMSE: 1.1740


Epoch 36/50: 100%|██████████| 42/42 [00:00<00:00, 45.12it/s, Train RMSE=1.0760]


Epoch 36/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 37/50: 100%|██████████| 42/42 [00:01<00:00, 39.02it/s, Train RMSE=1.0760]


Epoch 37/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 38/50: 100%|██████████| 42/42 [00:01<00:00, 39.46it/s, Train RMSE=1.0760]


Epoch 38/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 39/50: 100%|██████████| 42/42 [00:00<00:00, 45.33it/s, Train RMSE=1.0760]


Epoch 39/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 40/50: 100%|██████████| 42/42 [00:01<00:00, 39.88it/s, Train RMSE=1.0760]


Epoch 40/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 41/50: 100%|██████████| 42/42 [00:00<00:00, 45.23it/s, Train RMSE=1.0760]


Epoch 41/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 42/50: 100%|██████████| 42/42 [00:01<00:00, 39.62it/s, Train RMSE=1.0760]


Epoch 42/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 43/50: 100%|██████████| 42/42 [00:01<00:00, 39.52it/s, Train RMSE=1.0760]


Epoch 43/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 44/50: 100%|██████████| 42/42 [00:00<00:00, 45.67it/s, Train RMSE=1.0760]


Epoch 44/50 - Train RMSE: 1.0760, Val RMSE: 1.1740


Epoch 45/50: 100%|██████████| 42/42 [00:00<00:00, 44.43it/s, Train RMSE=1.0759]


Epoch 45/50 - Train RMSE: 1.0759, Val RMSE: 1.1740


Epoch 46/50: 100%|██████████| 42/42 [00:01<00:00, 37.54it/s, Train RMSE=1.0759]


Epoch 46/50 - Train RMSE: 1.0759, Val RMSE: 1.1740


Epoch 47/50: 100%|██████████| 42/42 [00:01<00:00, 38.89it/s, Train RMSE=1.0759]


Epoch 47/50 - Train RMSE: 1.0759, Val RMSE: 1.1740


Epoch 48/50: 100%|██████████| 42/42 [00:00<00:00, 51.33it/s, Train RMSE=1.0759]


Epoch 48/50 - Train RMSE: 1.0759, Val RMSE: 1.1740


Epoch 49/50: 100%|██████████| 42/42 [00:01<00:00, 39.74it/s, Train RMSE=1.0759]


Epoch 49/50 - Train RMSE: 1.0759, Val RMSE: 1.1740


Epoch 50/50: 100%|██████████| 42/42 [00:00<00:00, 44.81it/s, Train RMSE=1.0759]


Epoch 50/50 - Train RMSE: 1.0759, Val RMSE: 1.1740
  Training complete. Best Val RMSE: 1.1732


In [35]:
ranking_results = evaluate_ranking_metrics(
    model=mf_torch_model,
    test_df=movies_test,
    all_data_df=encoded_df,
    domain="Movies_and_TV",
    k=10,
    rating_threshold=POSITIVE_THRESHOLD,  # 4.0
    n_neg_samples=100
)


  Evaluating Ranking Metrics @k=10 for domain = Movies_and_TV


Calculating Ranking Metrics: 100%|██████████| 910/910 [00:01<00:00, 587.22it/s]


--- Ranking Evaluation Results (domain=Movies_and_TV, k=10) ---
   Precision@10: 0.0138
   Recall@10:    0.1327
   MAP@10:       0.0535
   NDCG@10:      0.0724
-----------------------------------------





## Experiment with Implicit Data
- Use implicit data instead of explicit ratings
- Convert ratings to binary relevance (rating >= 4.0)
- Train the model on implicit feedback

In [21]:
def make_implicit(df, threshold=4.0):
    """Convert explicit ratings to implicit binary feedback."""
    df = df.copy()
    df["label"] = (df["rating"] >= threshold).astype(np.float32)  # 1.0 if rating >= threshold, else 0.0
    return df

implicit_df = make_implicit(encoded_df, 1)
implicit_df["label"].value_counts()

label
1.0    100891
Name: count, dtype: int64

In [22]:
for domain, sub in encoded_df.groupby("domain"):
    print(domain, sub)

Movies_and_TV                                 user        item  rating         domain  \
0       AFIMCCRTD3PWOJY7BAOJMA5C7I4A  B00003CXC3     5.0  Movies_and_TV   
3       AHM2XKCUWJ4GZQNGDGNEVEN7FJYQ  6305313687     4.0  Movies_and_TV   
8       AGXJY52WC5KWDEA3BXTVA5E4YIHQ  B00003CWU3     5.0  Movies_and_TV   
11      AHRGTIMQO47C2VLJILIDU53BQKSA  B00005ALS0     4.0  Movies_and_TV   
13      AH6C7M7CXHOVGKJ2JXDFFDCWOONA  B00003CX5P     5.0  Movies_and_TV   
...                              ...         ...     ...            ...   
100855  AGQBF2Z2HGQUIL5BJAVTNGETLH5A  B08XP4KH56     5.0  Movies_and_TV   
100858  AFPR3Q7LSH25LTCTO2BWZRDAWGMQ  B0B857V4TH     3.0  Movies_and_TV   
100861  AFGLFLHKZQI35Z7UIJWNUCTFSGXQ  B01LW32XQV     5.0  Movies_and_TV   
100868  AHZTJHB7BM7ATQG3FD2O53U2VEPQ  B0B18G8R9B     5.0  Movies_and_TV   
100879  AEK2SIZXOXZJYYLKNBVBULSU3CKA  B095RHJ52R     1.0  Movies_and_TV   

        verified_purchase      timestamp  trust_weight  user_id  item_id  
0         

In [23]:
def create_implicit_splits(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Split interactions per domain with leave-one-out (latest -> val, 2nd latest -> test, rest -> train)
    independently for each (domain, user).
    """
    temp = df.copy()
    temp["rank_latest"] = (
        temp.sort_values("timestamp", ascending=False)
            .groupby(["domain", "user_id"])["timestamp"]
            .rank(method="first", ascending=False)
    )
    train = temp[temp["rank_latest"] > 2].drop(columns="rank_latest")
    val   = temp[temp["rank_latest"] == 1].drop(columns="rank_latest")
    test  = temp[temp["rank_latest"] == 2].drop(columns="rank_latest")

    print(f"Train set size: {train.shape[0]}")
    print(f"Validation set size: {val.shape[0]}")
    print(f"Test set size: {test.shape[0]}")
    print(f"Common users in train and val: {len(set(train['user_id']).intersection(set(val['user_id'])))}")
    print(f"Common users in train and test: {len(set(train['user_id']).intersection(set(test['user_id'])))}")
    return train.reset_index(drop=True), val.reset_index(drop=True), test.reset_index(drop=True)

In [24]:
new_train_df, new_val_df, new_test_df = create_implicit_splits(implicit_df)

Train set size: 77534
Validation set size: 12487
Test set size: 10870
Common users in train and val: 8935
Common users in train and test: 8935


In [25]:
# For negatives & candidate pools we must exclude ALL seen items (any rating) in-domain:
def seen_and_items_by_domain(df, domain):
    dom_all = df[df["domain"] == domain]
    domain_items = np.array(sorted(dom_all["item_id"].unique()))
    seen_by_user = dom_all.groupby("user_id")["item_id"].apply(set).to_dict()
    return domain_items, seen_by_user

In [26]:
movies_domain_items, movies_seen_by_user = seen_and_items_by_domain(implicit_df, SOURCE_DOMAIN)
movies_domain_items, movies_seen_by_user

(array([   0,    1,    2, ..., 4236, 4237, 4238], shape=(4239,)),
 {0: {102, 611, 1746},
  1: {1665, 3150},
  2: {1113, 2504, 2593, 3092, 3577},
  3: {1081, 1540, 1806, 1827, 2393, 2835, 2950, 3062, 3158, 3179},
  4: {195, 222, 823, 1078, 1276, 1319, 1787, 1819, 1912},
  5: {986, 2402, 2694, 2717, 3169, 3181},
  6: {1334, 1867, 2055, 2345},
  7: {1146,
   1514,
   1709,
   2159,
   2358,
   2688,
   2824,
   2889,
   3178,
   3289,
   3388,
   3393,
   3399,
   3402,
   3418,
   3583,
   3949},
  8: {267, 923, 1003, 1004, 1098, 1577, 1592, 1714, 1879, 2188, 2393, 2764},
  9: {2063},
  10: {1250, 1622, 2100, 2234, 2345, 2815, 2826, 3172},
  11: {393, 468, 590, 632, 924, 1207, 1611, 1920, 2089, 2314, 2671},
  13: {1497, 2411, 2729},
  15: {2688, 3086, 3473, 3583, 3702, 3749, 3946},
  16: {1133,
   1449,
   1700,
   2159,
   2287,
   2489,
   2628,
   2685,
   2693,
   2838,
   2888,
   3062,
   3074,
   3136,
   3172,
   3176,
   3203,
   3253,
   3325},
  17: {28,
   94,
   115,
   137,

In [27]:
class PositiveImplicitDataset(Dataset):
    def __init__(self, df_pos: pd.DataFrame):
        self.user_ids = torch.tensor(df_pos["user_id"].values, dtype=torch.long)
        self.item_ids = torch.tensor(df_pos["item_id"].values, dtype=torch.long)
        self.weights  = torch.tensor(df_pos["trust_weight"].values, dtype=torch.float32)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.item_ids[idx], self.weights[idx]

In [28]:
class ImplicitBCETrainer:
    def __init__(self, model, domain_items, seen_by_user,
                 n_epochs=20, lr=1e-3, weight_decay=1e-5, batch_size=1024,
                 n_neg=5, pos_weight_scale=1, neg_weight=1,
                 max_grad_norm=5.0, patience=3, device="cpu"):

        self.model = model.to(device)
        self.domain_items = np.array(domain_items)
        self.seen_by_user = seen_by_user
        self.n_epochs = n_epochs
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.n_neg = n_neg
        self.pos_weight_scale = pos_weight_scale
        self.neg_weight = neg_weight
        self.max_grad_norm = max_grad_norm
        self.patience = patience
        self.device = device

        self.loss_fn = nn.BCEWithLogitsLoss(reduction="none")
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-5
        )

    def _sample_negatives(self, users_np: np.ndarray) -> np.ndarray:
        neg_items = []
        for u in users_np:
            seen = self.seen_by_user.get(int(u), set())
            pool = self.domain_items[~np.isin(self.domain_items, list(seen))]
            if len(pool) == 0:
                pool = self.domain_items
            replace = len(pool) < self.n_neg
            neg_items.append(np.random.choice(pool, size=self.n_neg, replace=replace))
        return np.array(neg_items)

    def train(self, train_df_pos: pd.DataFrame, val_df_pos: pd.DataFrame):
        train_loader = DataLoader(PositiveImplicitDataset(train_df_pos), batch_size=self.batch_size, shuffle=True)
        val_loader   = DataLoader(PositiveImplicitDataset(val_df_pos),   batch_size=self.batch_size, shuffle=False)

        best_val, best_state, bad = float("inf"), None, 0
        for epoch in range(self.n_epochs):
            # ---- train ----
            self.model.train()
            loss_acc, denom = 0.0, 0
            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}")
            for u_pos, i_pos, w_pos in pbar:
                u_pos = u_pos.to(self.device)
                i_pos = i_pos.to(self.device)
                w_pos = w_pos.to(self.device)

                neg_mat = self._sample_negatives(u_pos.cpu().numpy())  # [B, n_neg]
                u_neg = torch.tensor(u_pos.cpu().numpy().repeat(self.n_neg), dtype=torch.long, device=self.device)
                i_neg = torch.tensor(neg_mat.reshape(-1), dtype=torch.long, device=self.device)

                users  = torch.cat([u_pos, u_neg], dim=0)
                items  = torch.cat([i_pos, i_neg], dim=0)
                labels = torch.cat([torch.ones_like(u_pos, dtype=torch.float32),
                                    torch.zeros_like(u_neg, dtype=torch.float32)], dim=0)
                w_neg  = torch.full_like(u_neg, fill_value=self.neg_weight, dtype=torch.float32)
                weights = torch.cat([w_pos * self.pos_weight_scale, w_neg], dim=0).to(self.device)

                self.optimizer.zero_grad()
                logits = self.model(users, items)
                loss_vec = self.loss_fn(logits, labels)
                loss = (weights * loss_vec).sum() / (weights.sum() + 1e-8)
                loss.backward()
                if self.max_grad_norm: nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
                self.optimizer.step()

                loss_acc += loss.item() * labels.size(0)
                denom += labels.size(0)
                pbar.set_postfix({"Train loss": f"{loss.item():.4f}"})
            train_loss = loss_acc / max(1, denom)

            # ---- validate ----
            self.model.eval()
            with torch.no_grad():
                loss_acc, denom = 0.0, 0
                for u_pos, i_pos, w_pos in val_loader:
                    u_pos = u_pos.to(DEVICE)
                    i_pos = i_pos.to(self.device)
                    w_pos = w_pos.to(self.device)

                    neg_mat = self._sample_negatives(u_pos.cpu().numpy())
                    u_neg = torch.tensor(u_pos.cpu().numpy().repeat(self.n_neg), dtype=torch.long, device=self.device)
                    i_neg = torch.tensor(neg_mat.reshape(-1), dtype=torch.long, device=self.device)

                    users  = torch.cat([u_pos, u_neg], dim=0)
                    items  = torch.cat([i_pos, i_neg], dim=0)
                    labels = torch.cat([torch.ones_like(u_pos, dtype=torch.float32),
                                        torch.zeros_like(u_neg, dtype=torch.float32)], dim=0)
                    w_neg  = torch.full_like(u_neg, fill_value=self.neg_weight, dtype=torch.float32)
                    weights = torch.cat([w_pos * self.pos_weight_scale, w_neg], dim=0).to(self.device)

                    logits = self.model(users, items)
                    loss_vec = self.loss_fn(logits, labels)
                    loss = (weights * loss_vec).sum() / (weights.sum() + 1e-8)

                    loss_acc += loss.item() * labels.size(0)
                    denom += labels.size(0)
            val_loss = loss_acc / max(1, denom)
            self.scheduler.step(val_loss)
            print(f"Epoch {epoch+1}: Train loss={train_loss:.4f} | Val loss={val_loss:.4f}")
            # print(f"Epoch {epoch+1}: train_BCE={train_loss:.4f} | val_BCE={val_loss:.4f}")

            if val_loss < best_val - 1e-4:
                best_val, best_state, bad = val_loss, {k: v.detach().cpu() for k, v in self.model.state_dict().items()}, 0
            else:
                bad += 1
                if bad >= self.patience:
                    print(f"Early stopping at epoch {epoch+1}. Best val loss={best_val:.4f}")
                    break

        if best_state is not None:
            self.model.load_state_dict({k: v.to(self.device) for k, v in best_state.items()})

    # same API your evaluator expects
    def predict_dataframe(self, df: pd.DataFrame) -> np.ndarray:
        self.model.eval()
        users = torch.tensor(df["user_id"].values, dtype=torch.long, device=self.device)
        items = torch.tensor(df["item_id"].values, dtype=torch.long, device=self.device)
        out = []
        with torch.no_grad():
            for i in range(0, len(df), 100_000):
                pu = users[i:i+100_000]; pi = items[i:i+100_000]
                out.append(self.model(pu, pi).detach().cpu().numpy())
        return np.concatenate(out)

In [29]:
class NeuMF(nn.Module):
    def __init__(self, n_users, n_items,
                 factors_gmf=32, factors_mlp=32,
                 mlp_layers=(128,64,32), dropout=0.2, use_bias=True):
        super().__init__()
        # GMF
        self.gmf_user = nn.Embedding(n_users, factors_gmf)
        self.gmf_item = nn.Embedding(n_items, factors_gmf)
        # MLP
        self.mlp_user = nn.Embedding(n_users, factors_mlp)
        self.mlp_item = nn.Embedding(n_items, factors_mlp)
        in_dim = factors_mlp * 2
        layers = []
        for h in mlp_layers:
            layers += [nn.Linear(in_dim, h), nn.ReLU(), nn.Dropout(dropout)]
            in_dim = h
        self.mlp = nn.Sequential(*layers)
        # Combine & predict logits
        pred_in = factors_gmf + (mlp_layers[-1] if mlp_layers else factors_mlp * 2)
        self.pred = nn.Linear(pred_in, 1)

        self.use_bias = use_bias
        if use_bias:
            self.user_bias = nn.Embedding(n_users, 1)
            self.item_bias = nn.Embedding(n_items, 1)
        else:
            self.user_bias = None
            self.item_bias = None

        # init
        for emb in [self.gmf_user, self.gmf_item, self.mlp_user, self.mlp_item]:
            nn.init.normal_(emb.weight, std=0.01)
        for m in self.mlp:
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight); nn.init.zeros_(m.bias)
        nn.init.xavier_uniform_(self.pred.weight); nn.init.zeros_(self.pred.bias)
        if self.use_bias:
            nn.init.zeros_(self.user_bias.weight); nn.init.zeros_(self.item_bias.weight)

    def forward(self, users, items):
        gmf = self.gmf_user(users) * self.gmf_item(items)
        mlp_in = torch.cat([self.mlp_user(users), self.mlp_item(items)], dim=1)
        mlp_out = self.mlp(mlp_in)
        x = torch.cat([gmf, mlp_out], dim=1)
        y = self.pred(x).squeeze(1)
        if self.use_bias:
            y = y + self.user_bias(users).squeeze(1) + self.item_bias(items).squeeze(1)
        return y

In [33]:
movies_new_train = new_train_df[new_train_df["domain"] == SOURCE_DOMAIN]
movies_new_val   = new_val_df[new_val_df["domain"] == SOURCE_DOMAIN]
movies_new_test  = new_test_df[new_test_df["domain"] == SOURCE_DOMAIN]

# Shapes from your implicit prep:
n_users_total  = implicit_df["user_id"].max() + 1
n_movies_items = implicit_df[implicit_df["domain"] == SOURCE_DOMAIN]["item_id"].max() + 1

# Build MF model and trainer
# mf_imp = SimpleMatrixFactorization(n_users_total, n_movies_items, embedding_dim=8)
mf_imp = NeuMF(
    n_users=n_users_total,
    n_items=n_movies_items,
    factors_gmf=8,
    factors_mlp=8,
    mlp_layers=(64, 32),
    dropout=0.2,
    use_bias=True
)

mf_trainer = ImplicitBCETrainer(
    model=mf_imp,
    domain_items=movies_domain_items,
    seen_by_user=movies_seen_by_user,
    n_epochs=100, lr=0.005, weight_decay=5e-3, batch_size=1024,
    n_neg=5, # negatives per positive (training)
    pos_weight_scale=1, # multiplier on positives’ trust_weight for BCE
    neg_weight=1, # multiplier on positives’ trust_weight for BCE
    max_grad_norm=5.0, patience=10, device=DEVICE
)

mf_trainer.train(movies_new_train, movies_new_val)

Epoch 1/100: 100%|██████████| 39/39 [00:06<00:00,  5.80it/s, Train loss=0.4427]


Epoch 1: Train loss=0.4813 | Val loss=0.4404


Epoch 2/100: 100%|██████████| 39/39 [00:06<00:00,  5.71it/s, Train loss=0.4398]


Epoch 2: Train loss=0.4417 | Val loss=0.4414


Epoch 3/100: 100%|██████████| 39/39 [00:06<00:00,  5.74it/s, Train loss=0.4408]


Epoch 3: Train loss=0.4420 | Val loss=0.4414


Epoch 4/100: 100%|██████████| 39/39 [00:07<00:00,  5.44it/s, Train loss=0.4433]


Epoch 4: Train loss=0.4417 | Val loss=0.4415


Epoch 5/100: 100%|██████████| 39/39 [00:06<00:00,  5.68it/s, Train loss=0.4410]


Epoch 5: Train loss=0.4415 | Val loss=0.4414


Epoch 6/100: 100%|██████████| 39/39 [00:06<00:00,  5.80it/s, Train loss=0.4425]


Epoch 6: Train loss=0.4415 | Val loss=0.4414


Epoch 7/100: 100%|██████████| 39/39 [00:06<00:00,  5.81it/s, Train loss=0.4433]


Epoch 7: Train loss=0.4415 | Val loss=0.4414


Epoch 8/100: 100%|██████████| 39/39 [00:06<00:00,  5.73it/s, Train loss=0.4430]


Epoch 8: Train loss=0.4416 | Val loss=0.4414


Epoch 9/100: 100%|██████████| 39/39 [00:06<00:00,  5.98it/s, Train loss=0.4421]


Epoch 9: Train loss=0.4414 | Val loss=0.4414


Epoch 10/100: 100%|██████████| 39/39 [00:06<00:00,  5.60it/s, Train loss=0.4417]


Epoch 10: Train loss=0.4413 | Val loss=0.4414


Epoch 11/100: 100%|██████████| 39/39 [00:06<00:00,  5.80it/s, Train loss=0.4418]


Epoch 11: Train loss=0.4415 | Val loss=0.4414
Early stopping at epoch 11. Best val loss=0.4404


In [34]:
def dcg_at_k(relevances, k):
    r = np.asarray(relevances)[:k]
    if r.size == 0: return 0.0
    discounts = 1.0 / np.log2(np.arange(2, r.size + 2))
    return float(np.sum(r * discounts))

def ndcg_at_k(predicted_items, true_rel_map, k):
    gains = [true_rel_map.get(i, 0.0) for i in predicted_items[:k]]
    dcg = dcg_at_k(gains, k)
    ideal_gains = sorted(true_rel_map.values(), reverse=True)
    idcg = dcg_at_k(ideal_gains, k)
    return (dcg / idcg) if idcg > 0 else 0.0

def precompute_negatives(all_df: pd.DataFrame, test_df: pd.DataFrame, domain: str, n_neg=100, seed=42):
    rs = np.random.RandomState(seed)
    dom_all = all_df[all_df["domain"] == domain]
    dom_items = set(dom_all["item_id"].unique())
    seen_by_user = dom_all.groupby("user_id")["item_id"].apply(set).to_dict()
    negs = {}
    for uid, _ in test_df[test_df["domain"] == domain].groupby("user_id"):
        seen = seen_by_user.get(uid, set())
        pool = np.array(list(dom_items - seen))
        if len(pool) == 0: continue
        m = min(n_neg, len(pool))
        negs[int(uid)] = rs.choice(pool, size=m, replace=False)
    return negs

def evaluate_ranking_metrics(
    model, test_df, all_data_df, domain: str,
    k=10, rating_threshold=4.0, neg_samples=100,
    precomputed_negatives=None, graded=False
):
    print(f"\n📊 Evaluating @k={k} for domain={domain}")
    test_dom = test_df[test_df["domain"] == domain].copy()
    hist_dom = all_data_df[all_data_df["domain"] == domain].copy()
    if test_dom.empty:
        print("No test rows for domain.")
        return {f"precision_at_{k}":0.0, f"recall_at_{k}":0.0, f"map_at_{k}":0.0, f"ndcg_at_{k}":0.0}

    # relevance map per user (binary by default)
    precisions, recalls, aps, ndcgs = [], [], [], []

    # fallback for negatives if not provided
    if precomputed_negatives is None:
        precomputed_negatives = precompute_negatives(all_data_df, test_df, domain, n_neg=neg_samples, seed=SEED)

    for user_id, g in tqdm(test_dom.groupby("user_id"), desc="Ranking"):
        if graded:
            rel_map = {iid: max(float(r) - (rating_threshold - 1.0), 0.0)
                       for iid, r in zip(g["item_id"].values, g["rating"].values)}
        else:
            rel_map = {iid: 1.0 if float(r) >= rating_threshold else 0.0
                       for iid, r in zip(g["item_id"].values, g["rating"].values)}
        rel_items = {iid for iid, gain in rel_map.items() if gain > 0}
        if not rel_items:  # no relevant test items
            continue

        # candidates: relevant test items + fixed negatives
        negs = precomputed_negatives.get(int(user_id), None)
        if negs is None or len(negs) == 0:
            # skip user if no negatives available (rare)
            continue

        items_to_rank = np.concatenate([np.array(list(rel_map.keys()), dtype=np.int64), negs.astype(np.int64)])
        pred_df = pd.DataFrame({"user_id": int(user_id), "item_id": items_to_rank})
        pred_df["score"] = model.predict_dataframe(pred_df)
        pred_df = pred_df.sort_values("score", ascending=False)
        top_k = pred_df["item_id"].values[:k]

        hits = set(top_k).intersection(rel_items)
        precisions.append(len(hits) / k)
        recalls.append(len(hits) / len(rel_items))

        # MAP@k
        ap, got = 0.0, 0
        for rank, it in enumerate(top_k, start=1):
            if it in rel_items:
                got += 1
                ap += got / rank
        aps.append(ap / len(rel_items))

        # NDCG@k
        ndcgs.append(ndcg_at_k(top_k.tolist(), rel_map, k))

    out = {
        f"precision_at_{k}": float(np.mean(precisions)) if precisions else 0.0,
        f"recall_at_{k}":    float(np.mean(recalls))    if recalls    else 0.0,
        f"map_at_{k}":       float(np.mean(aps))        if aps        else 0.0,
        f"ndcg_at_{k}":      float(np.mean(ndcgs))      if ndcgs      else 0.0,
    }
    print(f"Precision@{k}: {out[f'precision_at_{k}']:.4f} | "
          f"Recall@{k}: {out[f'recall_at_{k}']:.4f} | "
          f"MAP@{k}: {out[f'map_at_{k}']:.4f} | "
          f"NDCG@{k}: {out[f'ndcg_at_{k}']:.4f}")
    return out

In [35]:
movies_fixed_negs = precompute_negatives(implicit_df, movies_test, SOURCE_DOMAIN, n_neg=100, seed=SEED)
_ = evaluate_ranking_metrics(
    model=mf_trainer,
    test_df=movies_new_test,
    all_data_df=implicit_df,
    domain=SOURCE_DOMAIN,
    k=10,
    rating_threshold=4.0,
    neg_samples=100,
    precomputed_negatives=movies_fixed_negs,
    graded=False
)


📊 Evaluating @k=10 for domain=Movies_and_TV


Ranking: 100%|██████████| 6106/6106 [00:10<00:00, 592.03it/s]

Precision@10: 0.0252 | Recall@10: 0.2519 | MAP@10: 0.1185 | NDCG@10: 0.1495





In [215]:
import math
import torch
import torch.nn as nn

class NeuMF(nn.Module):
    def __init__(
        self,
        n_users: int,
        n_items: int,
        factors_gmf: int = 32,
        factors_mlp: int = 32,
        mlp_layers=(128, 64, 32),
        dropout: float = 0.2,
        use_bias: bool = True,
    ):
        super().__init__()
        # GMF embeddings
        self.gmf_user = nn.Embedding(n_users, factors_gmf)
        self.gmf_item = nn.Embedding(n_items, factors_gmf)
        # MLP embeddings
        self.mlp_user = nn.Embedding(n_users, factors_mlp)
        self.mlp_item = nn.Embedding(n_items, factors_mlp)

        # MLP tower
        in_dim = factors_mlp * 2
        mlp = []
        for h in mlp_layers:
            mlp += [nn.Linear(in_dim, h), nn.ReLU(), nn.Dropout(dropout)]
            in_dim = h
        self.mlp = nn.Sequential(*mlp)

        # Prediction layer over [GMF ⊕ MLP]
        pred_in = factors_gmf + (mlp_layers[-1] if len(mlp_layers) > 0 else factors_mlp * 2)
        self.pred = nn.Linear(pred_in, 1)

        # Optional biases + global mean (helpful for ratings)
        self.use_bias = use_bias
        if use_bias:
            self.user_bias = nn.Embedding(n_users, 1)
            self.item_bias = nn.Embedding(n_items, 1)
        else:
            self.user_bias = None
            self.item_bias = None
        self.global_mean = nn.Parameter(torch.zeros(1), requires_grad=False)

        # init
        for emb in [self.gmf_user, self.gmf_item, self.mlp_user, self.mlp_item]:
            nn.init.normal_(emb.weight, std=0.01)
        for m in self.mlp:
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight); nn.init.zeros_(m.bias)
        nn.init.xavier_uniform_(self.pred.weight); nn.init.zeros_(self.pred.bias)
        if self.use_bias:
            nn.init.zeros_(self.user_bias.weight); nn.init.zeros_(self.item_bias.weight)

    def forward(self, users, items):
        gmf = self.gmf_user(users) * self.gmf_item(items)  # elementwise product
        mlp_in = torch.cat([self.mlp_user(users), self.mlp_item(items)], dim=1)
        mlp_out = self.mlp(mlp_in)
        x = torch.cat([gmf, mlp_out], dim=1)
        y = self.pred(x).squeeze(1)
        if self.use_bias:
            y = y + self.user_bias(users).squeeze(1) + self.item_bias(items).squeeze(1) + self.global_mean
        return y

In [216]:
from torch.utils.data import DataLoader

class PyTorchNeuMFModel:
    def __init__(
        self,
        factors_gmf=32,
        factors_mlp=32,
        mlp_layers=(128, 64, 32),
        dropout=0.2,
        use_bias=True,
        n_epochs=30,
        lr=1e-3,
        weight_decay=1e-5,
        batch_size=1024,
        max_grad_norm=5.0,
        patience=4,  # for ReduceLROnPlateau-driven convergence
    ):
        self.factors_gmf = factors_gmf
        self.factors_mlp = factors_mlp
        self.mlp_layers = mlp_layers
        self.dropout = dropout
        self.use_bias = use_bias
        self.n_epochs = n_epochs
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.max_grad_norm = max_grad_norm
        self.patience = patience
        self.model = None

    def train(self, train_df, val_df, n_users, n_items):
        print("\n🚀 Training NeuMF (ratings regression with sample-weighted MSE)")
        print(f"   Device: {DEVICE} | GMF:{self.factors_gmf} | MLP:{self.factors_mlp} "
              f"| Layers:{self.mlp_layers} | Dropout:{self.dropout}")

        train_loader = DataLoader(SimpleMFDataset(train_df), batch_size=self.batch_size, shuffle=True)
        val_loader   = DataLoader(SimpleMFDataset(val_df),   batch_size=self.batch_size, shuffle=False)

        self.model = NeuMF(
            n_users, n_items,
            factors_gmf=self.factors_gmf,
            factors_mlp=self.factors_mlp,
            mlp_layers=self.mlp_layers,
            dropout=self.dropout,
            use_bias=self.use_bias
        ).to(DEVICE)

        # set global mean from train ratings
        self.model.global_mean.data.fill_(float(train_df["rating"].mean()))

        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-5
        )

        best_val_rmse = float("inf")
        best_state = None
        bad_epochs = 0

        for epoch in range(self.n_epochs):
            # -------- train --------
            self.model.train()
            se_sum, denom = 0.0, 0.0
            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}")
            for users, items, ratings, weights in pbar:
                users = users.to(DEVICE); items = items.to(DEVICE)
                ratings = ratings.to(DEVICE); weights = weights.to(DEVICE)

                optimizer.zero_grad()
                preds = self.model(users, items)

                se = (preds - ratings) ** 2
                loss = (weights * se).sum() / (weights.sum() + 1e-8)
                loss.backward()

                if self.max_grad_norm is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)

                optimizer.step()

                se_sum += se.detach().sum().item()
                denom += ratings.numel()
                pbar.set_postfix({"Train RMSE": f"{math.sqrt(se_sum/denom):.4f}"})

            train_rmse = math.sqrt(se_sum / max(1, denom))

            # -------- validate --------
            self.model.eval()
            with torch.no_grad():
                se_sum, denom = 0.0, 0.0
                for users, items, ratings, _ in val_loader:
                    users = users.to(DEVICE); items = items.to(DEVICE); ratings = ratings.to(DEVICE)
                    preds = self.model(users, items)
                    se_sum += torch.sum((preds - ratings) ** 2).item()
                    denom += ratings.numel()
            val_rmse = math.sqrt(se_sum / max(1, denom))
            scheduler.step(val_rmse)

            print(f"Epoch {epoch+1}/{self.n_epochs} - Train RMSE: {train_rmse:.4f} | Val RMSE: {val_rmse:.4f}")

            # early-stop restore
            if val_rmse < best_val_rmse - 1e-4:
                best_val_rmse = val_rmse
                best_state = {k: v.detach().cpu() for k, v in self.model.state_dict().items()}
                bad_epochs = 0
            else:
                bad_epochs += 1
                if bad_epochs >= self.patience:
                    print(f"Early stopping at epoch {epoch+1}. Best Val RMSE: {best_val_rmse:.4f}")
                    break

        if best_state is not None:
            self.model.load_state_dict({k: v.to(DEVICE) for k, v in best_state.items()})
        print("✅ Training complete! Best Val RMSE:", f"{best_val_rmse:.4f}")

    def predict_dataframe(self, df):
        """df must have user_id, item_id"""
        if self.model is None:
            raise ValueError("Model not trained yet!")
        self.model.eval()

        users = torch.tensor(df["user_id"].values, dtype=torch.long, device=DEVICE)
        items = torch.tensor(df["item_id"].values, dtype=torch.long, device=DEVICE)

        preds = []
        with torch.no_grad():
            for i in range(0, len(df), self.batch_size):
                pu = users[i:i+self.batch_size]
                pi = items[i:i+self.batch_size]
                p = self.model(pu, pi)
                preds.append(p.detach().cpu().numpy())
        return np.concatenate(preds)

In [226]:
print("\nTraining NeuMF on Movies domain:")
movies_train = train_df[train_df["domain"] == "Movies_and_TV"]
movies_val   = val_df[val_df["domain"]   == "Movies_and_TV"]
movies_test  = test_df[test_df["domain"] == "Movies_and_TV"]

n_total_users  = encoded_df["user_id"].max() + 1
n_movies_items = encoded_df[encoded_df["domain"] == "Movies_and_TV"]["item_id"].max() + 1

neumf_model = PyTorchNeuMFModel(
    factors_gmf=16, factors_mlp=16,
    mlp_layers=(64, 32),
    dropout=0.3,
    use_bias=True,
    n_epochs=50,
    lr=5e-3,
    weight_decay=1e-4,
    batch_size=1024,
    max_grad_norm=5.0,
    patience=5
)
neumf_model.train(movies_train, movies_val, n_total_users, n_movies_items)


Training NeuMF on Movies domain:

🚀 Training NeuMF (ratings regression with sample-weighted MSE)
   Device: cuda | GMF:16 | MLP:16 | Layers:(64, 32) | Dropout:0.3


Epoch 1/50: 100%|██████████| 6/6 [00:00<00:00, 31.90it/s, Train RMSE=1.0685]


Epoch 1/50 - Train RMSE: 1.0685 | Val RMSE: 1.0925


Epoch 2/50: 100%|██████████| 6/6 [00:00<00:00, 35.33it/s, Train RMSE=1.0167]


Epoch 2/50 - Train RMSE: 1.0167 | Val RMSE: 1.0479


Epoch 3/50: 100%|██████████| 6/6 [00:00<00:00, 36.43it/s, Train RMSE=0.8932]


Epoch 3/50 - Train RMSE: 0.8932 | Val RMSE: 1.0571


Epoch 4/50: 100%|██████████| 6/6 [00:00<00:00, 38.96it/s, Train RMSE=0.7898]


Epoch 4/50 - Train RMSE: 0.7898 | Val RMSE: 1.0990


Epoch 5/50: 100%|██████████| 6/6 [00:00<00:00, 37.70it/s, Train RMSE=0.7241]


Epoch 5/50 - Train RMSE: 0.7241 | Val RMSE: 1.0497


Epoch 6/50: 100%|██████████| 6/6 [00:00<00:00, 41.46it/s, Train RMSE=0.6882]


Epoch 6/50 - Train RMSE: 0.6882 | Val RMSE: 1.0574


Epoch 7/50: 100%|██████████| 6/6 [00:00<00:00, 35.50it/s, Train RMSE=0.6685]

Epoch 7/50 - Train RMSE: 0.6685 | Val RMSE: 1.0710
Early stopping at epoch 7. Best Val RMSE: 1.0479
✅ Training complete! Best Val RMSE: 1.0479





In [227]:
# Evaluate with your domain-aware metrics (same protocol)
neumf_results = evaluate_ranking_metrics(
    model=neumf_model,
    test_df=movies_test,
    all_data_df=encoded_df,
    domain="Movies_and_TV",
    k=10,
    rating_threshold=POSITIVE_THRESHOLD,  # 4.0
    n_neg_samples=100,
    rng_seed=42,
    graded=False
)
print(neumf_results)


📊 Evaluating Ranking Metrics @k=10 for domain = Movies_and_TV


Calculating Ranking Metrics: 100%|██████████| 910/910 [00:02<00:00, 397.12it/s]


--- Ranking Evaluation Results (domain=Movies_and_TV, k=10) ---
   Precision@10: 0.0130
   Recall@10:    0.1246
   MAP@10:       0.0432
   NDCG@10:      0.0627
-----------------------------------------
{'precision_at_10': 0.013025780189959296, 'recall_at_10': 0.1246042514699231, 'map_at_10': 0.0432475070534772, 'ndcg_at_10': 0.06274462438909985}



