## Recommender Systems Notebook

### Setup & Demo Data

We implement some common recommender algorithms used in production:

- Popularity baseline
- Content-based TF-IDF
- Item-Item Co-visitation
- Collaborative Filtering (kNN) user-based and item-based
- Collaborative Filtering Matrix Factorization with TensorFlow
- Two-Tower Retrieval with TensorFlow

### Imports

In [45]:
# Numerical computing
import numpy as np

# Data handling
import pandas as pd

# For clean "struct-like" models (optional)
from dataclasses import dataclass

# Typing clarity (optional but good practice)
from typing import Dict, List, Tuple, Optional, Callable

# Useful for co-visitation counting
from collections import defaultdict, Counter

# Content-based TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# NOTE(review): `prefixedMethods` is not used anywhere in the visible part of
# this notebook and makes Twisted a hard dependency -- this looks like an
# accidental IDE auto-import; confirm and remove if unused.
from twisted.python.reflect import prefixedMethods

# Reproducibility
# Seeds NumPy's *global* RNG only; code that builds its own
# np.random.RandomState(seed) is not affected by this call.
np.random.seed(42)


### Demo dataset generator

In [46]:
def make_demo_data(
        n_users: int = 30,
        n_items: int = 60,
        n_categories: int = 6,
        ratings_per_user: int = 12,
        session_len: int = 8,
        seed: int = 42,
):
    """
    Creates a toy dataset that behaves like a real recommendation dataset.

    Users:
      - each user prefers one category

    Items:
      - each item belongs to one category
      - each item has a text description (category-specific keywords)

    Ratings:
      - user gives higher ratings to items in their preferred category
      - about 70% of a user's ratings come from the preferred category; a user
        gets fewer than `ratings_per_user` ratings when a pool is too small

    Sessions:
      - implicit sequences of interacted items (mostly from preferred category)
      - if no item belongs to the user's preferred category, the session is
        sampled from the whole catalogue instead

    Args:
        n_users: number of users (ids 0..n_users-1).
        n_items: number of items (ids 0..n_items-1).
        n_categories: number of distinct categories. Only 6 keyword
            vocabularies exist; categories beyond that reuse them cyclically.
        ratings_per_user: target number of ratings per user.
        session_len: number of item views per session.
        seed: seed for the private RandomState, so output is reproducible.

    Returns:
        (ratings_df, sessions, items_df, item_text) where
          - ratings_df has columns ["user_id", "item_id", "rating"]
          - sessions is one list of int item ids per user
          - items_df has columns ["item_id", "title", "category", "description"]
          - item_text maps item_id -> description string
    """

    rng = np.random.RandomState(seed)

    # Assign each item a category ID
    item_category = rng.randint(0, n_categories, size=n_items)

    # Assign each user a preferred category
    user_pref = rng.randint(0, n_categories, size=n_users)

    # Words to generate item descriptions per category
    category_words = {
        0: [  # Action / Adventure
            "action", "fast", "adventure", "hero", "battle", "explosion",
            "chase", "mission", "fight", "weapon", "danger", "rescue"
        ],
        1: [  # Romance / Drama
            "romance", "love", "drama", "heart", "relationship", "emotion",
            "passion", "kiss", "betrayal", "wedding", "tearful", "affection"
        ],
        2: [  # Sci-Fi
            "scifi", "space", "future", "alien", "robot", "technology",
            "galaxy", "time", "experiment", "spaceship", "cyber", "planet"
        ],
        3: [  # Comedy
            "comedy", "funny", "joke", "laugh", "humor", "awkward",
            "satire", "parody", "prank", "clumsy", "ridiculous", "smile"
        ],
        4: [  # Horror
            "horror", "scary", "ghost", "dark", "monster", "fear",
            "nightmare", "blood", "curse", "haunted", "evil", "scream"
        ],
        5: [  # Documentary
            "documentary", "history", "facts", "nature", "real",
            "science", "culture", "wildlife", "investigation", "education",
            "truth", "archive"
        ],
    }
    n_vocab = len(category_words)

    # Create item text and titles
    item_text: Dict[int, str] = {}
    item_title: Dict[int, str] = {}
    for i in range(n_items):
        category = int(item_category[i])
        # Wrap around so n_categories > 6 reuses a vocabulary instead of
        # raising KeyError (identical to a plain lookup for categories 0..5).
        words = category_words[category % n_vocab]

        desc = " ".join(rng.choice(words, size=5, replace=False))

        item_text[i] = desc
        item_title[i] = f"Item_{i:02d}_Category_{category:02d}"

    def _pick(pool, count):
        # Sample up to `count` distinct items. An empty pool is returned as-is
        # without touching the RNG, so no sampling call ever sees an empty
        # population.
        if len(pool) == 0:
            return pool
        return rng.choice(pool, size=min(count, len(pool)), replace=False)

    # Choose ~70% from preferred and ~30% from others
    n_pref = int(ratings_per_user * 0.7)
    n_other = ratings_per_user - n_pref

    # Build ratings as a list of (user_id, item_id, rating)
    ratings: List[Tuple[int, int, float]] = []
    for u in range(n_users):
        # Items in user's preferred category
        preferred_items = np.where(item_category == user_pref[u])[0]

        # Items not in preferred category
        other_items = np.where(item_category != user_pref[u])[0]

        # Choose without replacement
        chosen_pref = _pick(preferred_items, n_pref)
        chosen_other = _pick(other_items, n_other)

        chosen = np.concatenate((chosen_pref, chosen_other))
        rng.shuffle(chosen)

        for item_id in chosen:
            # Base rating is higher if matches preference
            base = 4.2 if item_category[item_id] == user_pref[u] else 2.8

            # Add Gaussian noise and clip into [1...5]
            r = np.clip(rng.normal(base, 0.6), 1.0, 5.0)

            # Round to 0.1 to look more realistic
            ratings.append((u, int(item_id), float(np.round(r, 1))))

    # Build the sessions (view/click sequences)
    all_items = np.arange(n_items)
    sessions: List[List[int]] = []

    for u in range(n_users):
        # Sample from preferred category with replacement (views can repeat)
        pref_items = np.where(item_category == user_pref[u])[0]

        # No item in the preferred category: fall back to the whole catalogue
        pool = pref_items if len(pref_items) > 0 else all_items
        if len(pool) == 0:
            # Empty catalogue: nothing can be viewed
            sessions.append([])
            continue

        session = rng.choice(pool, size=session_len, replace=True)
        # Store plain Python ints so the result matches List[List[int]]
        sessions.append(session.tolist())

    # Item metadata table
    items_df = pd.DataFrame({
        "item_id": np.arange(n_items),
        "title": [item_title[i] for i in range(n_items)],
        "category": item_category,
        "description": [item_text[i] for i in range(n_items)],
    })

    # Ratings dataframe
    ratings_df = pd.DataFrame(
        ratings,
        columns=["user_id", "item_id", "rating"]
    )

    return ratings_df, sessions, items_df, item_text

In [47]:
# Generate the toy dataset with the default sizes and seed
ratings_df, sessions, items_df, item_text = make_demo_data()

In [48]:
# Preview the first 10 (user_id, item_id, rating) rows
ratings_df.head(10)

Unnamed: 0,user_id,item_id,rating
0,0,39,4.4
1,0,32,5.0
2,0,47,2.6
3,0,24,2.4
4,0,40,3.2
5,0,59,4.9
6,0,7,2.9
7,0,0,4.2
8,0,38,5.0
9,0,50,3.6


In [49]:
# Preview the first 10 rows of the item metadata table
items_df.head(10)

Unnamed: 0,item_id,title,category,description
0,0,Item_00_Category_03,3,smile joke awkward comedy ridiculous
1,1,Item_01_Category_04,4,fear scary haunted monster ghost
2,2,Item_02_Category_02,2,technology experiment cyber alien spaceship
3,3,Item_03_Category_04,4,scream haunted evil dark ghost
4,4,Item_04_Category_04,4,blood dark scary curse evil
5,5,Item_05_Category_01,1,wedding relationship love emotion heart
6,6,Item_06_Category_02,2,experiment scifi robot space spaceship
7,7,Item_07_Category_02,2,cyber planet scifi galaxy space
8,8,Item_08_Category_02,2,technology time space galaxy future
9,9,Item_09_Category_04,4,haunted fear scream nightmare blood


###

### Train/Test Split & Metrics

- For each user, keep 1 rating as the test item; the rest remain for training.

In [50]:
def leave_last_one_out_split(ratings: pd.DataFrame, seed: int = 42) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    For each user, hold out 1 interaction for test.

    Each user's rows are shuffled (reproducibly, via `seed`) and the last row
    after shuffling becomes that user's test row; the remaining rows go to
    train. A user with a single rating therefore appears only in test.

    Args:
        ratings: DataFrame with at least a "user_id" column.
        seed: random_state passed to the per-user shuffle.

    Returns:
        (train_df, test_df), both with a fresh 0..n-1 index and the same
        columns and dtypes as `ratings`. Both are empty if `ratings` is empty.
    """
    train_parts = []  # list of train chunks for each user
    test_parts = []   # list of one-row held-out chunks for each user

    # Group ratings by user
    for _, group in ratings.groupby("user_id"):
        # Shuffle this user's ratings so "last one out" isn't biased by item_id ordering
        shuffled = group.sample(frac=1.0, random_state=seed)

        # Last row becomes test. Slice with a list so we keep a one-row
        # DataFrame: taking a single row as a Series would upcast integer id
        # columns to float when mixed with the float rating.
        test_parts.append(shuffled.iloc[[-1]])

        # All except last become train
        train_parts.append(shuffled.iloc[:-1])

    if not train_parts:
        # No users at all: pd.concat would raise on an empty list
        empty = ratings.iloc[0:0].reset_index(drop=True)
        return empty, empty.copy()

    train_df = pd.concat(train_parts).reset_index(drop=True)
    test_df = pd.concat(test_parts).reset_index(drop=True)
    return train_df, test_df


def precision_recall_at_k(recs: List[int], relevant: set, k: int) -> Tuple[float, float]:
    """
    Compute precision@k and recall@k.

    - precision@k: fraction of recommended items (top k) that are relevant
    - recall@k: fraction of relevant items that appear in top k

    A relevant item that is recommended more than once counts as a single
    hit, so recall never exceeds 1.0. Precision is always divided by k, even
    when fewer than k recommendations are supplied.

    Args:
        recs: ranked recommendations, best first.
        relevant: ground-truth relevant item ids.
        k: cutoff; must be a positive integer.

    Returns:
        (precision, recall). Recall is 0.0 when `relevant` is empty.

    Raises:
        ValueError: if k is not positive.
    """
    if k <= 0:
        # k == 0 would divide by zero; negative k would slice from the wrong end
        raise ValueError(f"k must be a positive integer, got {k}")

    relevant_items = set(relevant)
    top_k = recs[:k]                              # keep only top-k recommendations
    hits = len(relevant_items.intersection(top_k))  # distinct relevant items in top-k

    precision = hits / k
    recall = hits / max(1, len(relevant_items))
    return precision, recall


def evaluate_model(
    recommend_fn: Callable[[int, set, int], List[int]],
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    k: int = 10,
    name: str = "model"
):
    """
    Score a recommender on the held-out rows and print mean precision/recall.

    recommend_fn is called as recommend_fn(user_id, seen_set, k) and must
    return a ranked list of item ids. seen_set holds the user's training
    items (empty if the user has none); the held-out items in test_df are the
    relevant set. Only users present in test_df are evaluated.
    """
    # Per-user lookups: training items as a set, held-out items as a list
    train_items = train_df.groupby("user_id")["item_id"].apply(set).to_dict()
    held_out = test_df.groupby("user_id")["item_id"].apply(list).to_dict()

    # One (precision, recall) pair per evaluated user
    scores = [
        precision_recall_at_k(
            recommend_fn(user, train_items.get(user, set()), k),
            set(items),
            k,
        )
        for user, items in held_out.items()
    ]
    precisions = [p for p, _ in scores]
    recalls = [r for _, r in scores]

    print(f"\n{name} @ {k}")
    print(f"Precision@{k}: {np.mean(precisions):.3f}")
    print(f"Recall@{k}:    {np.mean(recalls):.3f}")


def show_recommendations(user_id: int, recs: List[int], title: str):
    """Print a header, then display title and category for each recommended item.

    Rows are looked up by item_id in the notebook-level `items_df` and shown
    in the order given by `recs`.
    """
    print(f"\n{title} (user={user_id})")
    catalogue = items_df.set_index("item_id")
    display(catalogue.loc[recs, ["title", "category"]])





In [51]:
# Split the data
train_df, test_df = leave_last_one_out_split(ratings_df)

# Useful global counts
n_users = int(ratings_df["user_id"].max() + 1)
n_items = int(ratings_df["item_id"].max() + 1)

print("Train size:", len(train_df), "Test size:", len(test_df))

Train size: 328 Test size: 30


### Popularity Baseline Algorithm


In [52]:
class PopularityRecommender:
    """
    Recommends items based on global popularity (interaction count).
    Very common baseline and fallback in production.
    """

    def __init__(self):
        # Item ids ordered from most to least frequent; filled by fit()
        self.ranked_items: List[int] = []

    def fit(self, train_df: pd.DataFrame):
        """Rank items by how many rows of `train_df` mention them.

        Args:
            train_df: interactions with an "item_id" column.
        """
        # value_counts() returns the counts sorted in descending order
        counts = train_df["item_id"].value_counts()

        self.ranked_items = [int(item_id) for item_id in counts.index]

    def recommend(self, user_id: int, seen: set, k: int) -> List[int]:
        """Return up to k of the most popular items the user has not seen.

        Args:
            user_id: unused; the ranking is the same for every user. Kept so
                the method matches the (user_id, seen, k) recommender signature.
            seen: item ids to exclude.
            k: maximum number of items to return.

        Returns:
            Item ids in popularity order; an empty list when k <= 0 or when
            fit() has not been called.
        """
        if k <= 0:
            # Without this guard the length check below is never satisfied
            # and every unseen item would be returned.
            return []

        recs: List[int] = []
        for item_id in self.ranked_items:
            if item_id in seen:
                continue
            recs.append(item_id)
            if len(recs) >= k:
                break
        return recs

# Fit the popularity baseline on the training interactions
pop = PopularityRecommender()
pop.fit(train_df)

# Show the top-10 for one example user, excluding items they already rated in train
u_demo = 0
seen_demo = set(train_df.loc[train_df["user_id"] == u_demo, "item_id"])
recs_demo = pop.recommend(u_demo, seen_demo, k=10)
show_recommendations(u_demo, recs_demo, "Popularity recommendations")

# Evaluate: the bound method already has the (user_id, seen, k) signature
evaluate_model(pop.recommend, train_df, test_df, k=10, name="Popularity")


Popularity recommendations (user=0)


Unnamed: 0_level_0,title,category
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
34,Item_34_Category_05,5
17,Item_17_Category_05,5
35,Item_35_Category_05,5
18,Item_18_Category_01,1
16,Item_16_Category_05,5
56,Item_56_Category_01,1
12,Item_12_Category_05,5
5,Item_05_Category_01,1
36,Item_36_Category_05,5
52,Item_52_Category_01,1



Popularity @ 10
Precision@10: 0.030
Recall@10:    0.300


These numbers mean that, with the **Most Popular** baseline, **only ~3% of the top-10 recommendations are correct** on average (≈0.3 “hits” per user). Because each user has exactly **one held-out test item**, Precision@10 can never exceed 0.10 in this setup, so Recall@10 is the more informative number: the baseline places the held-out item in the top-10 for about **30% of users** (that is what Recall@10 = 0.300 means in this leave-one-out setup).
