#### Popularity-based ranking by average rating
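Each entry of rating_matrix is either a rating from 1 to 4 or 0 (unrated).
1. Compute the average rating of each item (i.e., per column), using only the non-empty (non-zero) entries.
2. Items with no ratings get an average of 0 (instead of NaN).
3. Sort the items by average rating in descending order to rank them by popularity.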

In [None]:
import numpy as np

# Fix the global NumPy seed so every run draws the same synthetic data
np.random.seed(42)

# Synthetic 500 users × 20 items rating matrix. Each entry is drawn
# independently: 0 (unrated) with probability 0.8, or one of the ratings
# 1–4 with probability 0.05 each.
rating_matrix = np.random.choice(
    np.arange(5),
    size=(500, 20),
    p=[0.8] + [0.05] * 4,
)

# ------------------------------------------------------------------
# POPULARITY-BASED RECOMMENDER
# ------------------------------------------------------------------
def popularity_recommender(rating_matrix, top_k=5):
    """
    Recommend the top-k most popular items, ranked by average rating.

    Entries equal to 0 mean "unrated" and are excluded from the averages.
    An item that nobody has rated gets an average of 0.

    Parameters
    ----------
    rating_matrix : array-like of shape (n_users, n_items)
        Ratings, with 0 marking an unrated entry.
    top_k : int, default 5
        Maximum number of items to return.

    Returns
    -------
    top_indices : ndarray
        0-based item indices, highest average first. Among items with equal
        averages, the one with the larger index comes first in practice
        (a consequence of reversing an ascending argsort).
    top_scores : ndarray
        Average ratings aligned with ``top_indices``.
    """
    ratings = np.asarray(rating_matrix)
    rated = ratings > 0

    # Per item (column): how many ratings it received and their total.
    rating_counts = rated.sum(axis=0)
    rating_sums = np.where(rated, ratings, 0).sum(axis=0, dtype=float)

    # Divide only where an item has at least one rating; the other slots keep
    # the 0.0 provided through `out`. A masked np.mean would instead emit
    # "Mean of empty slice" RuntimeWarnings and produce NaN for such items.
    item_averages = np.divide(
        rating_sums,
        rating_counts,
        out=np.zeros_like(rating_sums),
        where=rating_counts > 0,
    )

    # Sort items by average rating (descending) and keep the first top_k
    top_indices = np.argsort(item_averages)[::-1][:top_k]
    top_scores = item_averages[top_indices]

    return top_indices, top_scores


# ------------------------------------------------------------------
# Top-5 recommendations
# ------------------------------------------------------------------
top_k = 5
rec_items, rec_scores = popularity_recommender(rating_matrix, top_k)

# Global ranking: the same list applies to every user
print(f"Popularity-Based Top-{top_k} Items (Global):")
for rank, (item, score) in enumerate(zip(rec_items, rec_scores), start=1):
    print(f"Rank {rank}: Item {item+1} (Avg Rating: {score:.2f})")


# ------------------------------------------------------------------
# Per-user view for User 1: walk the global list and flag the items
# this user has already rated
# ------------------------------------------------------------------
user_id = 0
user_ratings = rating_matrix[user_id]

print(f"\nUser {user_id+1}'s current ratings: {user_ratings}")
print(f"Recommendations for User {user_id+1} (unrated only):")
for item, score in zip(rec_items, rec_scores):
    if user_ratings[item] != 0:
        # Already rated by this user: report it but do not recommend it
        print(f"  Item {item+1} (already rated {user_ratings[item]}) → skipped")
        continue
    print(f"→ Item {item+1} (Popularity: {score:.2f})")

#### Item-based Collaborative Filtering
The similarity matrix item_similarity is of shape (20, 20) (since there are 20 items).
Each entry item_similarity[i, j] represents how similar item i is to item j.
Then,
1. Identify the target user’s unrated items, i.e., the indices of the items the user has not rated yet.
2. For each unrated item, predict the rating the user would give it.
For a given unrated item i, the prediction uses:
  a. The user’s historical ratings on other items
  b. The similarity between item i and those rated items
  c. A similarity-weighted average of those ratings, used as the predicted rating of item i: sum_j(sim(i, j) * r_j) / sum_j(|sim(i, j)|), where j runs over the items the user has rated.
3. Sort the predicted ratings in descending order and pick the top-k highest-scoring unrated items.

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Re-seed so this cell reproduces the same synthetic ratings on its own
np.random.seed(42)

# 500 users × 20 items; 0 = unrated (probability 0.8), 1–4 = actual rating
# (probability 0.05 each)
rating_matrix = np.random.choice(
    np.arange(5),
    size=(500, 20),
    p=[0.8] + [0.05] * 4,
)

# ------------------------------------------------------------------
# ITEM-BASED COLLABORATIVE FILTERING
# ------------------------------------------------------------------
def item_based_collaborative_filtering(rating_matrix, user_id, top_k=5):
    """
    Recommend the top-k unrated items for a user using item-item similarity.

    Each unrated item i is scored with a similarity-weighted average of the
    ratings the user has already given:

        pred(i) = sum_j sim(i, j) * r_j / sum_j |sim(i, j)|

    where j runs over the items the user has rated and sim is the cosine
    similarity between item rating columns (unrated entries count as 0).
    An item whose similarity to every rated item is 0 gets a score of 0; this
    includes the case where the user has not rated anything.

    Parameters
    ----------
    rating_matrix : array-like of shape (n_users, n_items)
        Ratings, with 0 marking an unrated entry.
    user_id : int
        Row index of the target user.
    top_k : int, default 5
        Maximum number of items to return.

    Returns
    -------
    recommended_items : ndarray
        0-based indices of the recommended items, highest score first.
    recommended_scores : ndarray
        Predicted ratings (float) aligned with ``recommended_items``.
        Both arrays are empty when the user has no unrated items.
    """
    ratings = np.asarray(rating_matrix)

    # 1. Split the target user's items into rated and unrated (computed once,
    #    not per candidate item)
    user_ratings = ratings[user_id]
    unrated_items = np.flatnonzero(user_ratings == 0)
    rated_items = np.flatnonzero(user_ratings > 0)

    if unrated_items.size == 0:
        return np.array([]), np.array([])

    # 2. Item-item cosine similarity (transpose → items as rows). The matrix
    #    is passed as-is: unrated entries are already 0, so no NaN round trip
    #    is needed.
    item_similarity = cosine_similarity(ratings.T)

    # 3. Similarities between every unrated item (rows) and every rated item
    #    (columns). With no rated items this has zero columns and every sum
    #    below is 0.
    sims = item_similarity[np.ix_(unrated_items, rated_items)]

    # Weighted average: sum(sim * rating) / sum(|sim|), one value per row
    weighted_sums = (sims * user_ratings[rated_items]).sum(axis=1)
    weight_totals = np.abs(sims).sum(axis=1)
    predicted_ratings = np.divide(
        weighted_sums,
        weight_totals,
        out=np.zeros(unrated_items.size),   # score 0 where all weights are 0
        where=weight_totals != 0,
    )

    # 4. Return the top-k highest-predicted unrated items
    top_indices = np.argsort(predicted_ratings)[::-1][:top_k]
    recommended_items = unrated_items[top_indices]
    recommended_scores = predicted_ratings[top_indices]

    return recommended_items, recommended_scores


# ------------------------------------------------------------------
# Recommend top-5 items for User 1
# ------------------------------------------------------------------
user_id = 0
top_k = 5
rec_items, rec_scores = item_based_collaborative_filtering(rating_matrix, user_id, top_k)

print(f"Item-Based CF Recommendations for User {user_id+1} (Top-{top_k}):")
print(f"User {user_id+1}'s ratings: {rating_matrix[user_id]}")
if len(rec_items) > 0:
    for rank, (item, score) in enumerate(zip(rec_items, rec_scores), start=1):
        print(f"Rank {rank}: Item {item+1} (Predicted: {score:.3f})")
else:
    # With top_k > 0 the recommender returns nothing only when the user has
    # no unrated items left. A user with no ratings at all still gets items
    # back (each scored 0), so that case never reaches this branch.
    print("No unrated items to recommend.")

#### User-based Collaborative Filtering
The similarity matrix user_similarity is of shape (500, 500) (since there are 500 users).
Each entry user_similarity[i, j] represents how similar user i is to user j.
Then,
1. Identify the target user’s unrated items, i.e., the indices of the items the user has not rated yet.
2. For each unrated item, predict the rating the target user would give it.
For a given unrated item i, the prediction uses:
  a. The ratings that other users gave to item i (the target user’s own rating history enters only through the similarities in b)
  b. The similarity between the target user and the other users who rated item i — this is the step that differs from item-based CF
  c. A similarity-weighted average of those users’ ratings, used as the predicted rating of item i.
3. Sort the predicted ratings in descending order and pick the top-k highest-scoring unrated items.

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Seed the global NumPy RNG so the synthetic ratings below are reproducible
np.random.seed(42)

# 500 users × 20 items rating matrix; 0 = unrated (probability 0.8),
# 1-4 = rating (probability 0.05 each)
rating_matrix = np.random.choice(
    np.arange(5),
    size=(500, 20),
    p=[0.8] + [0.05] * 4,
)

# ------------------------------------------------------------------
# USER-BASED COLLABORATIVE FILTERING
# ------------------------------------------------------------------
def user_based_collaborative_filtering(rating_matrix, user_id, top_k=5):
    """
    Recommend the top-k unrated items for a user using user-user similarity.

    Each unrated item j is scored with a similarity-weighted average of the
    ratings it received from the users who rated it:

        pred(j) = sum_u sim(target, u) * r_uj / sum_u |sim(target, u)|

    where u runs over the users who rated item j and sim is the cosine
    similarity between user rating rows (unrated entries count as 0).
    An item gets a score of 0 when nobody has rated it or when the target
    user has zero similarity to everyone who did.

    Parameters
    ----------
    rating_matrix : array-like of shape (n_users, n_items)
        Ratings, with 0 marking an unrated entry.
    user_id : int
        Row index of the target user.
    top_k : int, default 5
        Maximum number of items to return.

    Returns
    -------
    recommended_items : ndarray
        0-based indices of the recommended items, highest score first.
    recommended_scores : ndarray
        Predicted ratings (float) aligned with ``recommended_items``.
        Both arrays are empty when the user has no unrated items.
    """
    ratings = np.asarray(rating_matrix)

    # ------------------------------------------------------------------
    # 1. Gather data for the target user
    # ------------------------------------------------------------------
    target_ratings = ratings[user_id]                     # ratings of user_id
    unrated_items = np.flatnonzero(target_ratings == 0)   # items to predict

    if unrated_items.size == 0:
        return np.array([]), np.array([])

    # ------------------------------------------------------------------
    # 2. Cosine similarity between the target user and every user
    # ------------------------------------------------------------------
    # Only row `user_id` of the user-user similarity matrix is ever read, so
    # just that row is computed instead of the full n_users × n_users matrix.
    # Unrated entries take part in the cosine as plain 0s.
    user_similarity = cosine_similarity(target_ratings.reshape(1, -1), ratings)[0]

    # ------------------------------------------------------------------
    # 3. Predict a rating for every unrated item at once
    # ------------------------------------------------------------------
    candidate_ratings = ratings[:, unrated_items]   # (n_users, n_candidates)
    has_rated = candidate_ratings > 0               # who rated which candidate

    # Numerator: sum of sim * rating over the users who rated each item.
    # Denominator: sum of |sim| over the same users. The target user never
    # contributes because their rating of every candidate item is 0.
    weighted_sums = user_similarity @ np.where(has_rated, candidate_ratings, 0)
    weight_totals = np.abs(user_similarity) @ has_rated.astype(float)
    predicted_ratings = np.divide(
        weighted_sums,
        weight_totals,
        out=np.zeros(unrated_items.size),   # score 0 where all weights are 0
        where=weight_totals != 0,
    )

    # ------------------------------------------------------------------
    # 4. Rank and return top-k recommendations
    # ------------------------------------------------------------------
    top_indices = np.argsort(predicted_ratings)[::-1][:top_k]
    recommended_items = unrated_items[top_indices]
    recommended_scores = predicted_ratings[top_indices]

    return recommended_items, recommended_scores


# ------------------------------------------------------------------
# Recommend top-5 items for User 1 (index 0)
# ------------------------------------------------------------------
user_id, top_k = 0, 5
rec_items, rec_scores = user_based_collaborative_filtering(rating_matrix, user_id, top_k)

print(f"\nUser-Based Collaborative Filtering Recommendations for User {user_id+1} (Top-{top_k}):")
print(f"User {user_id+1}'s current ratings: {rating_matrix[user_id]}")
if len(rec_items) == 0:
    # Nothing came back from the recommender
    print(f"No unrated items for User {user_id+1}.")
else:
    # One line per recommendation, best predicted rating first
    for rank, (item, score) in enumerate(zip(rec_items, rec_scores), start=1):
        print(f"Rank {rank}: Item {item+1} (Predicted Rating: {score:.3f})")

### Content-Based Recommender Systems
The user profile is a feature vector that summarizes what the user likes, built from the features of rated items, weighted by their ratings.
The cosine similarity between the user profile and every item is then measured.
  - The "content" is stored in the item_features matrix; "content" refers to the descriptive attributes (features) of the items.
  - In item_features, each row is one item and each column is one content feature.
  - How does the user profile relate to content?
    - The user profile is built entirely from item content (i.e., the item_features rows of the items the user rated).
    - The user profile is therefore a content-based preference vector.
Cosine similarity is just the method used to compare content; the actual content is encoded in the feature vectors of the items.
Cosine similarity is good for comparing direction (i.e., preference alignment), not magnitude:
  - It measures how well two content vectors (an item's feature vector and the user profile) are aligned in direction.

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Seed first: the two draws below must happen in this order to reproduce
# the same ratings and the same item features on every run
np.random.seed(42)

# 500 users × 20 items rating matrix (0 = unrated, 1–4 = rating)
rating_matrix = np.random.choice(
    np.arange(5),
    size=(500, 20),
    p=[0.8] + [0.05] * 4,
)

# Item features: 20 items × 5 binary genre flags (1 with probability 0.6)
item_features = np.random.choice(
    [0, 1],
    size=(20, 5),
    p=[0.4, 0.6],
)
genres = ["Action", "Comedy", "Drama", "Sci-Fi", "Romance"]

# Preview the genre labels of the first three items
print("Sample Item Features (Item ID: Genres):")
for i in range(3):
    item_genres = [
        genre
        for genre, flag in zip(genres, item_features[i])
        if flag == 1
    ]
    print(f"Item {i+1}: {item_genres}")

# ------------------------------------------------------------------
# CONTENT-BASED RECOMMENDER
# ------------------------------------------------------------------
def build_user_profile(user_ratings, item_features):
    """
    Build a user's preference vector from the items they have rated.

    The profile is the rating-weighted average of the feature rows of the
    rated items (rating > 0). A user with no ratings gets an all-zero vector
    with one entry per feature column.
    """
    has_rating = user_ratings > 0
    if has_rating.any():
        # Higher-rated items pull the profile harder towards their features
        return np.average(
            item_features[has_rating],
            axis=0,
            weights=user_ratings[has_rating],
        )
    return np.zeros(item_features.shape[1])

def content_based_recommender(rating_matrix, item_features, user_id, top_k=5):
    """
    Recommend the unrated items whose features best match the user's profile.

    Returns a pair (item indices, cosine similarities), most similar first and
    at most top_k long. Both arrays are empty when the user has no unrated
    items.
    """
    ratings_of_user = rating_matrix[user_id]
    profile = build_user_profile(ratings_of_user, item_features)

    # Score every item against the profile (a single-row query → take row 0)
    all_scores = cosine_similarity([profile], item_features)[0]

    # Only items the user has not rated yet are candidates
    candidates = np.flatnonzero(ratings_of_user == 0)
    if candidates.size == 0:
        return np.array([]), np.array([])

    candidate_scores = all_scores[candidates]
    best_first = np.argsort(candidate_scores)[::-1]
    keep = best_first[:top_k]

    return candidates[keep], candidate_scores[keep]


# ------------------------------------------------------------------
# Recommend top-5 items for User 1
# ------------------------------------------------------------------
user_id, top_k = 0, 5
rec_items, rec_scores = content_based_recommender(rating_matrix, item_features, user_id, top_k)

print(f"\nContent-Based Recommendations for User {user_id+1} (Top-{top_k}):")
print(f"User {user_id+1}'s current ratings: {rating_matrix[user_id]}")
if len(rec_items) == 0:
    print("No unrated items to recommend.")
else:
    for rank, (item, score) in enumerate(zip(rec_items, rec_scores), start=1):
        # Translate the item's binary feature row into genre names
        item_genres = [
            genre
            for genre, flag in zip(genres, item_features[item])
            if flag == 1
        ]
        print(f"Rank {rank}: Item {item+1} (Similarity: {score:.3f}, Genres: {item_genres})")