#Collaborative Filtering (User-Item Matrix)

In [110]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from collections import Counter
from IPython.display import display


In [111]:
# Load datasets
df = pd.read_csv("/content/movies.csv")
ratings_df = pd.read_csv("/content/ratings.csv")

In [112]:
# Merge ratings with movie titles
ratings_with_titles = pd.merge(ratings_df, df[['movieId', 'title']], on='movieId', how='left')

# Split into train/test
train_data, test_data = train_test_split(ratings_with_titles, test_size=0.2, random_state=42)


In [113]:
# Create mappings
unique_users = train_data['userId'].unique()
unique_movies = train_data['movieId'].unique()

In [114]:
user_to_index = {uid: idx for idx, uid in enumerate(unique_users)}
index_to_user = {idx: uid for uid, idx in user_to_index.items()}
movie_to_index = {mid: idx for idx, mid in enumerate(unique_movies)}
index_to_movie = {idx: mid for mid, idx in movie_to_index.items()}

In [115]:
# Create user-item sparse matrix for training
train_matrix = csr_matrix((
    train_data['rating'],
    (
        train_data['userId'].map(user_to_index),
        train_data['movieId'].map(movie_to_index)
    )
))


In [116]:
# Train KNN model
def train_knn_model(matrix, n_neighbors=10):
    model = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=n_neighbors)
    model.fit(matrix)
    return model

In [117]:
# Recommend top N movies
def recommend_movies(user_id, knn_model, train_matrix, n_recommendations=5):
    if user_id not in user_to_index:
        return []

    user_idx = user_to_index[user_id]
    distances, indices = knn_model.kneighbors(train_matrix[user_idx], n_neighbors=knn_model.n_neighbors)
    similar_users = indices.flatten()[1:]

    # Gather recommendations
    movie_counter = Counter()
    for user in similar_users:
        rated = train_matrix[user].nonzero()[1]
        movie_counter.update(rated)

    # Remove already seen movies
    seen = set(train_matrix[user_idx].nonzero()[1])
    for s in seen:
        movie_counter.pop(s, None)

    # Top-N recommendations
    top_indices = [idx for idx, _ in movie_counter.most_common(n_recommendations)]
    top_movie_ids = [index_to_movie[i] for i in top_indices]
    return top_movie_ids


In [118]:
# Precision@k metric
def precision_at_k(recommended, actual, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(actual))
    return hits / k if k > 0 else 0

In [119]:
# Evaluate KNN for different k values
def evaluate_knn_k_values(user_id, k_values, train_matrix, test_data, df_movies):
    user_actual = test_data[test_data['userId'] == user_id]['movieId'].tolist()
    if not user_actual:
        print("User has no data in the test set.")
        return

    for k in k_values:
        print(f"\n🔍 Evaluating n_neighbors = {k}")
        knn_model = train_knn_model(train_matrix, n_neighbors=k)
        recommended_ids = recommend_movies(user_id, knn_model, train_matrix, n_recommendations=5)

        precision = precision_at_k(recommended_ids, user_actual, 5)
        print(f"🎯 Precision@5: {precision:.2f}")

        recommended_titles = df_movies[df_movies['movieId'].isin(recommended_ids)][['title']]
        display(recommended_titles)


In [120]:
# Get input from user
user_input = input("Please enter your user ID: ").strip()

if user_input.isdigit() and int(user_input) in unique_users:
    user_id = int(user_input)
    k_values = [5, 10, 15, 20, 25]
    evaluate_knn_k_values(user_id, k_values, train_matrix, test_data, df)
else:
    print(f"User ID {user_input} not found in the training set.")

Please enter your user ID: 1027

🔍 Evaluating n_neighbors = 5
🎯 Precision@5: 0.20


Unnamed: 0,title
255,"Kid in King Arthur's Court, A (1995)"
1623,Sliding Doors (1998)
1883,Labyrinth (1986)
2487,10 Things I Hate About You (1999)
13644,17 Again (2009)



🔍 Evaluating n_neighbors = 10
🎯 Precision@5: 0.20


Unnamed: 0,title
1178,12 Angry Men (1957)
1623,Sliding Doors (1998)
1883,Labyrinth (1986)
2487,10 Things I Hate About You (1999)
13644,17 Again (2009)



🔍 Evaluating n_neighbors = 15
🎯 Precision@5: 0.20


Unnamed: 0,title
823,Emma (1996)
1178,12 Angry Men (1957)
1623,Sliding Doors (1998)
1810,Six Days Seven Nights (1998)
2487,10 Things I Hate About You (1999)



🔍 Evaluating n_neighbors = 20
🎯 Precision@5: 0.20


Unnamed: 0,title
523,Schindler's List (1993)
535,Sleepless in Seattle (1993)
591,Pretty Woman (1990)
1623,Sliding Doors (1998)
2487,10 Things I Hate About You (1999)



🔍 Evaluating n_neighbors = 25
🎯 Precision@5: 0.20


Unnamed: 0,title
315,"Shawshank Redemption, The (1994)"
523,Schindler's List (1993)
1623,Sliding Doors (1998)
2487,10 Things I Hate About You (1999)
2772,American Beauty (1999)
