<a href="https://colab.research.google.com/github/jayaprabhapalani/movie-recommender-system/blob/main/UBCF_Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Load & Prepare the Data**

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Ratings file: tab-separated and parsed without a header row, so the
# column names are supplied explicitly.
ratings = pd.read_csv(
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    sep="\t",
    header=None,
    names=["user_id", "movie_id", "rating", "timestamp"],
)

# Movie metadata: pipe-separated, latin-1 encoded, again parsed without a
# header row. The first five columns describe the movie; the remaining
# ones are named after genres.
movies = pd.read_csv(
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
    sep="|",
    encoding="latin-1",
    header=None,
    names=[
        "movie_id", "title", "release_date", "video_release_date", "IMDb_URL",
        "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
        "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
        "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
    ],
)

# Attach the title to every rating row (inner join on movie_id); only the
# id and title columns of the metadata are carried over.
data = ratings.merge(movies[["movie_id", "title"]], on="movie_id")


In [None]:
# Preview the first five merged rating rows.
data.head(n=5)

Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


**Create User-Item Matrix**

In [None]:
# Rows are users, columns are movie titles, cells are ratings.
# pivot_table aggregates with the mean by default, so several ratings by one
# user that map to the same title would be averaged into a single cell.
# Missing (user, title) pairs become 0, which the rest of the notebook
# treats as "not rated".
user_item_matrix = (
    data
    .pivot_table(index="user_id", columns="title", values="rating")
    .fillna(0)
)
user_item_matrix.head()


title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0


**Compute User Similarity**

In [None]:
# Pairwise cosine similarity between the users' rating rows
# (one row/column per user in the resulting square matrix).
user_similarity = cosine_similarity(user_item_matrix)

# Wrap the raw array so rows and columns can be looked up by user_id.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

user_similarity_df.head()


user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.168937,0.048388,0.064561,0.37967,0.429682,0.443097,0.320079,0.078385,0.377733,...,0.372213,0.11986,0.26986,0.193343,0.197949,0.118722,0.315064,0.149086,0.181612,0.399432
2,0.168937,1.0,0.113393,0.179694,0.073623,0.242106,0.108604,0.104257,0.16247,0.161273,...,0.147095,0.310661,0.363328,0.410725,0.322713,0.231096,0.228793,0.162911,0.175273,0.106732
3,0.048388,0.113393,1.0,0.349781,0.021592,0.074018,0.067423,0.084419,0.062039,0.066217,...,0.033885,0.043453,0.16714,0.071288,0.126278,0.026758,0.164539,0.102899,0.136757,0.02699
4,0.064561,0.179694,0.349781,1.0,0.031804,0.068431,0.091507,0.18806,0.101284,0.060859,...,0.054615,0.036784,0.133619,0.196561,0.146058,0.030202,0.196858,0.152041,0.171538,0.058752
5,0.37967,0.073623,0.021592,0.031804,1.0,0.238636,0.374733,0.24893,0.056847,0.201427,...,0.340183,0.08058,0.095284,0.081053,0.148607,0.071612,0.239955,0.139595,0.153799,0.313941


**Get Top-K Similar Users (Neighbors)**

In [None]:
def get_top_k_neighbors(user_id, k=5, similarity=None):
    """Return the ``k`` users most similar to ``user_id``, excluding that user.

    Parameters
    ----------
    user_id : hashable
        Label of the target user; must be a column of the similarity matrix.
    k : int, default 5
        Maximum number of neighbours to return. ``k <= 0`` yields an empty
        Series.
    similarity : pandas.DataFrame, optional
        Square user-by-user similarity matrix indexed by user id on both
        axes. When omitted, the notebook-level ``user_similarity_df`` is used.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by neighbour id, sorted from most to least
        similar, with at most ``k`` entries.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of the similarity matrix.
    """
    if similarity is None:
        similarity = user_similarity_df

    # Remove the target user by label instead of assuming it sorts first:
    # another user with an identical rating vector (similarity 1.0), a tie,
    # or floating-point rounding of the diagonal can all put a different
    # user in position 0, which would leave the target user in its own
    # neighbourhood.
    candidates = similarity[user_id].drop(labels=user_id, errors="ignore")

    # A stable sort keeps tied users in their original index order, so the
    # selected neighbourhood is reproducible between runs.
    ranked = candidates.sort_values(ascending=False, kind="stable")

    # Explicit positional slice; a negative k is clamped to "no neighbours".
    return ranked.iloc[:max(k, 0)]


**Predict Ratings for Unseen Movies**

In [None]:
def predict_ratings(user_id, k=5):
    """Predict ratings for the movies ``user_id`` has not rated yet.

    Each prediction is the similarity-weighted average of the ratings given
    by the ``k`` nearest neighbours *who actually rated that movie*:

        pred(u, i) = sum_v sim(u, v) * r(v, i) / sum_v |sim(u, v)|

    where ``v`` ranges over the neighbours with a non-zero rating for ``i``.
    Normalising by the similarity of all ``k`` neighbours instead would count
    every missing rating (stored as 0 in ``user_item_matrix``) as a real
    rating of 0 and drag predictions toward zero.

    Parameters
    ----------
    user_id : hashable
        Row label of the target user in ``user_item_matrix``.
    k : int, default 5
        Number of nearest neighbours to use.

    Returns
    -------
    pandas.Series
        Predicted ratings indexed by movie title, restricted to movies the
        user has not rated, sorted from highest to lowest. Movies that none
        of the neighbours rated get a prediction of 0.0.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in the notebook's matrices.
    """
    # Step 1: nearest neighbours and their similarity to the target user.
    top_users = get_top_k_neighbors(user_id, k)
    sims = top_users.to_numpy(dtype=float)

    # Step 2: the neighbours' rating rows (neighbours x movies).
    neighbor_ratings = user_item_matrix.loc[top_users.index].to_numpy(dtype=float)

    # 0 is the "not rated" placeholder in user_item_matrix, not a real score.
    has_rated = neighbor_ratings > 0

    # Step 3: per-movie weighted sum of ratings; unrated cells contribute 0.
    weighted_sum = sims @ neighbor_ratings

    # Step 4: per-movie normaliser -- only the similarity of neighbours who
    # rated that particular movie is counted.
    sim_sum = np.abs(sims) @ has_rated.astype(float)

    # Step 5: guarded division; movies without any neighbour rating keep 0.0
    # instead of producing NaN/inf and a runtime warning.
    predicted_ratings = np.divide(
        weighted_sum,
        sim_sum,
        out=np.zeros_like(weighted_sum),
        where=sim_sum > 0,
    )
    recommendations = pd.Series(predicted_ratings, index=user_item_matrix.columns)

    # Step 6: keep only the movies the target user has not rated (rating == 0).
    user_rated = user_item_matrix.loc[user_id]
    unseen = user_rated[user_rated == 0].index

    # Step 7: best predictions first; the stable sort keeps tied movies in
    # column order so repeated runs give the same ranking.
    return recommendations[unseen].sort_values(ascending=False, kind="stable")



**Recommend Top-N Movies**

In [None]:
# Example run: score user 4's unseen movies from 5 neighbours and show the
# ten highest predictions.
user_id = 4
recommendations = predict_ratings(user_id, k=5)
recommendations.head(n=10)


Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
"Ice Storm, The (1997)",3.414813
L.A. Confidential (1997),3.329308
"Peacemaker, The (1997)",3.208065
Seven Years in Tibet (1997),2.945046
"English Patient, The (1996)",2.80727
G.I. Jane (1997),2.736128
"Edge, The (1997)",2.626103
Dante's Peak (1997),2.400084
"Devil's Own, The (1997)",2.391259
"Mrs. Brown (Her Majesty, Mrs. Brown) (1997)",2.206238
