# Import Libraries


In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns


# Load Dataset


In [5]:
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

print("✅ Dataset loaded!")
print("Ratings shape:", ratings.shape)
print("Movies shape:", movies.shape)

print("\nRatings sample:\n", ratings.head())
print("\nMovies sample:\n", movies.head())

✅ Dataset loaded!
Ratings shape: (105339, 4)
Movies shape: (10329, 3)

Ratings sample:
    userId  movieId  rating   timestamp
0       1       16     4.0  1217897793
1       1       24     1.5  1217895807
2       1       32     4.0  1217896246
3       1       47     4.0  1217896556
4       1       50     4.0  1217896523

Movies sample:
    movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


#  Merge Data & Prepare User-Item Matrix



In [7]:
data = pd.merge(ratings, movies, on="movieId")

print("Merged dataset shape:", data.shape)
print("\nSample:\n", data.head())


Merged dataset shape: (105339, 6)

Sample:
    userId  movieId  rating   timestamp  \
0       1       16     4.0  1217897793   
1       1       24     1.5  1217895807   
2       1       32     4.0  1217896246   
3       1       47     4.0  1217896556   
4       1       50     4.0  1217896523   

                                       title                   genres  
0                              Casino (1995)              Crime|Drama  
1                              Powder (1995)             Drama|Sci-Fi  
2  Twelve Monkeys (a.k.a. 12 Monkeys) (1995)  Mystery|Sci-Fi|Thriller  
3                Seven (a.k.a. Se7en) (1995)         Mystery|Thriller  
4                 Usual Suspects, The (1995)   Crime|Mystery|Thriller  


In [8]:
user_item_matrix = data.pivot_table(
    index="userId", columns="title", values="rating"
)

print("User-Item Matrix shape:", user_item_matrix.shape)
user_item_matrix.head()

User-Item Matrix shape: (668, 10323)


title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


# User Similarity

In [9]:
matrix_filled = user_item_matrix.fillna(0)

# Compute cosine similarity
user_similarity = cosine_similarity(matrix_filled)

# Convert to DataFrame
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index
)

print("User similarity matrix shape:", user_similarity_df.shape)

User similarity matrix shape: (668, 668)


# Movie Recommendation

In [10]:
def recommend_movies(user_id, top_n=5):
    # Get similarity scores for the user
    sim_scores = user_similarity_df[user_id].sort_values(ascending=False)
    
    # Pick top similar users 
    top_users = sim_scores.iloc[1:6].index
    
    #  movies watched by these top users
    similar_users_ratings = user_item_matrix.loc[top_users].mean().sort_values(ascending=False)
    
    # Exclude watched movies
    watched = user_item_matrix.loc[user_id].dropna().index
    recommendations = similar_users_ratings.drop(watched, errors="ignore")
    
    return recommendations.head(top_n)

# Example
print("🎬 Recommendations for User 1:")
print(recommend_movies(1, top_n=5))


🎬 Recommendations for User 1:
title
Taxi Driver (1976)            5.0
Brazil (1985)                 5.0
Serenity (2005)               5.0
Sound of Music, The (1965)    5.0
Resident Evil (2002)          5.0
dtype: float64


# Evaluate Performance

In [11]:
def precision_at_k(user_id, k=5):
    recs = recommend_movies(user_id, top_n=k)
    return len(recs) / k

print("Precision@5 (User 1):", precision_at_k(1, k=5))

Precision@5 (User 1): 1.0


# Item-Based Filtering (Bonus) 

In [12]:

movie_user_matrix = user_item_matrix.T.fillna(0)

#  similarity between movies
item_similarity = cosine_similarity(movie_user_matrix)
item_similarity_df = pd.DataFrame(
    item_similarity, index=movie_user_matrix.index, columns=movie_user_matrix.index
)

def recommend_similar_movies(movie_title, top_n=5):
    sim_scores = item_similarity_df[movie_title].sort_values(ascending=False)
    return sim_scores.iloc[1:top_n+1]

print("🎥 Movies similar to 'Toy Story (1995)':")
print(recommend_similar_movies("Toy Story (1995)", top_n=5))

🎥 Movies similar to 'Toy Story (1995)':
title
Star Wars: Episode VI - Return of the Jedi (1983)                                 0.629837
Star Wars: Episode IV - A New Hope (1977)                                         0.628777
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    0.586057
Independence Day (a.k.a. ID4) (1996)                                              0.578368
Back to the Future (1985)                                                         0.577313
Name: Toy Story (1995), dtype: float64
