In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [2]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load datasets
ratings_path = "/content/drive/MyDrive/UITS/UITS /Eight Semester/Data Mining/Assignments/Assignment 1/ratings.csv"
movies_path = "/content/drive/MyDrive/UITS/UITS /Eight Semester/Data Mining/Assignments/Assignment 1/movies.xlsx"

ratings_df = pd.read_csv(ratings_path)
movies_df = pd.read_excel(movies_path)


In [4]:
# Merge datasets
df = pd.merge(ratings_df, movies_df, on='movieId')

# Create sparse matrix
row = df['movieId'].astype('category').cat.codes
col = df['userId'].astype('category').cat.codes
data = df['rating'].values
sparse_matrix = csr_matrix((data, (row, col)))


In [5]:
# Compute cosine similarity (sparse)
movie_similarity = cosine_similarity(sparse_matrix, dense_output=False)


In [6]:
# Convert similarity matrix to DataFrame
movie_ids = df['movieId'].astype('category').cat.categories
movie_sim_df = pd.DataFrame(movie_similarity.toarray(), index=movie_ids, columns=movie_ids)


In [7]:
# Generate recommendations for a user
user_id_example = 1
user_ratings = df[df['userId'] == user_id_example].sort_values(by='rating', ascending=False)
top_movies = user_ratings['movieId'].head(3).values



In [8]:
similar_movies_list = set()
for movie in top_movies:
    similar_movies_list.update(movie_sim_df[movie].nlargest(6).iloc[1:].index)

unrated_movies = list(similar_movies_list - set(user_ratings['movieId']))
recommended_movies = movies_df[movies_df['movieId'].isin(unrated_movies)].head(5)

In [9]:
print("\nRecommended movies for the user:")
print(recommended_movies)


Recommended movies for the user:
     movieId                        title                       genres
43        47  Seven (a.k.a. Se7en) (1995)             Mystery|Thriller
46        50   Usual Suspects, The (1995)       Crime|Mystery|Thriller
76        85    Angels and Insects (1995)                Drama|Romance
257      296          Pulp Fiction (1994)  Comedy|Crime|Drama|Thriller
284      326      To Live (Huozhe) (1994)                        Drama
