In [9]:
import pandas as pd

In [10]:
import numpy as np

In [11]:
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
# Read the ratings file and keep only the three columns used below
# (user identifier, movie identifier, rating value), in that order.
ratings = pd.read_csv("ratings.csv").loc[:, ["userId", "movieId", "rating"]]
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [15]:
# Reshape the long ratings table into a user x movie matrix:
# one row per userId, one column per movieId, cell = rating.
# Movies a user has not rated come out as NaN and are replaced by 0.
ratings_pivot = (
    ratings
    .pivot(index="userId", columns="movieId", values="rating")
    .fillna(0)
)

In [16]:
# Reduce every user's row of the rating matrix to 20 components with
# truncated SVD; random_state is fixed so repeated runs give the same result.
svd = TruncatedSVD(
    n_components=20,
    random_state=42,
)
matrix = svd.fit_transform(ratings_pivot)

In [17]:
# Pairwise cosine similarity between the rows of `matrix` (one row per user).
# Entry [i, j] compares the users at positions i and j of ratings_pivot.index,
# which is the positional indexing recommend_movies relies on.
similarity_matrix = cosine_similarity(matrix)

In [18]:
def recommend_movies(user_id, n_recommendations, *, n_neighbors=None,
                     exclude_rated=True, ratings_matrix=None,
                     user_similarity=None):
    """Recommend movies for a user from the ratings of their most similar users.

    The users most similar to ``user_id`` are selected, their rows of the
    rating matrix are averaged column-wise, and the movies with the highest
    average are returned.

    Parameters
    ----------
    user_id
        Label of the target user in the index of the rating matrix.
    n_recommendations : int
        Maximum number of movie ids to return.
    n_neighbors : int, optional
        Number of similar users to average over. Defaults to
        ``n_recommendations``.
    exclude_rated : bool, default True
        When True, movies the target user has already rated (non-zero entry
        in their row; 0 is the fill value for "not rated") are never
        recommended. Pass False to rank every movie, including ones the user
        has already rated.
    ratings_matrix : pandas.DataFrame, optional
        User x movie rating matrix. Defaults to the notebook-level
        ``ratings_pivot``.
    user_similarity : 2-D array-like, optional
        User-user similarity matrix whose rows/columns follow the row order
        of the rating matrix. Defaults to the notebook-level
        ``similarity_matrix``.

    Returns
    -------
    list
        Column labels (movie ids) of the rating matrix, best first. May hold
        fewer than ``n_recommendations`` items; empty when nothing can be
        recommended.

    Raises
    ------
    KeyError
        If ``user_id`` is not in the index of the rating matrix.
    """
    # Fall back to the notebook-level objects when no data is passed in.
    if ratings_matrix is None:
        ratings_matrix = ratings_pivot
    if user_similarity is None:
        user_similarity = similarity_matrix
    if n_neighbors is None:
        n_neighbors = n_recommendations
    if n_recommendations <= 0 or n_neighbors <= 0:
        return []

    # Positional index of the user; rows of the similarity matrix use it too.
    user_idx = ratings_matrix.index.get_loc(user_id)

    # Rank all users by similarity, highest first. A stable sort keeps ties in
    # index order so the result is deterministic.
    scores = np.asarray(user_similarity[user_idx], dtype=float)
    ranked_users = np.argsort(-scores, kind="stable")

    # Drop the target user explicitly: they are not guaranteed to be ranked
    # first (e.g. an all-zero vector has cosine similarity 0 with itself, and
    # another user can tie at the top).
    neighbors = ranked_users[ranked_users != user_idx][:n_neighbors]
    if len(neighbors) == 0:
        return []

    # Average the neighbours' ratings per movie (unrated cells count as 0).
    movie_scores = ratings_matrix.iloc[neighbors].mean(axis=0)

    if exclude_rated:
        already_rated = ratings_matrix.iloc[user_idx] > 0
        movie_scores = movie_scores[~already_rated]

    # mergesort is stable, so equally scored movies keep their column order.
    best_first = movie_scores.sort_values(ascending=False, kind="mergesort")
    return best_first.head(n_recommendations).index.tolist()

In [19]:
# Example usage: ask for five recommendations for the user whose userId is 15
recommended_movies = recommend_movies(
    user_id=15,
    n_recommendations=5,
)

In [20]:
# Show the list of movieIds returned for user 15
recommended_movies

[3481, 2997, 1060, 1259, 1466]