In [29]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [30]:
# Read the raw ratings table from disk. The path is relative to the notebook's
# working directory; point it at the full path if ratings.csv lives elsewhere.
df = pd.read_csv(filepath_or_buffer='ratings.csv')

In [31]:
# Build the user-item matrix: one row per user_id, one column per movie_id,
# each cell holding that user's rating. Pairs with no rating come out of the
# pivot as NaN and are replaced with 0, so 0 means "not rated" downstream.
user_movie_ratings = (
    df
    .pivot(index='user_id', columns='movie_id', values='rating')
    .fillna(0)
)

In [32]:
# Sanity check: show the row of the user-item matrix for user_id 1.
# Missing ratings were filled with 0 when the matrix was built, so a 0 here
# means "not rated".
print(f"User 1's ratings:\n{user_movie_ratings.loc[1]}")

User 1's ratings:
movie_id
1    5
2    4
3    3
Name: 1, dtype: int64


In [33]:
# Item-item similarity: transpose the user-item matrix so that each row is a
# movie's vector of ratings across users, then compare every pair of movies.
cosine_sim = cosine_similarity(user_movie_ratings.T)

# Wrap the raw array in a DataFrame labelled by movie_id on both axes so that
# similarities can be looked up by movie label.
cosine_sim_df = pd.DataFrame(
    data=cosine_sim,
    index=user_movie_ratings.columns,
    columns=user_movie_ratings.columns,
)

In [34]:
# Preview the first rows (DataFrame.head()) of the movie-by-movie cosine
# similarity matrix to confirm it is labelled by movie_id on both axes.
print(f"Cosine Similarity Matrix:\n{cosine_sim_df.head()}")

# Function to recommend movies for a given user
def recommend_movies(user_id, user_movie_ratings, cosine_sim_df, top_n=10):
    """Recommend unrated movies for a user with item-based collaborative filtering.

    For every movie the user has not rated, the predicted rating is the
    similarity-weighted average of the user's existing ratings::

        prediction(m) = sum(sim(m, r) * rating(r)) / sum(|sim(m, r)|)

    where ``r`` ranges over the movies the user has rated. Movies whose total
    absolute similarity to the rated movies is not positive get no prediction.

    Parameters
    ----------
    user_id
        Row label of the user in ``user_movie_ratings``.
    user_movie_ratings : pandas.DataFrame
        User-item matrix (rows: users, columns: movies). Values ``<= 0`` are
        treated as "not rated".
    cosine_sim_df : pandas.DataFrame
        Movie-by-movie similarity matrix labelled with the same movie ids on
        both axes.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Movie ids ordered by predicted rating, highest first; at most
        ``top_n`` entries. Empty when the user has rated nothing or when no
        prediction could be made (e.g. the user already rated every movie).

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``user_movie_ratings``.
    """
    # Fail with an explicit message instead of the bare KeyError from .loc.
    if user_id not in user_movie_ratings.index:
        raise KeyError(f"User {user_id} not found in the user-item matrix")

    # Get the movies rated by the user
    user_ratings = user_movie_ratings.loc[user_id]
    rated_movie_ids = user_ratings[user_ratings > 0].index  # Filter out unrated movies

    if len(rated_movie_ids) == 0:
        print(f"User {user_id} has not rated any movies!")
        return []

    print(f"Movies rated by User {user_id}: {rated_movie_ids}")

    # Candidates are all movies the user has not rated yet.
    unrated_movie_ids = user_movie_ratings.columns.difference(rated_movie_ids)

    # Rows: candidate movies, columns: movies the user rated. Label-based
    # .loc replaces the chained cosine_sim_df[col][row] lookups.
    similarities = cosine_sim_df.loc[unrated_movie_ids, rated_movie_ids]
    rated_values = user_ratings.loc[rated_movie_ids]

    # skipna=False keeps the loop semantics: a NaN similarity makes the total
    # NaN, which fails the "> 0" check below, so that movie gets no prediction.
    weighted_sums = similarities.mul(rated_values, axis="columns").sum(
        axis="columns", skipna=False
    )
    total_similarity = similarities.abs().sum(axis="columns", skipna=False)

    # Only predict where there is some similarity signal; this also avoids
    # dividing by zero. Normalize by total similarity.
    has_signal = total_similarity > 0
    predicted_ratings = (
        weighted_sums[has_signal] / total_similarity[has_signal]
    ).to_dict()

    if not predicted_ratings:
        print("No predictions were made.")
        return []

    # Print predicted ratings for debugging
    print(f"Predicted Ratings: {predicted_ratings}")

    # Sort the movies based on the predicted ratings and return the top N movies
    # (sorted() is stable, so ties keep the order of unrated_movie_ids).
    recommended_movie_ids = sorted(
        predicted_ratings, key=predicted_ratings.get, reverse=True
    )[:top_n]

    return recommended_movie_ids

Cosine Similarity Matrix:
movie_id         1         2         3
movie_id                              
1         1.000000  0.926872  0.862045
2         0.926872  1.000000  0.904534
3         0.862045  0.904534  1.000000


In [35]:
# Example usage: pick a user and ask for recommendations (top_n left at its default).
# An empty list comes back when the user has no unrated movies to score,
# e.g. when they have already rated every movie in the matrix.
user_id = 1  # Replace with the user ID for whom you want recommendations
recommended_movies = recommend_movies(
    user_id=user_id,
    user_movie_ratings=user_movie_ratings,
    cosine_sim_df=cosine_sim_df,
)
print(f"Recommended movies for User {user_id}: {recommended_movies}")

Movies rated by User 1: Index([1, 2, 3], dtype='int64', name='movie_id')
No predictions were made.
Recommended movies for User 1: []
