# In [13]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Movie catalogue: every movieId paired with its display title.
movies_data = dict(
    movieId=list(range(1, 7)),
    title=[
        'The Shawshank Redemption',
        'The Godfather',
        'The Dark Knight',
        'Pulp Fiction',
        'Forrest Gump',
        'Inception',
    ],
)
movies_df = pd.DataFrame(movies_data, columns=['movieId', 'title'])

# Explicit ratings in long format: one (userId, movieId, rating) triple
# per position across the three parallel lists.
data = dict(
    userId=[1, 1, 1, 2, 2, 3, 3],
    movieId=[1, 2, 3, 1, 3, 2, 3],
    rating=[5, 3, 4, 4, 5, 5, 5],
)
df = pd.DataFrame(data, columns=['userId', 'movieId', 'rating'])

# Step 2: Reshape the long ratings table into a user x movie grid.
# Pairs with no rating are filled with 0.
user_movie_matrix = pd.pivot_table(
    df,
    values='rating',
    index='userId',
    columns='movieId',
    fill_value=0,
)

# Step 3: Item-item cosine similarity. Transposing puts one movie per row,
# so each movie is compared through its vector of user ratings.
cosine_sim = cosine_similarity(user_movie_matrix.transpose())

# Label both axes of the similarity matrix with the movie ids
cosine_sim_df = pd.DataFrame(
    data=cosine_sim,
    columns=user_movie_matrix.columns,
    index=user_movie_matrix.columns,
)

# Step 4: Build a function to recommend movies for a given user
def recommend_movies(user_id, top_n=3, *, ratings=None, similarity=None, movies=None):
    """Recommend unrated movies to a user via item-based collaborative filtering.

    Every movie the user has not rated receives a predicted rating: the
    similarity-weighted average of the ratings the user gave to other
    movies. Candidates are then ranked by predicted rating, highest first.

    Args:
        user_id: Row label of the user in the ratings matrix.
        top_n: Maximum number of recommendations to return.
        ratings: User x movie rating matrix in which 0 means "not rated".
            Defaults to the module-level ``user_movie_matrix``.
        similarity: Movie x movie similarity frame labelled with movie ids
            on both axes. Defaults to the module-level ``cosine_sim_df``.
        movies: Frame with ``movieId`` and ``title`` columns used to resolve
            titles. Defaults to the module-level ``movies_df``.

    Returns:
        A list of at most ``top_n`` ``(title, predicted_rating)`` tuples
        sorted by predicted rating in descending order. A candidate whose
        similarities to the user's rated movies do not sum to a positive
        value gets a predicted rating of 0.0. A movie id that is absent
        from ``movies`` is reported with a placeholder title instead of
        aborting the whole recommendation.

    Raises:
        KeyError: If ``user_id`` is not a row label of the ratings matrix.
    """
    # Fall back to the notebook-level frames when none are supplied, so
    # existing calls such as recommend_movies(2, top_n=3) keep working.
    ratings = user_movie_matrix if ratings is None else ratings
    similarity = cosine_sim_df if similarity is None else similarity
    movies = movies_df if movies is None else movies

    user_ratings = ratings.loc[user_id]
    rated = user_ratings[user_ratings > 0]
    candidates = user_ratings[user_ratings == 0].index

    predicted_ratings = {}
    for movie_id in candidates:
        # Pick the neighbours by label. A candidate is unrated by
        # definition, so it can never be its own neighbour; there is no
        # need to sort the similarities and drop "the first one", which
        # removes an arbitrary movie whenever the top similarity is tied.
        weights = similarity.loc[rated.index, movie_id]
        weight_total = weights.sum()
        if weight_total > 0:
            predicted_ratings[movie_id] = float((weights * rated).sum() / weight_total)
        else:
            # No usable signal from the movies this user has rated
            predicted_ratings[movie_id] = 0.0

    # sorted() is stable, so equally-scored movies keep their column order
    ranked = sorted(predicted_ratings.items(), key=lambda item: item[1], reverse=True)[:top_n]

    # Build the id -> title map once instead of filtering the frame for
    # every recommendation; keep the first title if an id is duplicated.
    catalogue = movies.drop_duplicates(subset='movieId')
    title_by_id = dict(zip(catalogue['movieId'], catalogue['title']))

    return [
        (title_by_id.get(movie_id, f"Unknown title (movieId={movie_id})"), rating)
        for movie_id, rating in ranked
    ]

# Step 5: Demo - request up to three recommendations for user 2 and print
# one line per recommended movie.
user_id = 2
recommended_movies = recommend_movies(user_id=user_id, top_n=3)

print(f"Recommended movies for user {user_id}:")
for recommendation in recommended_movies:
    movie_title, predicted_rating = recommendation
    print(f"Movie: {movie_title}, Predicted Rating: {predicted_rating:.2f}")


# Output:
# Recommended movies for user 2:
# Movie: The Godfather, Predicted Rating: 4.66
