In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
# Read the raw ratings table from disk
ratings = pd.read_csv('ratings.csv')

# Reshape into a grid with one row per user_id and one column per movie_id,
# holding the 'ratings' values, then replace the NaN cells with 0
# (a per-movie or per-user mean would be an alternative fill value)
user_movie_matrix = ratings.pivot_table(
    values='ratings',
    index='user_id',
    columns='movie_id',
).fillna(0)


In [4]:
# Pairwise cosine similarity between the rows (users) of the rating matrix
user_similarity = cosine_similarity(user_movie_matrix)

# Label both axes of the similarity matrix with the user index so that
# entries can be looked up by user id instead of by position
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)


In [5]:
def get_recommendations(user_id, user_movie_matrix, user_similarity_df, n_recommendations=5):
    """Recommend unrated movies to a user from similarity-weighted neighbour ratings.

    Each movie is scored as the sum over all *other* users of
    ``rating * similarity_to_user_id``, divided by the sum of those
    similarities. A stored rating of 0 is treated as "not rated": it adds
    nothing to the weighted sum, and only movies where ``user_id`` has a 0
    are eligible to be returned.

    Parameters
    ----------
    user_id : hashable
        Label of the target user; must be a row label of ``user_movie_matrix``
        and both a row and a column label of ``user_similarity_df``.
    user_movie_matrix : pandas.DataFrame
        Ratings with users as rows and movies as columns, 0 meaning unrated.
    user_similarity_df : pandas.DataFrame
        User-by-user similarity table labelled by user id on both axes.
    n_recommendations : int, default 5
        Maximum number of movies to return.

    Returns
    -------
    pandas.Series
        Scores indexed by movie, sorted from highest to lowest, with at most
        ``n_recommendations`` entries. Empty when the user has no unrated
        movies or when the similarities to all other users sum to zero.

    Raises
    ------
    KeyError
        If ``user_id`` is missing from either input frame.
    """
    # Remove the target user by label rather than by sort position: another
    # user with an identical rating vector also has similarity 1.0 and may
    # sort ahead of the target, in which case slicing off the first entry
    # would discard that neighbour and keep the target's own ratings.
    neighbor_weights = user_similarity_df[user_id].drop(user_id)

    user_rated_movies = user_movie_matrix.loc[user_id]

    # Normalisation constant for the weighted average
    similarity_sum = neighbor_weights.sum()
    if similarity_sum == 0:
        # No neighbours, or no similarity to any of them: every score would
        # be 0/0, so there is nothing meaningful to recommend.
        return pd.Series(dtype='float64')

    # Rows are taken in the same order as the weights so the product lines up
    neighbor_ratings = user_movie_matrix.loc[neighbor_weights.index]

    # One matrix-vector product gives the weighted rating sum for every movie
    weighted_sum_ratings = pd.Series(
        neighbor_ratings.to_numpy(dtype=float).T @ neighbor_weights.to_numpy(dtype=float),
        index=user_movie_matrix.columns,
    )

    # Weighted average of ratings
    recommendations = weighted_sum_ratings / similarity_sum

    # Keep only the movies the target user has not rated yet
    recommendations = recommendations[user_rated_movies == 0]

    # Highest scores first, truncated to the requested count
    return recommendations.sort_values(ascending=False).head(n_recommendations)

# Example: top recommendations for user 1, using the default result count
recommendations = get_recommendations(
    1,
    user_movie_matrix,
    user_similarity_df,
)
print("Recommendations for User 1:")
print(recommendations)


Recommendations for User 1:
Series([], dtype: float64)
