In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# --- Load the raw data -------------------------------------------------------
# ratings.csv supplies the per-user ratings (userId, movieId, rating columns are
# used below); movies.csv supplies the movie metadata (movieId, title).
df_rating = pd.read_csv("ratings.csv")
df_movies = pd.read_csv("movies.csv")

# --- Attach titles to ratings ------------------------------------------------
# Join on the shared 'movieId' key (pandas' default inner join), so every row
# carries both the rating and the title of the rated movie.
df_all = df_rating.merge(df_movies, on="movieId")

# --- Build the user x movie grid ---------------------------------------------
# One row per userId, one column per title, cell = rating. Cells with no rating
# are NaN; if several rows map to the same (userId, title) pair, pivot_table's
# default aggregation (mean) combines them.
user_movie_matrix = pd.pivot_table(
    df_all,
    index="userId",
    columns="title",
    values="rating",
)

# cosine_similarity rejects NaN, so unrated cells are encoded as 0 in a separate
# copy. The NaN version is kept because it still distinguishes "not rated" from
# an actual rating.
user_movie_matrix_filled = user_movie_matrix.fillna(0)

# --- User-to-user similarity -------------------------------------------------
# cos(A, B) = (A · B) / (||A|| * ||B||), computed between every pair of user
# rows of the zero-filled grid.
similarity_matrix = cosine_similarity(user_movie_matrix_filled)

# Label both axes with the user IDs so similarities can be looked up by ID
# instead of by positional offset.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=user_movie_matrix_filled.index,
    columns=user_movie_matrix_filled.index,
)

# Define a function to get movie recommendations for a given user.
# The function will recommend movies that the user has not rated yet, based on the ratings of similar users.
def get_recommend(user_id: int, num_recommendations: int = 5) -> pd.Series:
    """
    Recommend movies a user has not rated, ranked by what similar users rated.

    Each movie's score is the similarity-weighted sum of every user's rating
    divided by the user's total similarity to all users. Ratings are read from
    the module-level ``user_movie_matrix_filled``, where a missing rating counts
    as 0, so a score rises both with how highly and with how widely similar
    users rated the movie. The scores rank candidates; they are not predictions
    on the original rating scale.

    Parameters:
    user_id (int): The ID of the user for whom to generate recommendations.
    num_recommendations (int): The number of recommendations to generate (default is 5).

    Returns:
    pandas.Series: Scores indexed by movie title, highest first, limited to
        movies the user has not rated. Empty when the user's total similarity
        is zero, because no score can be computed in that case.

    Raises:
    KeyError: If ``user_id`` is not present in the user-item matrix.
    """
    # Fail early with a readable message instead of pandas' bare label error.
    if user_id not in user_movie_matrix.index:
        raise KeyError(f"Unknown user_id {user_id!r}: user is not in the ratings matrix.")

    # Titles the user already rated (non-NaN cells); these are never recommended.
    rated_titles = user_movie_matrix.loc[user_id].dropna().index

    # Similarity of this user to every user (including themselves).
    user_similarities = similarity_df[user_id]

    # Without any similarity mass the normalisation below would divide by zero
    # and yield NaN/inf scores, so report "nothing to recommend" instead.
    total_similarity = user_similarities.sum()
    if total_similarity == 0:
        return pd.Series(dtype=float)

    # (movies x users) · (users,) gives one similarity-weighted rating sum per
    # movie; dividing by the total similarity normalises it.
    weighted_scores = user_movie_matrix_filled.T.dot(user_similarities).div(total_similarity)

    # Remove movies the user has already rated, then keep the best-scoring ones.
    recommendations = weighted_scores.drop(rated_titles, errors='ignore')
    return recommendations.nlargest(num_recommendations)

# Demo: fetch the six top-scoring recommendations for user 1 and display them.
example_recommendations = get_recommend(user_id=1, num_recommendations=6)
print(example_recommendations)
