Load the Dataset

Use Pandas to read the movie ratings data

In [None]:
# Movie Recommendation System - Collaborative Filtering

# Import required libraries
import pandas as pd # For data manipulation
import numpy as np # For numerical computations

# ---- Ratings ----
# The ratings file is tab-separated. Four field names are declared, but only
# the first three fields (user_id, movie_id, rating) are read; the timestamp
# is skipped.
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    './dataset/u.data',
    sep='\t',
    names=r_cols,
    usecols=[0, 1, 2],
)

# ---- Movie metadata ----
# The item file is pipe-separated; only the first two fields (movie_id, title)
# are read. The file is decoded as ISO-8859-1 (presumably because the titles
# are not valid UTF-8 -- confirm against the dataset).
m_cols = ['movie_id', 'title']
movies = pd.read_csv(
    './dataset/u.item',
    sep='|',
    names=m_cols,
    usecols=[0, 1],
    encoding="ISO-8859-1",
)

# Attach the movie title to every rating row by joining on movie_id
ratings = ratings.merge(movies, on="movie_id")

# Preview the merged table to check its structure
ratings.head()

In [None]:
# Build the user x movie rating matrix.
#
# Layout of the resulting table:
#   rows    -> user_id
#   columns -> movie title
#   cells   -> rating given by that user to that title (NaN when unrated)
user_movie_ratings = pd.pivot_table(
    ratings,
    values='rating',
    index='user_id',
    columns='title',
)

# Preview the matrix
user_movie_ratings.head()

In [None]:
# Import scikit-learn for similarity calculations
from sklearn.metrics.pairwise import cosine_similarity

# Item-based similarity: compare movies by the ratings users gave them.

# Transpose so that each row is one movie's vector of user ratings. Missing
# ratings are replaced with 0 first (not every user rates every movie).
movie_vectors = user_movie_ratings.fillna(0).T

# Pairwise cosine similarity between all movie vectors
movie_similarity = cosine_similarity(movie_vectors)

# Label both axes with the movie titles so similarities can be looked up by name
movie_titles = user_movie_ratings.columns
movie_similarity_df = pd.DataFrame(movie_similarity, index=movie_titles, columns=movie_titles)

Find similarity between users to suggest movies based on similar preferences

In [None]:
# User-based similarity: compare users by the ratings they gave to movies.

# Each row of the rating matrix is one user's vector of ratings. Missing
# ratings are replaced with 0 first (not every user rates every movie).
user_vectors = user_movie_ratings.fillna(0)

# Pairwise cosine similarity between all user vectors
user_similarity = cosine_similarity(user_vectors)

# Label both axes with the user ids so similarities can be looked up by id
user_ids = user_movie_ratings.index
user_similarity_df = pd.DataFrame(user_similarity, index=user_ids, columns=user_ids)

Recommend movies based on similarity

In [None]:
def get_movie_recommendations(movie_name, num_recommendations=5):
    """
    Return the movies most similar to 'movie_name'.

    Uses the precomputed movie similarity matrix (movie_similarity_df).

    Parameters:
    movie_name (str): Movie title to find similar movies for. Must be one of
        the labels of movie_similarity_df.
    num_recommendations (int): Maximum number of movies to return. Values
        below 1 yield an empty list.

    Returns:
    list: Recommended movie titles, most similar first. Never contains
        'movie_name' itself.

    Raises:
    KeyError: If 'movie_name' is not present in the similarity matrix.
    """
    if movie_name not in movie_similarity_df.columns:
        raise KeyError(f"Movie title not found in similarity matrix: {movie_name!r}")

    # Remove the query movie by label rather than by skipping position 0:
    # another movie with an identical rating vector ties at the same score,
    # so the query movie is not guaranteed to sort first.
    scores = movie_similarity_df[movie_name].drop(labels=movie_name)
    similar_movies = scores.sort_values(ascending=False).head(max(num_recommendations, 0))
    return similar_movies.index.tolist()

# Example usage to get recommendations for "Toy Story (1995)"
print(get_movie_recommendations("Toy Story (1995)", 5))

Suggest movies based on user preferences

In [None]:
# Function: Get Movie Recommendations for a Specific User

def recommend_movies_for_user(user_id, num_recommendations=5, *, num_similar_users=5, exclude_rated=False):
    """
    Recommend movies for a user from the ratings of their most similar users.

    The predicted rating of a movie is the plain (unweighted) mean of the
    ratings given to it by the 'num_similar_users' users most similar to
    'user_id' according to user_similarity_df.

    Parameters:
    user_id (int): User ID for whom recommendations are needed. Must be one
        of the labels of user_similarity_df.
    num_recommendations (int): Maximum number of movies to return.
    num_similar_users (int): How many nearest neighbours to average over
        (keyword-only, defaults to 5).
    exclude_rated (bool): When True, movies the user has already rated are
        removed from the result (keyword-only, defaults to False).

    Returns:
    pandas.Series: Predicted ratings indexed by movie title, highest first.
        Movies that none of the neighbours rated are omitted.

    Raises:
    KeyError: If 'user_id' is not present in the similarity matrix.
    """
    if user_id not in user_similarity_df.columns:
        raise KeyError(f"User ID not found in similarity matrix: {user_id!r}")

    # Remove the user by label rather than by skipping position 0: a user
    # with an identical rating vector ties at the same score, so the user is
    # not guaranteed to sort first.
    neighbour_scores = user_similarity_df[user_id].drop(labels=user_id)
    similar_users = neighbour_scores.sort_values(ascending=False).head(max(num_similar_users, 0)).index

    # Column-wise mean skips NaN; a movie unrated by every neighbour stays NaN
    # and is dropped so it can never appear as a recommendation.
    recommendations = user_movie_ratings.loc[similar_users].mean().dropna()

    if exclude_rated:
        already_rated = user_movie_ratings.loc[user_id].dropna().index
        recommendations = recommendations.drop(labels=already_rated, errors="ignore")

    return recommendations.sort_values(ascending=False).head(max(num_recommendations, 0))

# Example usage to get recommendations for user ID 300
print(recommend_movies_for_user(300))

**Root Mean Squared Error (RMSE)**

Evaluate the recommendation system:


In [None]:
# Import the mean_squared_error function from scikit-learn.
# It measures the squared difference between actual and predicted ratings,
# which is used below to evaluate the accuracy of the recommendation system.
from sklearn.metrics import mean_squared_error

# Evaluation Metric: Root Mean Squared Error (RMSE)
#
# NOTE(review): predictions are built from the same ratings they are scored
# against (no held-out split), so this figure is likely optimistic.

# Actual ratings: user x title matrix, NaN where the user gave no rating
actual_ratings = ratings.pivot_table(index='user_id', columns='title', values='rating')

# Predicted ratings: for every user, the mean rating given to each movie by
# that user's 5 most similar users. Starts as all-NaN and is filled row by row.
predicted_ratings = pd.DataFrame(np.nan, index=actual_ratings.index, columns=actual_ratings.columns)
for user_id in user_movie_ratings.index:
    # Drop the user by label: skipping position 0 would keep the user as
    # their own neighbour whenever another user ties at the top score.
    neighbour_scores = user_similarity_df[user_id].drop(labels=user_id)
    similar_users = neighbour_scores.sort_values(ascending=False).head(5).index
    predicted_ratings.loc[user_id] = user_movie_ratings.loc[similar_users].mean()

# Guarantee both matrices have identical labels before masking positionally
predicted_ratings = predicted_ratings.reindex_like(actual_ratings)

# Score only the (user, movie) cells that have BOTH an actual rating and a
# prediction. Filling the gaps with 0 would treat every unrated movie as a
# real rating of 0 and let those cells dominate the error.
comparable = (actual_ratings.notna() & predicted_ratings.notna()).to_numpy()
actual_values = actual_ratings.to_numpy()[comparable]
predicted_values = predicted_ratings.to_numpy()[comparable]

# Compute RMSE to evaluate recommendation accuracy (NaN if nothing is comparable)
mse = mean_squared_error(actual_values, predicted_values) if actual_values.size else np.nan
rmse = np.sqrt(mse)

# Display RMSE value
print("RMSE", rmse)

In [None]:
# Evaluation Metric: Precision@K

def precision_at_k(recommended_movies, relevant_movies, k):
    """
    Compute Precision@K: the fraction of the top 'k' recommendations that are
    also in the set of relevant movies.

    The denominator is always 'k', so a recommendation list shorter than 'k'
    is penalised for the missing slots.

    Parameters:
    recommended_movies (list): Recommended movie titles, best first.
    relevant_movies (list): Movies considered relevant for the user
        (e.g. the user's top-rated movies).
    k (int): Number of leading recommendations to evaluate. Must be >= 1.

    Returns:
    float: Precision score (ranges between 0 and 1).

    Raises:
    ValueError: If 'k' is not positive.
    """
    # k == 0 would divide by zero and a negative k would slice from the end
    # of the list and return a negative "precision"; reject both up front.
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    recommended_at_k = recommended_movies[:k]
    hits = len(set(recommended_at_k) & set(relevant_movies))
    return hits / k


# Worked example: Precision@5 for user ID 300
user_id = 300

# Titles of the 10 highest-scoring recommendations for this user
top_recommendations = recommend_movies_for_user(user_id, 10)
recommended_movies = top_recommendations.index.tolist()

# Titles of the 10 movies this user rated highest, used as the relevant set
user_rows = ratings[ratings['user_id'] == user_id]
ranked_rows = user_rows.sort_values(by='rating', ascending=False)
relevant_movies = ranked_rows['title'].head(10).tolist()

# Score the first 5 recommendations against the relevant set and report it
precision = precision_at_k(recommended_movies, relevant_movies, 5)
print(f"Precision@5: {precision:.2f}")

In [None]:
from sklearn.decomposition import TruncatedSVD

# User x title rating matrix, NaN where the user gave no rating
user_movie_matrix = ratings.pivot_table(index='user_id', columns='title', values='rating')

# Fill missing values with 0 (since some users may not have rated all movies)
user_movie_matrix_filled = user_movie_matrix.fillna(0)

# Apply Singular Value Decomposition, reducing each user to 20 latent factors.
# TruncatedSVD uses a randomized solver by default, so the seed is fixed to
# make the latent factors (and every recommendation derived from them)
# reproducible from one run to the next.
svd = TruncatedSVD(n_components=20, random_state=42)
latent_matrix = svd.fit_transform(user_movie_matrix_filled)

# Convert back to a DataFrame (one row per user_id) for readability
latent_df = pd.DataFrame(latent_matrix, index=user_movie_matrix_filled.index)

In [None]:
def recommend_movies_svd(user_id, num_recommendations=5, *, exclude_rated=False):
    """
    Recommend movies for a user using the SVD latent-factor representation.

    Every user is weighted by the dot product of their latent vector with the
    target user's latent vector (latent_df). A movie's score is the weighted
    sum of all users' ratings for it (user_movie_matrix_filled). The target
    user is part of that sum, so their own ratings contribute to the scores.

    Parameters:
    user_id (int): User ID for whom recommendations are needed. Must be one
        of the labels of latent_df.
    num_recommendations (int): Maximum number of movies to return.
    exclude_rated (bool): When True, movies the user has already rated are
        removed from the result (keyword-only, defaults to False).

    Returns:
    list: Recommended movie titles, highest score first.

    Raises:
    KeyError: If 'user_id' is not present in latent_df.
    """
    if user_id not in latent_df.index:
        raise KeyError(f"User ID not found in latent factor matrix: {user_id!r}")

    # Affinity of every user to the target user in latent space. Deliberately
    # not called 'user_similarity', which is a module-level array in this file.
    user_latent_vector = latent_df.loc[user_id].to_numpy()
    user_affinity = latent_df.to_numpy() @ user_latent_vector

    # Affinity-weighted sum of ratings per movie. The matrix is already
    # zero-filled, so no further fillna is needed here.
    movie_scores = pd.Series(
        user_affinity @ user_movie_matrix_filled.to_numpy(),
        index=user_movie_matrix_filled.columns,
    )

    if exclude_rated:
        # Use the unfilled matrix so "rated" means an actual rating exists
        already_rated = user_movie_matrix.loc[user_id].dropna().index
        movie_scores = movie_scores.drop(labels=already_rated, errors="ignore")

    # Rank movies based on predicted preferences
    recommended_movies = movie_scores.sort_values(ascending=False).head(max(num_recommendations, 0))
    return recommended_movies.index.tolist()

# Example usage: Get recommendations for user ID 300

print(recommend_movies_svd(300))

> # **Testing the Movie Recommendation System**

In [None]:
# Print the 5 movies returned by get_movie_recommendations for "Star Wars (1977)"
print(get_movie_recommendations("Star Wars (1977)", 5))

In [None]:
# Recommend movies for individual users. Any user_id listed in
# user_similarity_df.index can be used here (presumably 1 through 943 for
# this dataset -- confirm against the index printed further below).

print(recommend_movies_for_user(873))
print(recommend_movies_for_user(333))

In [None]:
# Dataset integrity check: preview the first rows of the movie similarity
# matrix to confirm that its rows and columns are labelled with movie titles.
print(movie_similarity_df.head())


In [None]:
# Dataset integrity check: preview the first rows of the user similarity
# matrix to confirm that its rows and columns are labelled with user IDs.
print(user_similarity_df.head())

In [None]:
# Debugging common issues: list every movie title in the similarity matrix
# index, to verify that a title passed to get_movie_recommendations exists.
print(movie_similarity_df.index)

In [None]:
# Debugging common issues: list every user ID in the similarity matrix index,
# to verify that an ID passed to recommend_movies_for_user exists.
print(user_similarity_df.index)