In [2]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np

credits_df = pd.read_csv('../dataset/credits.csv', low_memory=False)
keywords_df = pd.read_csv('../dataset/keywords.csv', low_memory=False)
links_df = pd.read_csv('../dataset/links.csv', low_memory=False)
links_small_df = pd.read_csv('../dataset/links_small.csv', low_memory=False)
movies_metadata_df = pd.read_csv('../dataset/movies_metadata.csv', low_memory=False)
ratings_df = pd.read_csv('../dataset/ratings.csv', low_memory=False)
ratings_small_df = pd.read_csv('../dataset/ratings_small.csv', low_memory=False)

In [3]:
print(movies_metadata_df[movies_metadata_df['id'] == '318'])
print(movies_metadata_df.columns)

      adult belongs_to_collection   budget  \
4020  False                   NaN  8000000   

                                                 genres homepage   id  \
4020  [{'id': 18, 'name': 'Drama'}, {'id': 53, 'name...      NaN  318   

        imdb_id original_language            original_title  \
4020  tt0120753                en  The Million Dollar Hotel   

                                               overview  ... release_date  \
4020  The Million Dollar Hotel starts with a jump fr...  ...   2000-02-09   

     revenue runtime                          spoken_languages    status  \
4020     0.0   122.0  [{'iso_639_1': 'en', 'name': 'English'}]  Released   

      tagline                     title  video vote_average vote_count  
4020      NaN  The Million Dollar Hotel  False          5.9       76.0  

[1 rows x 24 columns]
Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       '

# Ratings Based Recommender

In [4]:
# Ratings-Based Recommendation
# Load and prepare data
reader = Reader(rating_scale=(ratings_small_df['rating'].min(), ratings_small_df['rating'].max()))
data = Dataset.load_from_df(ratings_small_df[['userId', 'movieId', 'rating']], reader)

# Split data and train SVD model
trainset, testset = train_test_split(data, test_size=0.2)
svd = SVD()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13d887ad0>

In [7]:
user_id = 2

# Get all movie IDs in the dataset
all_movie_ids = ratings_small_df['movieId'].unique()

# Filter out movies the user has already rated
rated_movies = ratings_small_df[ratings_small_df['userId'] == user_id]['movieId']
unrated_movies = [movie for movie in all_movie_ids if movie not in rated_movies.values]

# Predict ratings for each unrated movie
predictions = [(movie_id, svd.predict(user_id, movie_id).est) for movie_id in unrated_movies]
sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)


# Step 5: Get Top 5 Recommendations with Valid Titles
top_5_recommendations = []
for movie_id, predicted_rating in sorted_predictions:
    # Retrieve title
    title = movies_metadata_df[movies_metadata_df['id'] == str(movie_id)]['title']
    
    # Check if the title exists and is not empty
    if not title.empty:
        top_5_recommendations.append((movie_id, title.values[0], predicted_rating))
    
    # Stop once we have 5 valid recommendations
    if len(top_5_recommendations) == 5:
        break

# Display recommended movie IDs, titles, and predicted ratings
print("Top 5 Recommended Movies (movie_id, title, predicted_rating):")
for movie_id, title, predicted_rating in top_5_recommendations:
    print(f"Movie ID: {movie_id}, Movie Name: {title}, Predicted Rating: {round(predicted_rating, 2)}")

Top 5 Recommended Movies (movie_id, title, predicted_rating):
Movie ID: 905, Movie Name: Pandora's Box, Predicted Rating: 4.49
Movie ID: 4993, Movie Name: 5 Card Stud, Predicted Rating: 4.47
Movie ID: 2064, Movie Name: While You Were Sleeping, Predicted Rating: 4.41
Movie ID: 318, Movie Name: The Million Dollar Hotel, Predicted Rating: 4.35
Movie ID: 951, Movie Name: Kindergarten Cop, Predicted Rating: 4.33
