In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
# Load the ratings file
ratings = pd.read_csv("ratings_small.csv")

# Load the movie metadata
metadata = pd.read_csv(
    "movies_metadata.csv",
    sep=',',
    quotechar='"',
    engine='python',
    parse_dates=['release_date'],
    date_format='%Y-%m-%d',
    on_bad_lines='skip',
)

# Keep only rows whose id is a whole number.
# pd.to_numeric(errors='coerce') turns anything unparseable (including
# missing values) into NaN instead of raising, so this works whether pandas
# read the column as strings or as numbers.
id_numeric = pd.to_numeric(metadata['id'], errors='coerce')
has_integer_id = id_numeric.notna() & (id_numeric % 1 == 0)

# .copy() makes the filtered frame independent of the raw one, so the
# column assignment below does not write into a slice
# (avoids SettingWithCopyWarning).
metadata = metadata.loc[has_integer_id].copy()
metadata['id'] = id_numeric[has_integer_id].astype(int)

# Merge ratings with metadata (inner join: only rated movies that have metadata)
merged = pd.merge(ratings, metadata, left_on='movieId', right_on='id')

In [10]:
# Build the user x movie rating matrix: one row per userId, one column per
# movie title, cell = rating. Missing user/movie pairs come out of the pivot
# as NaN and are replaced with 0, i.e. an unrated movie is treated as a 0.
user_movie_matrix = merged.pivot_table(
    values='rating',
    index='userId',
    columns='title',
).fillna(0)

In [11]:
# Transposing puts one movie per row, so the pairwise cosine similarity is
# computed movie-to-movie over the users' rating vectors.
item_similarity = cosine_similarity(user_movie_matrix.transpose())

# Wrap the raw similarity array in a DataFrame labelled by movie title on
# both axes so scores can be looked up by name.
item_sim_df = pd.DataFrame(
    data=item_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns,
)

In [12]:
def recommend_movies(movie_title, sim_df, top_n=5):
    """Return the titles most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Column label in ``sim_df`` to find neighbours for.
    sim_df : pandas.DataFrame
        Similarity scores; the column ``movie_title`` is read and its index
        labels are returned as the recommendations.
    top_n : int, default 5
        Maximum number of titles to return. Values <= 0 yield an empty list.

    Returns
    -------
    list
        Up to ``top_n`` index labels ordered by descending similarity, never
        including ``movie_title`` itself. If ``movie_title`` is not a column
        of ``sim_df``, the single-element list
        ``["Movie not found in dataset."]`` is returned.
    """
    if movie_title not in sim_df.columns:
        return ["Movie not found in dataset."]

    # Remove the queried movie by label rather than assuming it sorts to
    # position 0: another movie can tie with it at similarity 1.0, and a
    # movie with an all-zero vector has self-similarity 0, so skipping the
    # first sorted entry can leak the query title into the results.
    scores = sim_df[movie_title].drop(labels=movie_title, errors="ignore")

    # A stable sort keeps tied scores in their original index order, making
    # the output deterministic.
    ranked = scores.sort_values(ascending=False, kind="mergesort")

    # Clamp so a negative top_n cannot trigger head()'s "all but last n" mode.
    return list(ranked.head(max(top_n, 0)).index)

In [13]:
# Show every movie title available as a column of the rating matrix.
print(user_movie_matrix.columns.tolist())



In [15]:
# Look up the nearest neighbours of one title using the default top_n.
recommended = recommend_movies(
    "'Twas the Night Before Christmas",
    item_sim_df,
)

# Print a header followed by one recommendation per line.
print("Recommended Movies:")
for title in recommended:
    print(f"🎬 {title}")

Recommended Movies:
🎬 Pumping Iron
🎬 Night of the Demons
🎬 Gladiator
🎬 Girl Shy
🎬 I Can't Sleep
