In [12]:
# 📦 Step 1: Import libraries
!pip install tabulate
from tabulate import tabulate
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd
from collections import defaultdict
# 📊 Step 2: Load the MovieLens 100k dataset (built-in)
data = Dataset.load_builtin('ml-100k')

# Step 3: Hold out 20% of the ratings as a test set; the fixed seed keeps
# the split identical between runs.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Step 4: Build and train the SVD (matrix factorization) model.
# The model is seeded as well: seeding only the split is not enough for the
# RMSE and the recommendations below to repeat from run to run, because the
# model's own initialisation draws from a random number generator.
model = SVD(random_state=42)
model.fit(trainset)

# Step 5: Predict ratings for the held-out test set and report the error.
# accuracy.rmse prints the RMSE itself, so its return value is not used here.
predictions = model.test(testset)
print("\n📏 Model Evaluation:")
accuracy.rmse(predictions)
# 🎬 Step 6: Load movie titles and genres from MovieLens 100k (matches dataset!)
# Location of the pipe-separated item file for MovieLens 100k.
item_source = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.item'

# The file has no header row, so the column names are supplied here:
# five descriptive fields first, then one flag column per genre.
item_columns = [
    'movieId', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

movie_titles = pd.read_csv(
    item_source,
    sep='|',
    encoding='latin-1',
    names=item_columns,
)

# 🧠 Step 7: Create genre string from one-hot encoded columns
# Genre flag columns, in the order their names should appear in the
# combined genre string. The 'unknown' flag column is deliberately not listed.
genre_cols = [
    'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
    'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]


def get_genres(row):
    """Return the genres flagged in *row* as a single '|'-separated string.

    Args:
        row: Any mapping-like object (e.g. a DataFrame row) that can be
            indexed by every name in ``genre_cols``.

    Returns:
        The names from ``genre_cols`` whose value in *row* equals 1, joined
        with '|' in ``genre_cols`` order; an empty string when none match.
    """
    flagged = (name for name in genre_cols if row[name] == 1)
    return '|'.join(flagged)

# Add a 'genres' column by calling get_genres on every row of the table
# (axis=1 hands the function one row at a time rather than one column).
movie_titles['genres'] = movie_titles.apply(get_genres, axis=1)
# 📌 Step 8: Function to get top-N recommendations for each user
def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's n best-rated items.

    Args:
        predictions: Iterable of 5-item records unpacked as
            (user id, item id, true rating, estimated rating, details).
            Only the user id, item id and estimated rating are used.
        n: Maximum number of items to keep per user.

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(item id, estimated rating)`` pairs ordered from highest to
        lowest estimate. Items with equal estimates keep their original
        relative order.
    """
    per_user = defaultdict(list)
    for user_id, item_id, _actual, estimate, _details in predictions:
        per_user[user_id].append((item_id, estimate))

    # Replacing the value of an existing key does not resize the dict, so
    # it is safe to do while iterating over the keys.
    for user_id in per_user:
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]
    return per_user

# Get top 5 recommendations per user
top_n = get_top_n(predictions, n=5)

# Step 9: Display top 5 recommendations with movie title and genres for 3 users.
# Index title/genres by movie id once, so each recommendation is a direct
# lookup instead of a boolean-mask scan over the whole table. Dropping
# duplicate ids first keeps the first matching row, as the original
# ``.iloc[0]`` selection did.
movie_lookup = (
    movie_titles.drop_duplicates('movieId')
    .set_index('movieId')[['title', 'genres']]
)

for uid, user_ratings in list(top_n.items())[:3]:
    print(f"\n User {uid} recommendations:")

    user_recs = []
    for iid, rating in user_ratings:
        # Only the lookup can fail: the id is missing from the table
        # (KeyError) or cannot be converted to an integer (ValueError).
        try:
            movie_row = movie_lookup.loc[int(iid)]
        except (KeyError, ValueError):
            title = f"Movie ID {iid}"
            genre = "Unknown"
        else:
            title = movie_row['title']
            genre = movie_row['genres']

        user_recs.append({'Title': title, 'Estimated Rating': round(rating, 2), 'Genre': genre})

    df_user = pd.DataFrame(user_recs)
    print(tabulate(df_user, headers='keys', tablefmt='pretty', showindex=False))




📏 Model Evaluation:
RMSE: 0.9336

 User 907 recommendations:
+----------------------------------+------------------+-----------------------------+
|              Title               | Estimated Rating |            Genre            |
+----------------------------------+------------------+-----------------------------+
|   Celluloid Closet, The (1995)   |       5.0        |         Documentary         |
|       Fugitive, The (1993)       |       5.0        |       Action|Thriller       |
|            Ran (1985)            |       5.0        |          Drama|War          |
|         Toy Story (1995)         |       4.96       | Animation|Children's|Comedy |
| In the Name of the Father (1993) |       4.94       |            Drama            |
+----------------------------------+------------------+-----------------------------+

 User 371 recommendations:
+-------------------------------------------+------------------+------------------------------+
|                   Title               