<a href="https://colab.research.google.com/github/jerome-keli/movie_recommender/blob/main/Movie_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Installing scikit-surprise
!pip install scikit-surprise



In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import random

In [None]:
#File paths
# Directory holding the data files; change this one value when running outside Colab.
DATA_DIR = "/content"

DATA_PATH = f"{DATA_DIR}/u.data"    # tab-separated user/item/rating/timestamp rows
MOVIE_PATH = f"{DATA_DIR}/u.item"   # pipe-separated movie metadata
USER_PATH = f"{DATA_DIR}/u.user"    # pipe-separated user demographics

In [None]:
#Load user-item interaction data
ratings = pd.read_csv(
    DATA_PATH,
    sep='\t',
    names=["user_id", "item_id", "rating", "timestamp"],
    header=None
)

#Load movie metadata
# 24 column names are supplied, but usecols keeps only the first three
# (movie_id, title, release_date).
movies = pd.read_csv(
    MOVIE_PATH,
    sep='|',
    names=["movie_id", "title", "release_date", "video_release_date", "IMDb_URL"] + [f"genre_{i}" for i in range(19)],
    encoding='latin-1',
    usecols=[0, 1, 2],
    header=None
)

#Grouping ages into groups
# The top edge is open-ended so that the '50+' label covers every age the
# interactive cell accepts (it validates ages up to 120). With a hard cap of
# 100, older ages fell outside every bin and were assigned no group at all.
age_bins = [0, 18, 25, 35, 50, float("inf")]
age_labels = ['Under 18', '18-24', '25-34', '35-49', '50+']

In [None]:
# Fit an SVD model on 80% of the ratings and score it on the held-out 20%.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings.loc[:, ['user_id', 'item_id', 'rating']], reader)
trainset, testset = train_test_split(data, random_state=42, test_size=0.2)

# Fixed seeds for both the split and the model keep the reported score repeatable.
algo = SVD(random_state=42)
algo.fit(trainset)

# accuracy.rmse prints the score itself (verbose=True) and also returns it,
# which is the value formatted on the last line.
predictions = algo.test(testset)
rmse = accuracy.rmse(predictions, verbose=True)
print(f"Test RMSE: {rmse:.4f}")

RMSE: 0.9352
Test RMSE: 0.9352


In [None]:
#Recommend movies by age group
def recommend_movies_by_age(age, n=10, min_ratings=1, random_state=None):
    """Recommend movies that users in the caller's age group rated highly.

    Reads the module-level ``age_bins``, ``age_labels``, ``ratings``,
    ``movies`` and ``USER_PATH``. The user file is re-read on every call.

    Parameters
    ----------
    age : int or float
        Age of the person asking for recommendations.
    n : int, default 10
        Number of movies to return (fewer if the candidate pool is smaller).
    min_ratings : int, default 1
        Minimum number of ratings a movie needs from the age group before its
        mean is considered. The default keeps every rated movie; raise it to
        stop titles with a single 5-star rating from dominating the pool.
    random_state : int, optional
        Seed for the diversity sampling. When omitted, a seed is drawn from
        the ``random`` module on each call, so results vary between calls.

    Returns
    -------
    pandas.DataFrame
        Columns ``movie_id`` and ``title``, indexed 0..k-1. Empty when the age
        falls outside every configured bin.
    """
    #Determine age group
    # right=False makes each bin include its lower edge and exclude its upper
    # edge, which is what the labels say ('18-24' = 18 <= age < 25). The
    # pandas default (right=True) put 18 in 'Under 18', 25 in '18-24', etc.
    user_age_group = pd.cut([age], bins=age_bins, labels=age_labels, right=False)[0]
    if pd.isna(user_age_group):
        # Outside every bin: say so instead of announcing a 'nan' group.
        print(f"Age {age} does not fall into any configured age group; no age-based recommendations available.")
        return movies.iloc[0:0][['movie_id', 'title']]
    print(f"Based on your age ({age}), you belong to the '{user_age_group}' age group.")

    #Get all users in the same age group
    users = pd.read_csv(
        USER_PATH,
        sep='|',
        names=["user_id", "age", "gender", "occupation", "zip_code"],
        header=None,
        encoding='latin-1'
    )
    users['age_group'] = pd.cut(users['age'], bins=age_bins, labels=age_labels, right=False)
    similar_users = users.loc[users['age_group'] == user_age_group, 'user_id']

    #Aggregate ratings for movies rated by similar users
    similar_user_ratings = ratings[ratings['user_id'].isin(similar_users)]
    rating_stats = similar_user_ratings.groupby('item_id')['rating'].agg(['mean', 'count'])
    avg_ratings = rating_stats.loc[rating_stats['count'] >= min_ratings, 'mean']

    #Select a larger pool of top-rated items
    top_items_pool = avg_ratings.nlargest(n * 3)  # Get 3 times the number of desired recommendations

    #Random sampling for diversity
    if random_state is None:
        random_state = random.randint(1, 100)
    sample_size = min(len(top_items_pool), n)
    sampled_items = top_items_pool.sample(n=sample_size, random_state=random_state).index

    #Merge with movie titles
    top_movies = movies[movies['movie_id'].isin(sampled_items)]
    # Drop the original row labels so callers that number rows by index get 1..k.
    return top_movies[['movie_id', 'title']].reset_index(drop=True)


In [None]:
#Predict Ratings for User Preferences
def predict_user_preferences(preferences, n=10):
    """Recommend the ``n`` unrated movies with the highest predicted rating.

    The previous version predicted with ``uid=1``, which is a real user in the
    ratings data, so the supplied preferences never influenced the result.
    This version adds the preferences as a brand-new user and refits an SVD
    model on all ratings, so the estimates belong to the person who supplied
    them. The refit costs one training pass per call.

    Reads the module-level ``ratings``, ``movies`` and ``algo``.

    Parameters
    ----------
    preferences : dict
        Maps movie id -> rating given by the new user. May be empty, in which
        case the already-fitted ``algo`` is queried for a user it has never
        seen.
    n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``predicted_rating``, best first, indexed 0..k-1.
    """
    # An id that does not occur in the data, so no existing user's profile is reused.
    new_user_id = ratings['user_id'].max() + 1

    if preferences:
        new_user_rows = pd.DataFrame({
            'user_id': new_user_id,
            'item_id': list(preferences.keys()),
            'rating': list(preferences.values()),
        })
        augmented_ratings = pd.concat(
            [ratings[['user_id', 'item_id', 'rating']], new_user_rows],
            ignore_index=True,
        )
        full_trainset = Dataset.load_from_df(
            augmented_ratings, Reader(rating_scale=(1, 5))
        ).build_full_trainset()
        # Same estimator settings as the notebook's training cell.
        model = SVD(random_state=42)
        model.fit(full_trainset)
    else:
        model = algo

    #Generate predictions for all items not rated by the user
    unrated_movie_ids = set(ratings['item_id'].unique()) - set(preferences.keys())
    if not unrated_movie_ids:
        return pd.DataFrame(columns=['title', 'predicted_rating'])

    scored = pd.DataFrame(
        [(movie_id, model.predict(uid=new_user_id, iid=movie_id).est) for movie_id in unrated_movie_ids],
        columns=['movie_id', 'predicted_rating'],
    )

    #Sort by estimated rating and return top N
    # Ties (many estimates are clipped at the top of the scale) are broken by
    # movie id so the result does not depend on set iteration order.
    ranked = scored.sort_values(['predicted_rating', 'movie_id'], ascending=[False, True])
    # Joining on movie_id keeps every score attached to its own title; the old
    # positional list assignment paired scores in rank order with rows in
    # movie_id order.
    top_movies = ranked.merge(movies[['movie_id', 'title']], on='movie_id', how='inner').head(n)
    return top_movies[['title', 'predicted_rating']].reset_index(drop=True)

In [None]:
#Recommendation System
# Interactive flow: ask for an age, show age-group picks, collect the user's
# ratings for those picks, then show predictions based on those ratings.
print("Welcome to the Enhanced Movie Recommender System!")
try:
    #Ask for user's age
    user_age = int(input("Enter your age: "))
    if user_age < 0 or user_age > 120:
        raise ValueError("Age must be between 0 and 120.")

    #Get recommendations based on age group
    age_group_recommendations = recommend_movies_by_age(user_age, n=5)
    print("\nAge-Group-Based Recommendations:")
    # Number the list 1..k explicitly; the DataFrame index is a row label, not a rank.
    for rank, movie in enumerate(age_group_recommendations.itertuples(index=False), start=1):
        print(f"{rank}. {movie.title}")

    #Ask for user's preferences
    print("\nLet's tailor your recommendations further. Please rate the following movies (1-5):")
    preferences = {}
    for movie in age_group_recommendations.itertuples(index=False):
        try:
            rating = float(input(f"How would you rate '{movie.title}'? (1-5): "))
            # Positive range check so that 'nan' is rejected as well
            # (nan < 1 and nan > 5 are both False).
            if not 1 <= rating <= 5:
                raise ValueError("Rating must be between 1 and 5.")
            preferences[movie.movie_id] = rating
        except ValueError as e:
            # Skip this movie and keep asking about the rest.
            print(f"Invalid rating: {e}")

    #Get recommendations based on preferences
    preference_recommendations = predict_user_preferences(preferences, n=10)
    print("\nPersonalized Recommendations Based on Your Preferences:")
    for rank, movie in enumerate(preference_recommendations.itertuples(index=False), start=1):
        print(f"{rank}. {movie.title} (Predicted Rating: {movie.predicted_rating:.2f})")

except ValueError as e:
    print(f"Invalid input: {e}")

Welcome to the Enhanced Movie Recommender System!
Enter your age: 36
Based on your age (36), you belong to the '35-49' age group.

Age-Group-Based Recommendations:
976. Solo (1996)
1229. Poison Ivy II (1995)
1347. Ballad of Narayama, The (Narayama Bushiko) (1958)
1367. Faust (1994)
1389. Mondo (1996)

Let's tailor your recommendations further. Please rate the following movies (1-5):
How would you rate 'Solo (1996)'? (1-5): 1
How would you rate 'Poison Ivy II (1995)'? (1-5): 4
How would you rate 'Ballad of Narayama, The (Narayama Bushiko) (1958)'? (1-5): 3
How would you rate 'Faust (1994)'? (1-5): 3
How would you rate 'Mondo (1996)'? (1-5): 5

Personalized Recommendations Based on Your Preferences:
48. Hoop Dreams (1994) (Predicted Rating: 5.00)
50. Star Wars (1977) (Predicted Rating: 4.97)
114. Wallace & Gromit: The Best of Aardman Animation (1996) (Predicted Rating: 4.94)
134. Citizen Kane (1941) (Predicted Rating: 4.94)
169. Wrong Trousers, The (1993) (Predicted Rating: 4.93)
172. Em

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_movies['predicted_rating'] = [p[1] for p in top_predictions]
