Load the essential libraries: pandas and numpy handle data processing, plotly handles the visualizations, and LightFM is the core recommendation engine. The ast module parses the stringified JSON fields in the datasets.

In [None]:
!pip install gradio plotly lightfm

Collecting gradio
  Downloading gradio-5.20.0-py3-none-any.whl.metadata (16 kB)
Collecting lightfm
  Using cached lightfm-1.17.tar.gz (316 kB)
  Preparing metadata (setup.py) ... done
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=

In [None]:
# ============================
# 📚 Import Libraries
# ============================
import pandas as pd
import numpy as np
import ast
import os
import json
import plotly.express as px
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.sparse import csr_matrix
from lightfm import LightFM
import plotly.io as pio

# Ensure plotly works in Colab
pio.renderers.default = 'colab'


In [None]:
# ============================
# 📥 Load and Preprocess Data
# ============================
print("📥 Loading Data...")

# Raw TMDB tables, read from the mounted Google Drive folder.
movies = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Movie Recommender System/tmdb_5000_movies.csv')
credits = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Movie Recommender System/tmdb_5000_credits.csv')

# Join credits onto movies by id, then collapse the two title columns the
# merge produces (title_x / title_y) into a single 'title' column, preferring
# title_x and falling back to title_y where title_x is missing.
movies = (
    movies
    .merge(credits, left_on='id', right_on='movie_id')
    .drop(columns='movie_id')
    .assign(title=lambda df: df['title_x'].fillna(df['title_y']))
    .drop(columns=['title_x', 'title_y'])
)

def parse_genres(genre_str):
    """Parse a stringified list of genre records into lowercase genre names.

    Args:
        genre_str: Text holding a list literal of dicts that each carry a
            ``'name'`` entry, e.g. ``'[{"id": 28, "name": "Action"}]'``.

    Returns:
        A list of stripped, lowercased genre names. ``['unknown']`` is
        returned when the value cannot be parsed, does not have the expected
        list-of-dicts shape, or yields no genres at all.
    """
    try:
        records = ast.literal_eval(genre_str)
        genres = [record['name'].strip().lower() for record in records]
    except (ValueError, SyntaxError, TypeError, KeyError, AttributeError):
        # ValueError/SyntaxError: not a valid literal (includes None / NaN).
        # TypeError/KeyError/AttributeError: parsed fine, but it is not a
        # list of dicts with a string 'name' -- treat it like any other
        # unparseable row instead of aborting the whole DataFrame.apply().
        return ['unknown']
    return genres or ['unknown']

# Turn the raw genre strings into lists of lowercase genre names.
movies['genres'] = movies['genres'].apply(parse_genres)

# Drop rows whose genres could not be determined. The explicit .copy() makes
# the filtered result an independent frame, so the column assignments that
# follow do not raise pandas' SettingWithCopyWarning.
movies = movies[movies['genres'].apply(lambda g: 'unknown' not in g)].copy()
movies['title'] = movies['title'].str.strip()

# Keep one row per movie id; copy again so later column additions are safe.
movies = movies.drop_duplicates(subset=['id']).copy()

print("✅ Data Loaded & Preprocessed")

# ============================
# 🔗 Genre Features Matrix
# ============================
# One-hot encode each movie's genre list: one row per movie (indexed by movie
# id), one column per distinct genre name.
mlb = MultiLabelBinarizer()
genre_indicator = mlb.fit_transform(movies['genres'])
genre_features = pd.DataFrame(
    genre_indicator,
    index=movies['id'],
    columns=mlb.classes_,
)

print("✅ Genre Features Matrix Created")

# ============================
# 👤 Simulated User Interactions
# ============================
# Fixed seed so the simulated watch histories are the same on every run.
np.random.seed(42)
num_users = 100
user_movie_interactions = []
all_movie_ids = movies['id'].values

for user_id in range(num_users):
    # Each user "watches" between 5 and 19 distinct movies, chosen uniformly.
    watch_count = np.random.randint(5, 20)
    watched_movies = np.random.choice(all_movie_ids, size=watch_count, replace=False)
    user_movie_interactions.extend(
        (user_id, movie_id, 1) for movie_id in watched_movies
    )

interactions_df = pd.DataFrame(
    user_movie_interactions,
    columns=['user_id', 'item_id', 'rating'],
)

# Map each movie id to its row position in `movies`; that position is the
# item index used by the interaction matrix.
movie_id_to_idx = dict(zip(movies['id'], range(len(movies))))
movies['matrix_index'] = movies['id'].map(movie_id_to_idx)
interactions_df['item_id'] = interactions_df['item_id'].map(movie_id_to_idx)

print("✅ Simulated User Interactions Created")

# ============================
# 📊 Interaction & Feature Matrices
# ============================
# Sparse user x item matrix: entry (u, i) holds the rating recorded for user u
# on the movie at matrix index i.
row_idx = interactions_df['user_id']
col_idx = interactions_df['item_id']
ratings = interactions_df['rating']
interaction_matrix = csr_matrix(
    (ratings, (row_idx, col_idx)),
    shape=(num_users, len(movies)),
)

# Genre indicator rows, ordered like `movies` so row i describes item index i.
ordered_genre_rows = genre_features.loc[movies['id']]
item_features = csr_matrix(ordered_genre_rows.values)

print("✅ Matrices Ready")

# ============================
# 🧠 Train LightFM Model
# ============================
print("🧠 Training LightFM Model...")
# LightFM draws its initial embeddings and training samples from its own
# `random_state`; np.random.seed() does not affect it. Seed it explicitly so
# training starts from the same state on every run.
# NOTE(review): with num_threads > 1 the parallel updates may still introduce
# small run-to-run differences -- confirm whether exact repeatability matters.
model = LightFM(loss='warp', random_state=42)
model.fit(interaction_matrix, item_features=item_features, epochs=30, num_threads=4)
print("✅ Model Trained")

# ============================
# 💾 Persistent Session Tracking
# ============================
# JSON file (relative to the working directory) that stores, per user id, the
# list of titles last recommended to that user.
session_file = "user_session_memory.json"

def load_session():
    """Load the saved session memory from ``session_file``.

    Returns:
        dict: The stored mapping. An empty dict is returned when the file
        does not exist, cannot be decoded as JSON, or does not contain a
        JSON object, so that a damaged file never blocks start-up.
    """
    try:
        with open(session_file, 'r', encoding='utf-8') as f:
            session_data = json.load(f)
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f"⚠️ Ignoring unreadable session file '{session_file}': {exc}")
        return {}

    # Callers index the result by string key, so anything but a dict is unusable.
    if not isinstance(session_data, dict):
        print(f"⚠️ Ignoring session file '{session_file}': expected a JSON object")
        return {}
    return session_data

def save_session(session_data):
    """Write ``session_data`` to ``session_file`` as JSON.

    The data is first written to a temporary sibling file and then moved into
    place, so an interrupted write cannot leave a truncated session file.

    Args:
        session_data: JSON-serialisable mapping to persist.
    """
    tmp_path = session_file + ".tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(session_data, f)
    os.replace(tmp_path, session_file)

session_memory = load_session()

# ============================
# ⭐ Trending Movies
# ============================
def recommend_trending_movies_by_genre(genre=None, top_n=10):
    """Print the movies with the most recorded interactions.

    "Popularity" here is the number of rows in ``interactions_df`` that refer
    to the movie's ``matrix_index``.

    Args:
        genre: Optional genre name (case-insensitive). When falsy, all movies
            are considered.
        top_n: Maximum number of movies to print.

    Returns:
        None. The result is printed.
    """
    if genre:
        wanted = genre.strip().lower()
        genre_movies = movies[movies['genres'].apply(lambda g: wanted in g)]
    else:
        genre_movies = movies

    interaction_counts = interactions_df['item_id'].value_counts()
    # assign() returns a new frame: no SettingWithCopyWarning on the filtered
    # slice, and the shared `movies` frame is never modified by this function.
    genre_movies = genre_movies.assign(
        popularity=genre_movies['matrix_index'].map(interaction_counts).fillna(0)
    )
    trending = genre_movies.sort_values('popularity', ascending=False).head(top_n)

    print(f"\n🔥 Trending Movies {'in ' + genre.capitalize() if genre else 'Overall'}:")

    if trending.empty:
        # Typically a misspelled or unknown genre; say so instead of dumping
        # an "Empty DataFrame" representation.
        print("No movies found." if not genre else f"No movies found for genre '{genre}'.")
        return

    display_df = trending[['title', 'genres', 'popularity']].copy()
    display_df['genres'] = display_df['genres'].apply(lambda g: ', '.join(g))

    print(display_df.to_string(index=False))

# ============================
# 📊 Visualization
# ============================
def visualize_recommendations(user_id, recommended_movies, scores):
    """Show a horizontal bar chart of the scores of the recommended movies.

    Args:
        user_id: Identifier shown in the chart title.
        recommended_movies: DataFrame with ``'title'`` and ``'genres'``
            columns; each genres entry is joined into one label for colouring.
        scores: Bar lengths, one per row of ``recommended_movies``.
    """
    genre_labels = recommended_movies['genres'].apply(', '.join)
    chart_title = f"Top {len(recommended_movies)} Recommendations for User {user_id}"

    fig = px.bar(
        x=scores,
        y=recommended_movies['title'],
        orientation='h',
        color=genre_labels,
        title=chart_title,
    )
    fig.show()

# ============================
# 🎯 Recommend Movies for User
# ============================
def recommend_for_user(user_id, genre_filter=None, num_recommendations=10):
    """Print (and chart) the top-scoring movies for one user.

    Unfiltered results are cached per user in ``session_memory`` and persisted
    with ``save_session``; a later unfiltered call prints the cached titles
    instead of scoring again, provided the cache holds enough titles.

    Args:
        user_id: User index passed to ``model.predict``.
        genre_filter: Optional genre name (case-insensitive). Only movies
            whose genre list contains it are eligible.
        num_recommendations: Maximum number of movies to show.

    Returns:
        None. Results are printed and, when freshly computed, visualized.
    """
    cache_key = str(user_id)
    if genre_filter is None and cache_key in session_memory:
        cached_titles = session_memory[cache_key]
        # Only reuse the cache when it can satisfy the requested count.
        if len(cached_titles) >= num_recommendations:
            print(f"\n✅ Using Cached Recommendations for User {user_id}:")
            for idx, title in enumerate(cached_titles[:num_recommendations], start=1):
                print(f"{idx}. {title}")
            return

    scores = model.predict(user_id, np.arange(len(movies)), item_features=item_features)

    if genre_filter:
        wanted = genre_filter.strip().lower()
        mask = movies['genres'].apply(lambda g: wanted in g).to_numpy()
        # Non-matching movies get -inf so they sort last.
        scores = np.where(mask, scores, -np.inf)

    recommended_indices = np.argsort(-scores)[:num_recommendations]
    # If fewer movies match the filter than were requested, the tail of the
    # ranking consists of -inf (non-matching) movies: drop them.
    recommended_indices = recommended_indices[np.isfinite(scores[recommended_indices])]

    if recommended_indices.size == 0:
        print(f"\n⚠️ No movies found for genre '{genre_filter}'.")
        return

    recommended_movies = movies.iloc[recommended_indices]

    # Cache only unfiltered results; otherwise a genre-specific list would be
    # served later as the user's general recommendations.
    if not genre_filter:
        session_memory[cache_key] = recommended_movies['title'].tolist()
        save_session(session_memory)

    print(f"\n🎯 Recommended Movies for User {user_id}:")
    display_df = recommended_movies[['title', 'genres']].copy()
    display_df['genres'] = display_df['genres'].apply(lambda g: ', '.join(g))
    print(display_df.to_string(index=False))

    visualize_recommendations(user_id, recommended_movies, scores[recommended_indices])

# ============================
# 🎬 Main Interactive Loop
# ============================
print("\n🚀 Movie Recommender Ready!")
print(f"Available User IDs: [0 - {num_users-1}]")

# Ask for trending genre
genre = input("\nEnter a genre for trending movies (or press Enter to show all): ").strip()
recommend_trending_movies_by_genre(genre if genre else None)

# Ask for user ID (the valid range is derived from num_users, not hard-coded)
user_input = input(
    f"\nEnter User ID for personalized recommendations (0-{num_users-1}) or press Enter to skip: "
).strip()

if user_input:
    # Reject non-numeric or out-of-range ids up front rather than crashing in
    # int() or passing an unknown user index to the model.
    try:
        user_id = int(user_input)
    except ValueError:
        user_id = None

    if user_id is None or not 0 <= user_id < num_users:
        print(f"⚠️ Invalid User ID '{user_input}'. Expected a whole number from 0 to {num_users-1}.")
    else:
        # Ask for genre filter
        genre_filter = input("Filter recommendations by genre (or press Enter for all genres): ").strip()
        recommend_for_user(user_id, genre_filter if genre_filter else None)
else:
    print("✅ Skipped Personalized Recommendations")

print("\n✅ Session Complete!")


📥 Loading Data...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



✅ Data Loaded & Preprocessed
✅ Genre Features Matrix Created
✅ Simulated User Interactions Created
✅ Matrices Ready
🧠 Training LightFM Model...
✅ Model Trained

🚀 Movie Recommender Ready!
Available User IDs: [0 - 99]

Enter a genre for trending movies (or press Enter to show all): comey

🔥 Trending Movies in Comey:
Empty DataFrame
Columns: [title, genres, popularity]
Index: []

Enter User ID for personalized recommendations (0-99) or press Enter to skip: 1
Filter recommendations by genre (or press Enter for all genres): action

🎯 Recommended Movies for User 1:
                           title                                                                 genres
       Jimmy Neutron: Boy Genius action, adventure, animation, comedy, family, fantasy, science fiction
Teenage Mutant Ninja Turtles III            action, adventure, comedy, family, fantasy, science fiction
                    Thunderbirds            action, adventure, comedy, family, fantasy, science fiction
                 


✅ Session Complete!
