In [1]:

# Content-based movie recommender system using genres

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load cleaned data
df = pd.read_csv("cleaned_netflix_data.csv")

# Fill NaNs in description or genre
df['description'].fillna("", inplace=True)
df['listed_in'].fillna("", inplace=True)

# Combine genres and description for better context
df['combined_features'] = df['listed_in'] + " " + df['description']

# Vectorize combined text
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Compute cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reset index and map titles
df = df.reset_index()
title_indices = pd.Series(df.index, index=df['title'].str.lower())

# Recommender function
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = title_indices.get(title.lower())
    if idx is None:
        return ["Title not found. Please check the spelling or try another."]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return df['title'].iloc[movie_indices].tolist()

# Example usage
print("\nRecommended for 'Breaking Bad':")
print(get_recommendations("Breaking Bad"))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['description'].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['listed_in'].fillna("", inplace=True)



Recommended for 'Breaking Bad':
['Extracurricular', 'Iron Ladies', 'Have You Ever Fallen in Love, Miss Jiang?', 'Sparta', 'The Mess You Leave Behind', 'The Writer', 'Love 101', 'Good Morning Call', 'The Underclass', 'Age of Rebellion']
