In [5]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Load Dataset from GitHub (replace with your GitHub raw link)
# NOTE(review): this URL carries a `token=` query parameter, i.e. a credential
# committed in source. Presumably it is short-lived and will stop working --
# confirm. If the repository is public, drop the query string; otherwise read
# the token from configuration instead of hard-coding it.
url = 'https://raw.githubusercontent.com/Victoria-Aholu/Netflix-Movie-Recommendation-System/refs/heads/main/netflix_titles.csv?token=GHSAT0AAAAAADFSEI7VFVLB7FV6BJ7QQXSG2CLMAEQ'
df = pd.read_csv(url)

# Step 3: Basic Cleaning
# Rows without a description cannot be vectorized. After dropping them the
# index is renumbered 0..n-1 so that DataFrame labels coincide with the row
# positions of the TF-IDF and similarity matrices built below.
df.dropna(subset=['description'], inplace=True)
df.reset_index(drop=True, inplace=True)

# Step 4: Vectorize Descriptions using TF-IDF
# English stop words are removed before weighting; one matrix row per title.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Step 5: Compute Cosine Similarity
# Pairwise similarity of every description against every other one, so
# cosine_sim[i][j] compares row i of df with row j.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 6: Index Mapping
# Map each title to its row number. Series.drop_duplicates() would compare the
# *values* (row numbers, which are already unique) and therefore never remove
# a repeated title, so repeated index labels are filtered out explicitly. The
# first occurrence of each title wins, which guarantees a scalar per lookup.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Step 7: Recommendation Function
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles whose descriptions are most similar to *title*.

    Args:
        title: Exact title to look up in the module-level ``indices`` mapping.
        cosine_sim: Square similarity matrix in which row/column ``i``
            corresponds to row ``i`` of the module-level ``df``. Defaults to
            the matrix computed when this function was defined.

    Returns:
        A pandas Series holding up to five titles from ``df['title']``,
        ordered from most to least similar, or the string
        ``"Title not found"`` when *title* is not in ``indices``.
    """
    idx = indices.get(title)
    if idx is None:
        return "Title not found"

    # A title that occurs more than once makes the lookup return a Series of
    # row numbers instead of a scalar; fall back to its first occurrence so
    # that the similarity row below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the queried row by position rather than assuming it sorts first:
    # another row with an identical description also scores 1.0 and could
    # otherwise push the query itself into its own recommendations.
    sim_scores = [
        (row, score)
        for row, score in enumerate(cosine_sim[idx])
        if row != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [row for row, _ in sim_scores[:5]]
    return df['title'].iloc[movie_indices]

# Step 8: Try It!
# Print a header naming the query, then whatever the recommender returns for it.
movie_title = "Sherlock Holmes"
print(f"Recommendations for '{movie_title}':")
recommendations = get_recommendations(movie_title)
print(recommendations)


Recommendations for 'Sherlock Holmes':
8076       Spy Kids 3: Game Over
3836           In Family I Trust
4997                 Borderliner
8032                   Skiptrace
7494    Monster High: Fright On!
Name: title, dtype: object
