<a href="https://colab.research.google.com/github/ishuu9837/NETFLIX-CONTENT-RECOMMENDATION/blob/main/PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Step 1: Read the Netflix catalogue into a DataFrame
# Location of the CSV file; adjust this if the file lives somewhere else
DATA_PATH = '/content/netflix_titles.csv'
df = pd.read_csv(DATA_PATH)

In [12]:
# Step 2: Prepare the text features
# Missing director / cast / genre entries become empty strings so that
# the string concatenation below never propagates NaN
text_columns = ['director', 'cast', 'listed_in']
df[text_columns] = df[text_columns].fillna('')

# Join the three text fields (space-separated) into one 'metadata' string per
# title; this is the text the content-based filter works on
df['metadata'] = df['director'].str.cat([df['cast'], df['listed_in']], sep=' ')

In [13]:
# Step 3: Turn the metadata text into numbers
# The vectorizer is configured to discard common English stop words
tfidf = TfidfVectorizer(stop_words='english')
# Fit the vectorizer on the 'metadata' column and encode it in the same call;
# the result has one row per title
tfidf_matrix = tfidf.fit_transform(raw_documents=df['metadata'])

In [14]:
# Step 4: Pairwise similarity between titles
# The TF-IDF matrix is compared against itself, so entry [i, j] is the
# cosine similarity between title i and title j
cosine_sim = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)

In [15]:
# Step 5: Create a reverse mapping of title to index
# Series.drop_duplicates() removes duplicate *values*; the values here are the
# row indices, which are already unique, so it would leave repeated titles in
# place. Deduplicate on the index (the titles) instead, keeping the first
# occurrence, so that indices[title] always yields a single row index.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [16]:
# Step 6: Define the recommendation function
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles whose metadata is most similar to ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the module-level ``indices`` mapping.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of title ``i``
        against every title. Defaults to the module-level ``cosine_sim``.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        The ``title`` values of the ``top_n`` best-scoring rows, ordered from
        most to least similar. The queried row itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    if title not in indices.index:
        raise KeyError(f"Title {title!r} not found in the dataset")

    idx = indices[title]
    # A repeated title makes the lookup return a Series of row indices;
    # fall back to the first match so the row lookup below stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # NOTE(review): idx is an index label used as a row position in
    # cosine_sim; this assumes df carries the default 0..n-1 index - confirm.

    # Pair every row position with its score, excluding the query row by
    # position. Dropping "the first sorted entry" is not reliable: another
    # title can tie with the query at the top score, and a query whose vector
    # is all zeros scores 0 against itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the requested number of rows (a negative top_n yields none)
    title_indices = [position for position, _ in sim_scores[:max(top_n, 0)]]

    return df['title'].iloc[title_indices]

In [17]:
# Step 7: Try the recommender on a sample title
# Any title that exists in the dataset can be substituted here
sample_recommendations = get_recommendations('Kota Factory')
print(sample_recommendations)


1038                         Dancing Angels
2982                          Find Yourself
7438       Melodies of Life - Born This Way
242                   Comedy Premium League
751                          Guru Aur Bhole
1535                  How To Ruin Christmas
1331    Five Came Back: The Reference Films
4250      Pioneers: First Women Filmmakers*
8173                              Thackeray
1093                            The Big Day
Name: title, dtype: object
