In [28]:
import pandas as pd

df = pd.read_csv("/content/Netflix Dataset.csv")

display(df.head())
df.info()
display(df.describe())

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",PG-13,123 min,Dramas,A brilliant group of students become card-coun...


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7789 entries, 0 to 7788
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Show_Id       7789 non-null   object
 1   Category      7789 non-null   object
 2   Title         7789 non-null   object
 3   Director      5401 non-null   object
 4   Cast          7071 non-null   object
 5   Country       7282 non-null   object
 6   Release_Date  7779 non-null   object
 7   Rating        7782 non-null   object
 8   Duration      7789 non-null   object
 9   Type          7789 non-null   object
 10  Description   7789 non-null   object
dtypes: object(11)
memory usage: 669.5+ KB


Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
count,7789,7789,7789,5401,7071,7282,7779,7782,7789,7789,7789
unique,7787,2,7787,4050,6831,681,1565,14,216,492,7769
top,s6621,Movie,The Lost Okoroshi,"Raúl Campos, Jan Suter",David Attenborough,United States,"January 1, 2020",TV-MA,1 Season,Documentaries,Multiple women report their husbands as missin...
freq,2,5379,2,18,18,2556,118,2865,1608,334,3


## Data preprocessing and feature engineering

### Subtask:
Clean the data, handle missing values, and engineer relevant features for clustering.


**Reasoning**:
Handle missing values and convert 'Release_Date' to datetime, then extract the year.



In [29]:
# Handle missing values
# For Director, Cast, Country, and Rating, fill missing values with a placeholder
df['Director'] = df['Director'].fillna('Unknown')
df['Cast'] = df['Cast'].fillna('Unknown')
df['Country'] = df['Country'].fillna('Unknown')
df['Rating'] = df['Rating'].fillna('Unknown')

# Convert 'Release_Date' to datetime using the 'mixed' format and coerce errors
df['Release_Date'] = pd.to_datetime(df['Release_Date'], format='mixed', errors='coerce')

# Handle potential NaT values created by coercion during datetime conversion
# For simplicity, we'll fill NaT values in 'Release_Date' with a default date (e.g., the earliest date in the dataset or a placeholder)
# Let's fill with the earliest date found after successful conversion
earliest_date = df['Release_Date'].min()
df['Release_Date'] = df['Release_Date'].fillna(earliest_date)

# Extract the year from 'Release_Date'
df['Release_Year'] = df['Release_Date'].dt.year

# Display the updated dataframe info and head
df.info()
display(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7789 entries, 0 to 7788
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Show_Id       7789 non-null   object        
 1   Category      7789 non-null   object        
 2   Title         7789 non-null   object        
 3   Director      7789 non-null   object        
 4   Cast          7789 non-null   object        
 5   Country       7789 non-null   object        
 6   Release_Date  7789 non-null   datetime64[ns]
 7   Rating        7789 non-null   object        
 8   Duration      7789 non-null   object        
 9   Type          7789 non-null   object        
 10  Description   7789 non-null   object        
 11  Release_Year  7789 non-null   int32         
dtypes: datetime64[ns](1), int32(1), object(10)
memory usage: 699.9+ KB


Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_Year
0,s1,TV Show,3%,Unknown,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020-08-14,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016-12-23,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2018-12-20,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2017-11-16,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2020-01-01,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020


**Reasoning**:
The previous attempt to convert 'Release_Date' to datetime failed due to inconsistent date formats. Use the 'mixed' format option in `pd.to_datetime` to handle variations in the date strings.



In [30]:
# Convert 'Release_Date' to datetime using the 'mixed' format
df['Release_Date'] = pd.to_datetime(df['Release_Date'], format='mixed', errors='coerce')

# Handle potential NaT values created by coercion during datetime conversion
# For simplicity, we'll fill NaT values in 'Release_Date' with a default date (e.g., the earliest date in the dataset or a placeholder)
# Let's fill with the earliest date found after successful conversion
earliest_date = df['Release_Date'].min()
df['Release_Date'] = df['Release_Date'].fillna(earliest_date)


# Extract the year from 'Release_Date'
df['Release_Year'] = df['Release_Date'].dt.year

# Display the updated dataframe info and head
df.info()
display(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7789 entries, 0 to 7788
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Show_Id       7789 non-null   object        
 1   Category      7789 non-null   object        
 2   Title         7789 non-null   object        
 3   Director      7789 non-null   object        
 4   Cast          7789 non-null   object        
 5   Country       7789 non-null   object        
 6   Release_Date  7789 non-null   datetime64[ns]
 7   Rating        7789 non-null   object        
 8   Duration      7789 non-null   object        
 9   Type          7789 non-null   object        
 10  Description   7789 non-null   object        
 11  Release_Year  7789 non-null   int32         
dtypes: datetime64[ns](1), int32(1), object(10)
memory usage: 699.9+ KB


Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_Year
0,s1,TV Show,3%,Unknown,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020-08-14,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016-12-23,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2018-12-20,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2017-11-16,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2020-01-01,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020


**Reasoning**:
Process text data using TF-IDF and handle categorical features using one-hot encoding, then combine all features into a single dataframe for clustering.



In [31]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Process 'Description' using TF-IDF
tfidf = TfidfVectorizer(stop_words='english', max_features=1000) # Limiting features to 1000 for simplicity
description_features = tfidf.fit_transform(df['Description']).toarray()
description_df = pd.DataFrame(description_features, columns=[f'desc_tfidf_{i}' for i in range(description_features.shape[1])])

# Handle categorical features using one-hot encoding
categorical_cols = ['Category', 'Rating', 'Duration', 'Type']
# For simplicity, we'll only encode a subset of the 'Type' column if it has too many unique values
# Let's check the number of unique values in 'Type'
if df['Type'].nunique() > 50: # Arbitrary threshold
    # Get the top 50 types and encode them, group others into 'Other'
    top_types = df['Type'].value_counts().nlargest(50).index.tolist()
    df['Type_encoded'] = np.where(df['Type'].isin(top_types), df['Type'], 'Other')
    categorical_cols_to_encode = ['Category', 'Rating', 'Duration', 'Type_encoded']
else:
    categorical_cols_to_encode = categorical_cols

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_features = encoder.fit_transform(df[categorical_cols_to_encode])
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(categorical_cols_to_encode))

# Combine all features
# Include 'Release_Year' as a numerical feature
clustering_df = pd.concat([df[['Release_Year']], description_df, encoded_df], axis=1)

# Display the first few rows and information of the clustering dataframe
display(clustering_df.head())
clustering_df.info()

Unnamed: 0,Release_Year,desc_tfidf_0,desc_tfidf_1,desc_tfidf_2,desc_tfidf_3,desc_tfidf_4,desc_tfidf_5,desc_tfidf_6,desc_tfidf_7,desc_tfidf_8,...,"Type_encoded_International TV Shows, Romantic TV Shows, TV Dramas","Type_encoded_International TV Shows, TV Dramas",Type_encoded_Kids' TV,"Type_encoded_Kids' TV, TV Comedies",Type_encoded_Movies,Type_encoded_Other,Type_encoded_Reality TV,Type_encoded_Stand-Up Comedy,Type_encoded_TV Comedies,Type_encoded_Thrillers
0,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,2016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7789 entries, 0 to 7788
Columns: 1285 entries, Release_Year to Type_encoded_Thrillers
dtypes: float64(1284), int32(1)
memory usage: 76.3 MB


## Clustering

### Subtask:
Apply appropriate clustering algorithms (e.g., K-Means, DBSCAN) to group similar Netflix videos based on their features.


**Reasoning**:
Apply the KMeans clustering algorithm to the `clustering_df` and add the resulting cluster labels to the original `df` DataFrame.



In [32]:
from sklearn.cluster import KMeans

# Instantiate KMeans with a reasonable number of clusters (e.g., 15)
# Setting random_state for reproducibility
kmeans = KMeans(n_clusters=15, random_state=42, n_init=10)

# Fit the KMeans model to the clustering_df DataFrame
kmeans.fit(clustering_df)

# Add the cluster labels to the original df DataFrame
df['cluster_label'] = kmeans.labels_

# Display the head of the original df to show the new column
display(df.head())

Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description,Release_Year,Type_encoded,cluster_label
0,s1,TV Show,3%,Unknown,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020-08-14,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020,Other,13
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016-12-23,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016,"Dramas, International Movies",3
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2018-12-20,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018,"Horror Movies, International Movies",1
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2017-11-16,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017,Other,1
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2020-01-01,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020,Dramas,4


## Recommendation system development

### Subtask:
Based on the clusters, develop a recommendation system. This could involve recommending other videos within the same cluster as a watched video, or using cluster centroids to find similar videos.


**Reasoning**:
Define a function `recommend_videos` that takes a `Show_Id` as input, finds its cluster, filters the dataframe to that cluster, and returns a few random recommendations.



In [33]:
def recommend_videos(show_id, df, n_recommendations=5):
    """
    Recommends videos from the same cluster as the input video.

    Args:
        show_id: The Show_Id of the video to get recommendations for.
        df: The DataFrame containing video information and cluster labels.
        n_recommendations: The number of recommendations to return.

    Returns:
        A DataFrame containing recommended videos (Title and Category),
        or a message if the show is not found.
    """
    # Find the row for the input Show_Id
    show_row = df[df['Show_Id'] == show_id]

    if show_row.empty:
        return f"Show with Show_Id '{show_id}' not found."

    # Get the cluster label
    cluster_label = show_row['cluster_label'].iloc[0]

    # Filter videos in the same cluster, excluding the input video
    same_cluster_videos = df[(df['cluster_label'] == cluster_label) & (df['Show_Id'] != show_id)]

    # Select random recommendations
    if len(same_cluster_videos) < n_recommendations:
        recommendations = same_cluster_videos
    else:
        recommendations = same_cluster_videos.sample(n=n_recommendations, random_state=42) # Use random_state for reproducibility

    return recommendations[['Title', 'Category']]

# Example usage: Get recommendations for a show
example_show_id = 's1'  # Replace with a valid Show_Id from your dataset
recommendations = recommend_videos(example_show_id, df)
display(recommendations)

Unnamed: 0,Title,Category
5490,Selling Sunset,TV Show
3967,Marvel's Jessica Jones,TV Show
1192,Can You Hear Me?,TV Show
1341,Chip and Potato,TV Show
1413,Cleo & Cuquin,TV Show


## Evaluation

### Subtask:
Evaluate the performance of the clustering and recommendation system using relevant metrics.


**Reasoning**:
Calculate and display the distribution of videos across the created clusters and display the top occurring video categories within a sample of clusters.



In [34]:
# 1. Calculate and display the distribution of videos across the created clusters.
cluster_distribution = df['cluster_label'].value_counts().sort_index()
print("Distribution of videos across clusters:")
display(cluster_distribution)

# 2. For a sample of clusters, display the top occurring video categories within each cluster.
# Let's sample a few clusters, e.g., the first 5 clusters
sample_clusters = cluster_distribution.head(5).index.tolist()

print("\nTop video categories in sample clusters:")
for cluster in sample_clusters:
    print(f"\nCluster {cluster}:")
    # Get videos in the current cluster
    cluster_videos = df[df['cluster_label'] == cluster]
    # Calculate the frequency of each Category within the cluster
    category_counts = cluster_videos['Category'].value_counts()
    # Display the top categories (e.g., top 3)
    display(category_counts.head(3))

Distribution of videos across clusters:


Unnamed: 0_level_0,count
cluster_label,Unnamed: 1_level_1
0,584
1,745
2,654
3,398
4,755
5,395
6,42
7,568
8,557
9,303



Top video categories in sample clusters:

Cluster 0:


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
Movie,584



Cluster 1:


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
Movie,745



Cluster 2:


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
TV Show,654



Cluster 3:


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
Movie,335
TV Show,63



Cluster 4:


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
Movie,755


**Reasoning**:
Discuss the limitations of the current recommendation system based on clustering and suggest potential areas for improvement or further evaluation metrics.



In [35]:
print("Limitations of the current clustering-based recommendation system:")
print("- The recommendation system is based solely on content similarity derived from clustering. It does not take into account user viewing history, preferences, or ratings.")
print("- The quality of recommendations is directly dependent on the effectiveness of the clustering. If clusters are not well-defined or do not capture meaningful relationships between videos, the recommendations will be poor.")
print("- The current approach recommends videos within the same cluster. This might lead to a lack of diversity in recommendations, potentially showing users many similar items they might not be interested in after the first few.")
print("- The 'Unknown' values filled in during preprocessing for Director, Cast, Country, etc., might introduce noise into the clustering process.")
print("- The TF-IDF vectorization with a limited number of features (1000) might not fully capture the nuances of the descriptions.")
print("- The one-hot encoding of 'Type' by grouping into 'Other' for less frequent types might lose valuable information.")
print("- The number of clusters (15) was chosen arbitrarily and may not be optimal for this dataset.")


print("\nPotential areas for improvement and further evaluation metrics:")
print("- Incorporate user-based filtering or collaborative filtering techniques to leverage user interaction data.")
print("- Explore different clustering algorithms and evaluate their performance using metrics like Silhouette score or Davies-Bouldin index.")
print("- Experiment with different feature engineering techniques, such as using word embeddings (e.g., Word2Vec, GloVe) for descriptions or more sophisticated methods for handling categorical features.")
print("- Use dimensionality reduction techniques (e.g., PCA) before clustering to potentially improve performance and reduce noise.")
print("- Implement methods to evaluate the diversity and novelty of recommendations.")
print("- Conduct A/B testing or user studies to get feedback on the quality of recommendations.")
print("- Explore techniques for dynamic clustering or recommendations that adapt to user behavior over time.")
print("- Evaluate cluster stability and robustness.")
print("- Consider using external data sources or knowledge graphs to enrich video features.")

Limitations of the current clustering-based recommendation system:
- The recommendation system is based solely on content similarity derived from clustering. It does not take into account user viewing history, preferences, or ratings.
- The quality of recommendations is directly dependent on the effectiveness of the clustering. If clusters are not well-defined or do not capture meaningful relationships between videos, the recommendations will be poor.
- The current approach recommends videos within the same cluster. This might lead to a lack of diversity in recommendations, potentially showing users many similar items they might not be interested in after the first few.
- The 'Unknown' values filled in during preprocessing for Director, Cast, Country, etc., might introduce noise into the clustering process.
- The TF-IDF vectorization with a limited number of features (1000) might not fully capture the nuances of the descriptions.
- The one-hot encoding of 'Type' by grouping into 'Other

## Summary:

### Data Analysis Key Findings

*   The dataset contains 7789 entries with 11 columns and includes missing values in `Director`, `Cast`, `Country`, `Rating`, and `Release_Date`.
*   The `Release_Date` column was successfully converted to datetime format, and a `Release_Year` column was extracted.
*   Missing categorical and director/cast/country values were filled with 'Unknown'.
*   TF-IDF vectorization was applied to the `Description` column, and one-hot encoding was applied to selected categorical features (`Category`, `Rating`, `Duration`, and a grouped `Type`).
*   KMeans clustering with 15 clusters was applied to the engineered features, and cluster labels were added to the original dataframe.
*   A function was created to recommend videos from the same cluster as a given input video.
*   The distribution of videos across the 15 clusters varies, with some clusters containing more videos than others.
*   Analysis of sample clusters shows that some clusters are dominated by a single category (e.g., "Movie"), while others contain a mix of categories.

### Insights or Next Steps

*   The current content-based clustering approach has limitations as it doesn't use user data. Future work should explore integrating collaborative filtering techniques for more personalized recommendations.
*   Evaluate the clustering performance using metrics like Silhouette score and explore different numbers of clusters and feature engineering methods to potentially improve the quality of the clusters and thus the recommendations.


In [41]:
import pandas as pd
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------------
# Load dataset
# -------------------------------
# Make sure your dataset has a "title" column and some text column like "tags" (genres, description, etc.)
# Correcting the file path and name
movies_df = pd.read_csv("/content/Netflix Dataset.csv")

# If no "tags" column exists, create one by combining useful columns (adjust as per your dataset)
# Based on the original dataframe structure, the relevant columns are 'Title', 'Type', and 'Description'.
if "tags" not in movies_df.columns:
    # Combine 'Title', 'Type', and 'Description' columns into 'tags'
    movies_df["tags"] = (
        movies_df["Title"].astype(str) + " " +
        movies_df["Type"].astype(str) + " " +
        movies_df["Description"].astype(str)
    )

# -------------------------------
# Feature extraction
# -------------------------------
cv = CountVectorizer(max_features=5000, stop_words="english")
vectors = cv.fit_transform(movies_df["tags"]).toarray()

# Compute similarity matrix
similarity = cosine_similarity(vectors)

# -------------------------------
# Save model + dataframe
# -------------------------------
# Saving the similarity matrix and the movies_df with relevant columns for recommendation
with open("netflix_recommender.pkl", "wb") as f:
    # We can save the similarity matrix and the original dataframe (or a subset)
    # that contains the Title and other relevant info to look up recommendations later.
    # Saving the dataframe with Title and Category for easy lookup.
    pickle.dump((movies_df[['Show_Id', 'Title', 'Category']], similarity), f)


print("✅ Pickle file saved successfully: netflix_recommender.pkl")

✅ Pickle file saved successfully: netflix_recommender.pkl
