# In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Shared plotting defaults: seaborn's "whitegrid" theme and a 12x6 default figure size
sns.set_style("whitegrid")
plt.rc('figure', figsize=(12, 6))

# --- 1. DATA LOADING AND PREPARATION ---

# Location of the source CSV. Adjust this path when running outside the
# environment that provides /content.
DATA_PATH = "/content/Netflix Dataset.csv"

# Load the dataset into `df`; every later step reads from this frame.
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    # Report the path that was actually attempted so the message matches the
    # file being loaded, and stop with a non-zero exit status. SystemExit is
    # raised directly because the `exit()` helper is installed by the `site`
    # module for interactive use and is not guaranteed to exist in scripts.
    raise SystemExit(f"Error: The file '{DATA_PATH}' was not found.") from None

# Normalize column names (spaces -> underscores), give the two columns used
# below clearer names, and drop exact duplicate rows.
df.columns = df.columns.str.replace(' ', '_')
df = df.rename(columns={'Category': 'Content_Type', 'Type': 'Genres'})
df.drop_duplicates(inplace=True)

# Handle missing values: fill Country with 'Missing' and Genres with 'Unknown'.
# These placeholder labels are filtered out again by the later analyses.
df['Country'] = df['Country'].fillna('Missing')
df['Genres'] = df['Genres'].fillna('Unknown')

# Parse Release_Date and derive the year used for all trend analyses.
# Whitespace is stripped first: pandas may infer a single date format from the
# first value, and with errors='coerce' any value that does not match it
# (for example one with a leading space) would silently become NaT and the row
# would then be dropped below.
# NOTE(review): the derived column is called Date_Added_Year and the plots label
# it as the year content was added, although it comes from Release_Date --
# confirm which meaning the dataset column actually has.
df['Release_Date'] = pd.to_datetime(
    df['Release_Date'].astype('string').str.strip(),
    errors='coerce',
)
df['Date_Added_Year'] = df['Release_Date'].dt.year

# Rows whose date could not be parsed have no year; remove them so the year
# column can be stored as plain integers.
df.dropna(subset=['Date_Added_Year'], inplace=True)
df['Date_Added_Year'] = df['Date_Added_Year'].astype(int)

# --- 2. ANALYSIS 1: CONTENT DISTRIBUTION (MOVIES VS. TV SHOWS) OVER TIME ---

# Long form: one row per (year, content type) pair with its title count
df_type_trend = (
    df.groupby(['Date_Added_Year', 'Content_Type'])
    .size()
    .rename('Count')
    .reset_index()
)

# Wide form for plotting: years as rows, content types as columns,
# with 0 where a type has no titles in a given year
type_pivot = (
    df_type_trend
    .set_index(['Date_Added_Year', 'Content_Type'])['Count']
    .unstack('Content_Type')
    .fillna(0)
)

# --- 3. ANALYSIS 2: GENRE POPULARITY OVER TIME ---

# Turn the comma-separated Genres string into a list, then give each genre
# its own row so titles with several genres are counted once per genre
df_genre_expanded = df.assign(Genre=df['Genres'].str.split(', '))
df_genre_exploded = df_genre_expanded.explode('Genre')
df_genre_exploded = df_genre_exploded.assign(
    Genre=df_genre_exploded['Genre'].str.strip()
)

# Take the 11 most frequent labels so that 10 remain even if the
# 'Unknown' placeholder is among them, then discard that placeholder
genre_counts = df_genre_exploded['Genre'].value_counts()
top_10_genres = genre_counts.nlargest(11).index
top_10_genres = top_10_genres[~top_10_genres.isin(['Unknown'])][:10]

# Yearly title counts restricted to those top genres
in_top_genres = df_genre_exploded['Genre'].isin(top_10_genres)
df_top_genres_trend = df_genre_exploded[in_top_genres]
genre_trend = (
    df_top_genres_trend.groupby(['Date_Added_Year', 'Genre'])
    .size()
    .rename('Count')
    .reset_index()
)

# --- 4. ANALYSIS 3: COUNTRY CONTRIBUTION ---

# Split the Country string into one row per country. Splitting on a bare comma
# and trimming afterwards handles entries with or without a space after the
# comma; splitting on the literal ', ' would leave values such as
# "United States," or "France,Belgium" unsplit and count them as countries.
df_country_expanded = df.copy()
df_country_expanded['Origin_Country'] = df_country_expanded['Country'].str.split(',')
df_country_exploded = df_country_expanded.explode('Origin_Country')
df_country_exploded['Origin_Country'] = df_country_exploded['Origin_Country'].str.strip()

# Drop tokens that are not real countries: the 'Missing' placeholder and the
# empty strings produced by leading, trailing, or doubled commas.
not_a_country = df_country_exploded['Origin_Country'].isin(['', 'Missing'])
df_country_exploded = df_country_exploded[~not_a_country]

# Count the total contribution of each country and keep the 10 largest
country_contribution = df_country_exploded['Origin_Country'].value_counts().nlargest(10)

# --- 5. VISUALIZATION (Saving to files) ---

# Plot 1: stacked bars of titles per year, split by content type
fig, ax = plt.subplots(figsize=(14, 7))
type_pivot.plot.bar(stacked=True, ax=ax, colormap='coolwarm')
ax.set_title('Evolution of Content Distribution (Movies vs. TV Shows) Added to Netflix')
ax.set_xlabel('Year Content Was Added')
ax.set_ylabel('Number of Titles')
# Slant the year labels so adjacent ticks stay readable
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(title='Content Type')
fig.tight_layout()
fig.savefig('content_type_trend.png')
# Close the figure once it is written to disk
plt.close(fig)

# Plot 2: one line per top genre showing titles per year
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(
    data=genre_trend,
    x='Date_Added_Year',
    y='Count',
    hue='Genre',
    marker='o',
    ax=ax,
)
ax.set_title('Trend of Top 10 Genre Additions Over Time')
ax.set_xlabel('Year Content Was Added')
ax.set_ylabel('Number of Titles Added')
# Anchor the legend outside the axes, to the right of the plot area
ax.legend(title='Genre', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, linestyle='--', alpha=0.6)
fig.tight_layout()
fig.savefig('genre_popularity_trend.png')
plt.close(fig)

# Plot 3: bar chart of the ten countries with the most titles
top_countries = country_contribution.index
title_counts = country_contribution.values

fig, ax = plt.subplots(figsize=(12, 7))
# The countries are passed as `hue` as well as `x`, with the legend turned off,
# so the palette is applied per bar without seaborn's FutureWarning
sns.barplot(
    x=top_countries,
    y=title_counts,
    hue=top_countries,
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title('Top 10 Contributing Countries to Netflix Catalog')
ax.set_xlabel('Country')
ax.set_ylabel('Number of Titles')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
fig.savefig('country_contribution.png')
plt.close(fig)