# Netflix Dataset Analysis Project

# In [None]:

# Netflix Dataset Analysis

## 1. Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# Read the Netflix titles CSV from the current working directory.
df = pd.read_csv("netflix_titles.csv")

# Preview the first rows. The result is printed explicitly because a bare
# expression is only echoed when it is the last statement of a notebook cell,
# and is never shown when the file runs as a plain script.
print(df.head())

## 2. Data Cleaning
# Report the number of missing values per column (before any filling below).
# Printed explicitly: a bare expression in the middle of a cell or script is
# evaluated and then silently discarded.
print(df.isnull().sum())

# Drop rows that are exact duplicates of an earlier row.
df = df.drop_duplicates()

# Fill missing director with 'Unknown' so the column contains no NaN values.
df['director'] = df['director'].fillna('Unknown')

## 3. Movies vs TV Shows Count
# Tally how many titles fall under each value of the 'type' column, then
# draw the tallies as a vertical bar chart.
type_counts = df['type'].value_counts()
type_counts.plot.bar(title="Movies vs TV Shows")

## 4. Top Genres
from collections import Counter

# Each 'listed_in' entry is a comma-separated string of genres; split every
# non-null entry and strip surrounding whitespace to get one flat genre list.
genres = [
    genre.strip()
    for listing in df['listed_in'].dropna()
    for genre in listing.split(',')
]

# Keep the ten most frequent genres as (genre, count) pairs.
genre_counts = Counter(genres).most_common(10)
genre_df = pd.DataFrame(genre_counts, columns=['Genre', 'Count'])

# Start a fresh figure: seaborn draws on the current axes, so without this the
# bars would be painted on top of the previous chart when the sections run
# together in one cell or script.
plt.figure(figsize=(8, 5))
sns.barplot(x="Count", y="Genre", data=genre_df)
plt.title("Top 10 Genres on Netflix")

## 5. Content Added by Release Year
# Order the years by how many titles each one has (value_counts sorts
# descending by default), then draw one horizontal bar per release year.
year_order = df['release_year'].value_counts().index

plt.figure(figsize=(8, 5))
sns.countplot(data=df, y="release_year", order=year_order)
plt.title("Content Released per Year")

## 6. Ratings Distribution
# Order the ratings from most to least common, then draw one horizontal bar
# per rating on its own figure.
rating_order = df['rating'].value_counts().index

plt.figure(figsize=(6, 4))
sns.countplot(data=df, y="rating", order=rating_order)
plt.title("Distribution of Ratings")
