# 📊 Netflix Data Analysis & Forecasting

This notebook explores Netflix's content trends, genre popularity, and forecasts future trends using time-series analysis.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from statsmodels.tsa.arima.model import ARIMA
import networkx as nx

# Read the Netflix catalogue from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="netflix_titles.csv")

# Preview the top five records
df.head(n=5)


## 1️⃣ Data Cleaning

- Checking for missing values
- Handling null values
- Converting date columns

In [None]:

# Count missing values per column *before* any imputation. The result is kept
# in a variable and echoed as the cell's last expression, because a bare
# expression in the middle of a cell is evaluated but never displayed.
missing_before = df.isnull().sum()

# Fill missing text fields with the "Unknown" placeholder.
# The result is assigned back to the column instead of calling
# `df[col].fillna(..., inplace=True)`: that form operates on a column selection
# and is chained assignment, which emits a FutureWarning on recent pandas 2.x
# and leaves `df` unmodified when Copy-on-Write is enabled.
for column in ("country", "director", "cast"):
    df[column] = df[column].fillna("Unknown")

# Coerce release_year to a numeric dtype; unparsable entries become NaN.
df['release_year'] = pd.to_numeric(df['release_year'], errors='coerce')

# Convert date_added to datetime format.
# String values are stripped first so stray leading/trailing whitespace cannot
# break pandas' format inference. Non-string values (NaN, or Timestamps when the
# cell is re-run) pass through untouched, which keeps the cell idempotent.
# NOTE(review): assumes all non-missing dates share one textual format — confirm.
df['date_added'] = pd.to_datetime(
    df['date_added'].map(lambda value: value.strip() if isinstance(value, str) else value)
)

# Show the pre-cleaning missing-value report.
missing_before


## 2️⃣ Distribution of TV Shows vs. Movies

In [None]:

# Order the bars from the most to the least frequent content type.
type_order = df["type"].value_counts().index

# Draw the count plot on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, x="type", order=type_order, palette="coolwarm", ax=ax)
ax.set_title("Distribution of TV Shows vs. Movies")
plt.show()


## 3️⃣ Growth of Netflix Content Over Time (by Release Year)

In [None]:

# Number of titles per release year (reused by the forecasting section).
content_growth = df.groupby("release_year").size()

# Line chart of the yearly title counts.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    content_growth.index,
    content_growth.values,
    marker="o",
    color="royalblue",
)
ax.set_title("Growth of Netflix Content Over Years")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Titles")
ax.grid(True)
plt.show()


## 4️⃣ Global Genre Distribution

In [None]:

# Each title lists several comma-separated genres: split them into lists,
# put every genre on its own row, then count occurrences.
genre_lists = df['listed_in'].str.split(', ')
genres = genre_lists.explode().value_counts()

# Bar chart of the ten most frequent genres.
fig, ax = plt.subplots(figsize=(12, 6))
genres.iloc[:10].plot(kind="bar", color="royalblue", ax=ax)
ax.set_title("Top 10 Most Popular Genres")
ax.set_xlabel("Genre")
ax.set_ylabel("Number of Titles")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


## 5️⃣ Director & Actor Network Analysis

In [None]:

def _split_credits(field):
    """Split a comma-separated credit string into individual, stripped names.

    Returns an empty list for missing values (any non-string, e.g. NaN) and for
    the "Unknown" placeholder, so neither can raise or end up as a graph node.
    """
    if not isinstance(field, str) or field == "Unknown":
        return []
    return [name.strip() for name in field.split(",") if name.strip()]


# Build an undirected graph with one edge per (director, actor) pair that
# appears together on a title.
# - Titles without a known director are skipped; otherwise the "Unknown"
#   placeholder would become a single artificial hub connected to every actor
#   from those titles.
# - The director field is split the same way as the cast field, so
#   co-directors become separate nodes instead of one combined node.
#   NOTE(review): assumes multiple directors are comma-separated like the
#   cast column — confirm against the dataset.
G = nx.Graph()
for director_field, cast_field in zip(df["director"], df["cast"]):
    actors = _split_credits(cast_field)
    for director in _split_credits(director_field):
        for actor in actors:
            G.add_edge(director, actor)

plt.figure(figsize=(12, 6))
nx.draw(G, node_size=20, edge_color="gray", alpha=0.5)
plt.title("Director-Actor Network Graph")
plt.show()


## 6️⃣ Forecasting Netflix Content Growth

In [None]:

# Number of future years to forecast.
forecast_steps = 5

# ARIMA treats observations as consecutive, equally spaced steps, but grouping
# by release_year only yields the years that actually occur in the data.
# Put the counts on a contiguous yearly grid (years without titles count as 0)
# so that one model step always corresponds to one calendar year.
observed = content_growth.copy()
# release_year may be float after pd.to_numeric(errors='coerce'); range() below
# needs integer years.
observed.index = observed.index.astype(int)
first_year = int(observed.index.min())
last_year = int(observed.index.max())
yearly_counts = observed.reindex(pd.RangeIndex(first_year, last_year + 1), fill_value=0)

# Fit on the plain values so the year labels are not interpreted as a time
# index. NOTE(review): statsmodels is expected to warn about integer indexes
# that are not 0..n-1 — confirm against the installed version.
model = ARIMA(yearly_counts.to_numpy(dtype=float), order=(5, 1, 0))
model_fit = model.fit()
forecast_years = range(last_year + 1, last_year + 1 + forecast_steps)
forecast_values = model_fit.forecast(steps=forecast_steps)

plt.figure(figsize=(12, 6))
plt.plot(yearly_counts.index, yearly_counts.values, marker="o", label="Actual Data", color="royalblue")
plt.plot(forecast_years, forecast_values, marker="o", linestyle="dashed", label="Forecast", color="red")
plt.title("Predicted Growth of Netflix Content (Next 5 Years)")
plt.xlabel("Year")
plt.ylabel("Number of Titles")
plt.legend()
plt.show()


## 7️⃣ Forecasting Genre Trends

In [None]:

# Number of future years to forecast.
forecast_steps = 5

# Titles per (release year, genre).
# `listed_in` holds comma-separated strings, and `explode` only expands
# list-likes, so the column must be split first; without the split every
# distinct genre *combination* would be counted as if it were one genre.
genre_counts = (
    df[['release_year', 'listed_in']]
    .dropna()
    .assign(listed_in=lambda frame: frame['listed_in'].str.split(', '))
    .explode('listed_in')
    .groupby(['release_year', 'listed_in'])
    .size()
    .unstack(fill_value=0)
)

# release_year may be float after pd.to_numeric(errors='coerce'); NaN rows were
# dropped above, so the cast to int is safe and lets range() work below.
genre_counts.index = genre_counts.index.astype(int)

# Put the counts on a contiguous yearly grid (missing years count as 0) so that
# one ARIMA step always corresponds to one calendar year.
first_year = int(genre_counts.index.min())
last_year = int(genre_counts.index.max())
genre_trends = genre_counts.reindex(pd.RangeIndex(first_year, last_year + 1), fill_value=0)

top_genres = genre_trends.sum().nlargest(5).index
genre_trends_top = genre_trends[top_genres]

# The forecast horizon is the same for every genre, so compute it once.
forecast_years = range(last_year + 1, last_year + 1 + forecast_steps)

plt.figure(figsize=(12, 6))
for genre in top_genres:
    # Fit on plain values so the year labels are not interpreted as a time index.
    model = ARIMA(genre_trends_top[genre].to_numpy(dtype=float), order=(5, 1, 0))
    model_fit = model.fit()
    forecast_values = model_fit.forecast(steps=forecast_steps)
    (actual_line,) = plt.plot(genre_trends_top.index, genre_trends_top[genre], linestyle="-", label=f"Actual {genre}")
    # Reuse the genre's colour so its history and forecast read as one series.
    plt.plot(forecast_years, forecast_values, linestyle="dashed", color=actual_line.get_color(), label=f"Forecast {genre}")

plt.title("Predicted Genre Popularity Trends (Next 5 Years)")
plt.xlabel("Year")
plt.ylabel("Number of Titles")
plt.legend()
plt.show()


## 📌 Conclusion
- Netflix has seen **massive content growth after 2015**.
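- **Movies dominate in some countries**, while TV Shows are rising. *(Note: this notebook compares overall Movie vs. TV Show counts only; a per-country breakdown is not computed here.)*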
- **Drama & Documentaries remain popular**, with future shifts in genre demand.
- Predictions indicate **steady content expansion**, with some genre shifts.
