# Netflix Data Analysis - Review 2: Visualization & Interpretation

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Load data again (if running separately) and derive the date/type features
# that the charts below rely on.
df = pd.read_csv('netflix_titles.csv')

# Trim surrounding whitespace before parsing. On pandas 2.x the datetime format
# is inferred from the first value, so a padded string such as
# ' August 4, 2017' would not match it and errors='coerce' would silently turn
# that row into NaT, dropping it from every year/month-based chart.
df['date_added'] = pd.to_datetime(df['date_added'].str.strip(), errors='coerce')

# Missing or unparseable dates yield NaN in both derived columns.
df['year_added'] = df['date_added'].dt.year
df['month_added'] = df['date_added'].dt.month

# Numeric stand-in for the content type; any label other than these two maps to NaN.
df['type_encoded'] = df['type'].map({'Movie': 0, 'TV Show': 1})

In [None]:
# Step 1: Chart types
# Pie chart: share of rows per content type.
fig1 = px.pie(df, names='type', title='Distribution of Content Types')
fig1.show()

# Bar chart: the ten countries with the most titles.
# NOTE(review): assumes a multi-country title stores its countries as one
# comma-separated string in 'country' -- confirm against the dataset. Counting
# the raw strings would rank every distinct combination as its own "country",
# so split the lists and credit each listed country individually. When no cell
# contains a comma, this only differs from the raw count by whitespace trimming.
country_per_title = df['country'].str.split(',').explode().str.strip()
# Drop blanks left behind by stray leading/trailing commas.
country_per_title = country_per_title[country_per_title != '']
top_countries = country_per_title.value_counts().nlargest(10)
fig2 = px.bar(x=top_countries.index, y=top_countries.values,
              labels={'x': 'Country', 'y': 'Content Count'},
              title='Top 10 Countries by Netflix Content')
fig2.show()

In [None]:
# Step 2: Aesthetics
# Annotated heatmap of the pairwise correlations between numeric columns.
correlation_matrix = df.corr(numeric_only=True)
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='YlGnBu', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Step 3: Interactive elements
# Plotly scatter of release year against year added, coloured by content type.
scatter_options = {
    'x': 'release_year',
    'y': 'year_added',
    'color': 'type',
    'title': 'Release Year vs. Year Added',
}
fig3 = px.scatter(df, **scatter_options)
fig3.show()

In [None]:
# Step 4: Storytelling
# Count titles per year_added for each content type, ordered by year.
is_movie = df['type'] == 'Movie'
is_tv_show = df['type'] == 'TV Show'
movies_by_year = df.loc[is_movie, 'year_added'].value_counts().sort_index()
shows_by_year = df.loc[is_tv_show, 'year_added'].value_counts().sort_index()

# Draw both series on one set of axes so the two trends can be compared.
trend_fig, trend_ax = plt.subplots(figsize=(10, 6))
for series_label, yearly_counts in (('Movies', movies_by_year),
                                    ('TV Shows', shows_by_year)):
    trend_ax.plot(yearly_counts.index, yearly_counts.values, label=series_label)
trend_ax.set_title('Trend of Movies and TV Shows Added Over Years')
trend_ax.set_xlabel('Year')
trend_ax.set_ylabel('Count')
trend_ax.legend()
trend_ax.grid(True)
plt.show()