In [None]:

import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

# Load the Disney+ titles dataset (the raw file includes a 'duration' column
# that is split into numeric columns further down in this cell).
# The path defaults to the original local copy; set the DISNEY_PLUS_CSV
# environment variable to point at the file on another machine instead of
# editing this cell. An unset or empty variable falls back to the default.
default_titles_csv = r'C:\Users\camro\OneDrive\Github Project1\data\disney_plus_titles.csv'
titles_csv_path = os.environ.get('DISNEY_PLUS_CSV') or default_titles_csv
df = pd.read_csv(titles_csv_path)

# Parse 'date_added' into datetimes; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
df['date_added'] = pd.to_datetime(df['date_added'], errors='coerce')

# Replace missing text fields with explicit placeholder labels.
# Each filled column is assigned back to df rather than calling
# df[col].fillna(..., inplace=True): that form mutates a selected column
# object, which pandas deprecates and which does not update df itself when
# Copy-on-Write is enabled.
missing_labels = {
    'director': 'Unknown',
    'cast': 'Unknown',
    'country': 'Unknown',
    'rating': 'Not Rated',
}
for column, label in missing_labels.items():
    df[column] = df[column].fillna(label)

def _leading_count(value, marker):
    """Return the leading integer of a duration entry that mentions `marker`.

    When `marker` occurs in str(value), the first whitespace-separated token
    of `value` is converted with int(); otherwise None is returned.
    """
    if marker not in str(value):
        return None
    return int(value.split()[0])


# Split the raw 'duration' text into two numeric columns: one filled for
# entries mentioning 'min', the other for entries mentioning 'Season'.
df['duration_minutes'] = df['duration'].apply(_leading_count, marker='min')
df['seasons'] = df['duration'].apply(_leading_count, marker='Season')

# The raw text column has been replaced by the two columns above
df = df.drop(columns=['duration'])

# Preview the first rows of the prepared frame
df.head()


In [None]:

# Summary statistics for every column, whatever its dtype (include='all')
summary_all = df.describe(include='all')
summary_all


In [None]:

# Bar chart of the number of titles per value of the 'type' column.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots()
sns.countplot(x='type', data=df, palette='Set2', ax=ax)
ax.set_title('Distribution of Content Type')
ax.set_xlabel('Type')
ax.set_ylabel('Count')
plt.show()


In [None]:

# Ten most frequent values of 'rating' (value_counts sorts by frequency)
rating_counts = df['rating'].value_counts().iloc[:10]

# Horizontal bars: rating labels on the y-axis, title counts on the x-axis.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots()
sns.barplot(x=rating_counts.values, y=rating_counts.index, palette='rocket', ax=ax)
ax.set(title='Top 10 Content Ratings', xlabel='Count', ylabel='Rating')
plt.show()


In [None]:

# Ten most frequent values of 'country' (value_counts sorts by frequency)
top_countries = df['country'].value_counts().iloc[:10]

# Horizontal bars: country labels on the y-axis, title counts on the x-axis.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots()
sns.barplot(x=top_countries.values, y=top_countries.index, palette='viridis', ax=ax)
ax.set(
    title='Top 10 Countries by Content Count',
    xlabel='Number of Titles',
    ylabel='Country',
)
plt.show()


In [None]:

# Year in which each title was added, taken from the parsed 'date_added'
# column (missing wherever 'date_added' is NaT).
df['year_added'] = df['date_added'].dt.year

# Number of titles added per year, in chronological order.
# value_counts() leaves out missing years by default.
additions_per_year = df['year_added'].value_counts().sort_index()

# Plot against whole-number years and pin the ticks to exactly those years.
# Otherwise the numeric axis can show fractional years (e.g. 2019.5) when only
# a few years are present or the column is float because of missing dates.
additions_per_year.index = additions_per_year.index.astype(int)

ax = additions_per_year.plot(kind='line', marker='o', color='teal')
ax.set_xticks(list(additions_per_year.index))
ax.set_title('Content Added Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Additions')
ax.grid(True)
plt.show()
