In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the movie metadata and normalise the numeric columns used by the
# plots further down.
df = pd.read_csv('../data/movies_metadata.csv')

# Coerce unparseable cells to NaN rather than letting ``astype(float)`` raise
# ValueError on the first bad value; this mirrors how ``budget`` is handled.
# The trailing ``astype(float)`` preserves the float dtype the original cast
# guaranteed, even when a column happens to parse as all integers.
for column in ('vote_average', 'runtime'):
    df[column] = pd.to_numeric(df[column], errors='coerce').astype(float)
df['budget'] = pd.to_numeric(df['budget'], errors='coerce')

# Histogram (with KDE overlay) of the average vote per movie, drawn on an
# explicit Axes and written to disk before being displayed.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['vote_average'], bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Movie Ratings')
ax.set_xlabel('Vote Average')
fig.savefig('../images/rating_distribution.png')
plt.show()

# Pairwise correlation between rating, runtime and budget, rendered as an
# annotated heatmap (two decimals per cell) and saved before display.
numeric_columns = ['vote_average', 'runtime', 'budget']
corr = df[numeric_columns].corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Heatmap')
fig.savefig('../images/correlation_heatmap.png')
plt.show()

# Horizontal boxplot of movie runtimes to make outliers visible; the figure
# is saved to disk and then shown.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x=df['runtime'], ax=ax)
ax.set_title('Runtime Outliers')
fig.savefig('../images/runtime_boxplot.png')
plt.show()