In [None]:
import pandas as pd
import json 
import requests
import statistics
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the movie list from a CSV in the working directory. Later cells read the
# columns "Franchise", "Title", "Year", "Rotten Tomatoes" and "Box Office" from it.
df = pd.read_csv('movies_full_list.csv')

In [None]:
# Display the loaded DataFrame as this cell's output for a quick visual check.
df

In [None]:
# 2. Franchise-level overview of the Rotten Tomatoes scores
# Build the grouped view once and derive both aggregates from it.
rt_by_franchise = df.groupby("Franchise")["Rotten Tomatoes"]
avg = rt_by_franchise.mean()
total = rt_by_franchise.sum()  # kept for later use; not printed in this cell
count = df["Franchise"].value_counts()

print("\n=== Number of Movies per Franchise ===")
print(count)

print("=== Average Rotten Tomatoes Rating by Franchise ===")
# Render each mean as a whole-number percentage string.
print(avg.apply(lambda mean_score: f"{mean_score:,.0f}%"))


# 3. Three best- and three worst-rated titles per franchise
# Filter each franchise once, then sort in each direction and keep the first three rows.
dc_rows = df[df["Franchise"] == "DC"]
marvel_rows = df[df["Franchise"] == "Marvel"]

top3_dc = dc_rows.sort_values("Rotten Tomatoes", ascending=False).head(3)
bottom3_dc = dc_rows.sort_values("Rotten Tomatoes", ascending=True).head(3)
top3_marvel = marvel_rows.sort_values("Rotten Tomatoes", ascending=False).head(3)
bottom3_marvel = marvel_rows.sort_values("Rotten Tomatoes", ascending=True).head(3)

# Columns shown for every ranking table.
ranking_cols = ["Title", "Year", "Rotten Tomatoes"]

for heading, ranking in [
    ("\n=== Top 3 DC Movies ===", top3_dc),
    ("\n=== Bottom 3 DC Movies ===", bottom3_dc),
    ("\n=== Top 3 Marvel Movies ===", top3_marvel),
    ("\n=== Bottom 3 Marvel Movies ===", bottom3_marvel),
]:
    print(heading)
    print(ranking[ranking_cols])
    # Blank line between tables, matching the spacing of the printed headings.

In [None]:
# Show floats with two decimals. This is a global pandas display option, so it
# also affects every DataFrame rendered after this cell runs.
pd.options.display.float_format = '{:.2f}'.format

# Summary statistics of the numeric columns, transposed so each column is a row.
df.describe().transpose()

In [None]:
# `dfc` wraps the same data as `df`; a later cell reads the overall mean from it.
dfc = pd.DataFrame(data=df)

# Correlation between critic score and box office across every movie in the list
# (Series.corr with its default method).
rt_scores = df['Rotten Tomatoes']
box_office = df['Box Office']
correlation = rt_scores.corr(box_office)
print("Correlation:", correlation)
# Author's reading of the printed value: moderate relationship between
# Rotten Tomatoes score and box office earnings for all movies.

# Interpretation scale used in this notebook for |r|:
#   No relationship:        |r| < 0.25
#   Weak relationship:      0.25 <= |r| < 0.5
#   Moderate relationship:  0.5 <= |r| < 0.75
#   Strong relationship:    |r| >= 0.75

In [None]:
# Split the movie list by franchise; the plotting cells below reuse both frames.
is_marvel = df["Franchise"] == "Marvel"
is_dc = df["Franchise"] == "DC"
marvel_df = df[is_marvel]
dc_df = df[is_dc]

# Correlation of critic score with box office, computed separately for each franchise.
marvel_corr = marvel_df["Rotten Tomatoes"].corr(marvel_df["Box Office"])
dc_corr = dc_df["Rotten Tomatoes"].corr(dc_df["Box Office"])

print("Marvel correlation:", marvel_corr)
# Author's reading: moderate relationship between Rotten Tomatoes score and
# box office earnings for Marvel movies.

print("DC correlation:", dc_corr)
# Author's reading: moderate relationship between Rotten Tomatoes score and
# box office earnings for DC movies.

In [None]:
# Mean Rotten Tomatoes score for each franchise and for the whole list.
# The overall mean is read from `df` directly, so this cell does not depend on
# the alias created in the correlation cell.
means = {
    'marvel_movies': marvel_df['Rotten Tomatoes'].mean(),
    'dc_movies': dc_df['Rotten Tomatoes'].mean(),
    'all_movies': df['Rotten Tomatoes'].mean()
}

# Two-column frame (label, mean) in the dict's insertion order.
mean_df = pd.DataFrame(list(means.items()), columns=['Dataset', 'Mean'])

fig, ax = plt.subplots()
ax.bar(mean_df['Dataset'], mean_df['Mean'], color='blue')
ax.set_title('Average Rotten Tomatoes Score Comparison')
ax.set_ylabel('Rotten Tomatoes Rating')
ax.set_xlabel('Franchises')
# The score is a 0-100 percentage; the axis spans the full scale so that a mean
# above 75 is not cut off at the top of the plot.
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Scatter of critic score against box office for the Marvel subset.
# plot.scatter returns the Axes it drew on, so the labels are set on it directly.
marvel_ax = marvel_df.plot.scatter(
    x='Box Office',
    y='Rotten Tomatoes',
    color='red',
    marker='o',
)
# NOTE(review): the label says "in Millions"; confirm the unit of the Box Office column.
marvel_ax.set_xlabel('Marvel Box Office Earnings in Millions')
marvel_ax.set_ylabel('Rotten Tomatoes Rating out of 100')
marvel_ax.set_title('Rotten Tomatoes Ratings vs Box Office Earnings Marvel')
# Plain tick labels on x, i.e. no scientific notation for large values.
marvel_ax.ticklabel_format(style='plain', axis='x')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Scatter of critic score against box office for the DC subset.
# plot.scatter returns the Axes it drew on, so the labels are set on it directly.
dc_ax = dc_df.plot.scatter(
    x='Box Office',
    y='Rotten Tomatoes',
    color='black',
    marker='o',
)
# NOTE(review): the label says "in Millions"; confirm the unit of the Box Office column.
dc_ax.set_xlabel('DC Box Office Earnings in Millions')
dc_ax.set_ylabel('Rotten Tomatoes Rating out of 100')
dc_ax.set_title('Rotten Tomatoes Ratings vs Box Office Earnings DC')
# Plain tick labels on x, i.e. no scientific notation for large values.
dc_ax.ticklabel_format(style='plain', axis='x')
plt.xticks(rotation=45)
plt.show()