In [27]:
from pathlib import Path
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [28]:
# Load the movie attributes CSV and derive the display-ready frame `df_modified`.
#
# Steps: read the file, drop rows without a budget, derive genre/revenue/title/year
# columns, convert the money columns to integers, add "$"-formatted copies of them,
# drop multi-genre rows whose picked genre is 'Drama', and sort by total revenue.

# Filepath
filepath = "Attributes_DataFrame.csv"

# Seed for the random main-genre pick. Without a fixed seed the chosen genre -- and,
# through the Drama filter below, the set of surviving rows -- changes on every run.
GENRE_SEED = 42
genre_rng = np.random.default_rng(GENRE_SEED)

# Columns holding dollar amounts; each gets an integer version and a "<name>_$" string version
MONEY_COLUMNS = ['Budget', 'Domestic', 'International', 'total_revenue']

# Read the CSV file, converting relevant columns to correct data types
df = pd.read_csv(filepath, dtype={'Domestic': float, 'International': float, 'Budget': float})

# Drop rows where the `Budget` column is null
df = df.dropna(subset=['Budget'])

# Split the ';'-separated genre string once and reuse the resulting lists.
# NOTE: astype(str) turns a missing Genres value into the literal string "nan",
# so such a row ends up with num_genre == 1 and main_genre == "nan".
genre_lists = df['Genres'].astype(str).str.split(';')

# Create new columns using assign and store in a new variable
df_modified = df.assign(
    num_genre=genre_lists.str.len(),
    # One genre drawn at random (seeded) from the movie's list; using the first
    # listed genre (`genre_lists.str[0]`) would be the deterministic alternative.
    main_genre=genre_lists.apply(lambda genres: genre_rng.choice(genres)),
    # A missing domestic or international figure counts as 0 in the total
    total_revenue=df['Domestic'].fillna(0) + df['International'].fillna(0),
    # Remove every "(...)" group, then strip the whitespace that preceded/followed it
    title_without_year=(
        df['Title'].astype(str)
        .str.replace(r"\([^()]*\)", "", regex=True)
        .str.strip()
    ),
    # First parenthesised run of digits in the title, converted to a number
    year=pd.to_numeric(df['Title'].astype(str).str.extract(r'\((\d+)\)')[0]),
)

# Convert financial columns to integers. int64 is requested explicitly because
# plain `int` can be 32-bit on some platforms and the totals exceed 2**31.
# (Budget has no nulls after the dropna above; fillna is a no-op for it.)
df_modified = df_modified.assign(
    **{col: df_modified[col].fillna(0).astype('int64') for col in MONEY_COLUMNS}
)

# Create new columns for currency display, e.g. 1234567 -> "$1,234,567"
df_modified = df_modified.assign(
    **{f'{col}_$': df_modified[col].map('${:,.0f}'.format) for col in MONEY_COLUMNS}
)

# Final selection: filter, order and keep only the columns meant for display
df_modified = (
    df_modified
    # Drop rows that list several genres but had 'Drama' picked as the main one
    .loc[lambda frame: ~((frame['num_genre'] > 1) & (frame['main_genre'] == 'Drama'))]
    # Highest-grossing titles first (sorted on the integer column, not the "$" string)
    .sort_values(by='total_revenue', ascending=False)
    [['title_without_year', 'year', 'main_genre', 'MPAA-Rating', 'Runtime',
      'Distributor', 'Budget_$', 'Domestic_$', 'International_$', 'total_revenue_$']]  # Display currency columns
)



In [30]:
# (rows, columns) of the prepared frame -- a quick size check after the filtering step
df_modified.shape

(2008, 10)

In [42]:
# Number of titles per distributor, most frequent first, limited to the top 20 entries
df_modified["Distributor"].value_counts().iloc[:20]

Distributor
Warner Bros.                           268
Universal Pictures                     255
Twentieth Century Fox                  210
Sony Pictures Entertainment (SPE)      187
Paramount Pictures                     177
Walt Disney Studios Motion Pictures    176
Lionsgate                              115
Screen Gems                             55
New Line Cinema                         48
Fox Searchlight Pictures                38
Focus Features                          36
DreamWorks Distribution                 35
Metro-Goldwyn-Mayer (MGM)               35
Miramax                                 34
DreamWorks                              28
Relativity Media                        25
Dimension Films                         24
Revolution Studios                      23
STX Entertainment                       20
Open Road Films (II)                    19
Name: count, dtype: int64

In [32]:
# Preview the first ten rows of the prepared frame
df_modified.iloc[:10]

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
187,Avengers: Endgame,2019,Sci-Fi,PG-13,181,Walt Disney Studios Motion Pictures,"$356,000,000","$858,373,000","$1,939,128,328","$2,797,501,328"
103,Avatar,2009,Sci-Fi,PG-13,162,Twentieth Century Fox,"$237,000,000","$749,766,139","$1,993,811,448","$2,743,577,587"
1,Star Wars: Episode VII - The Force Awakens,2015,Sci-Fi,PG-13,138,Walt Disney Studios Motion Pictures,"$245,000,000","$936,662,225","$1,131,561,399","$2,068,223,624"
0,Jurassic World,2015,Adventure,PG-13,124,Universal Pictures,"$150,000,000","$652,270,625","$1,018,130,012","$1,670,400,637"
188,The Lion King,2019,Animation,PG,118,Walt Disney Studios Motion Pictures,"$260,000,000","$543,638,043","$1,113,305,351","$1,656,943,394"
128,The Avengers,2012,Action,PG-13,143,Walt Disney Studios Motion Pictures,"$220,000,000","$623,357,910","$895,455,078","$1,518,812,988"
4,Furious 7,2015,Thriller,PG-13,137,Universal Pictures,"$190,000,000","$353,007,020","$1,162,040,651","$1,515,047,671"
190,Frozen II,2019,Family,PG,103,Walt Disney Studios Motion Pictures,"$150,000,000","$477,373,578","$972,653,355","$1,450,026,933"
2,Avengers: Age of Ultron,2015,Adventure,PG-13,141,Walt Disney Studios Motion Pictures,"$250,000,000","$459,005,868","$943,800,000","$1,402,805,868"
167,Star Wars: Episode VIII - The Last Jedi,2017,Sci-Fi,PG-13,152,Walt Disney Studios Motion Pictures,"$317,000,000","$620,181,382","$712,358,507","$1,332,539,889"


In [33]:
# Subset of df_modified whose MPAA rating is exactly 'R'
r_rated_movies = df_modified.loc[df_modified['MPAA-Rating'].eq('R')]

# Show the first ten rows of that subset
r_rated_movies.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
195,Joker,2019,Thriller,R,122,Warner Bros.,"$55,000,000","$335,451,311","$738,800,000","$1,074,251,311"
163,Deadpool,2016,Adventure,R,108,Twentieth Century Fox,"$58,000,000","$363,070,709","$419,541,446","$782,612,155"
41,The Matrix Reloaded,2003,Action,R,138,Warner Bros.,"$150,000,000","$281,576,461","$457,835,574","$739,412,035"
181,Deadpool 2,2018,Comedy,R,119,Twentieth Century Fox,"$110,000,000","$318,491,426","$416,055,185","$734,546,611"
172,It,2017,Horror,R,135,Warner Bros.,"$35,000,000","$327,481,748","$372,900,000","$700,381,748"
51,The Passion of the Christ,2004,Drama,R,127,Newmarket Films,"$30,000,000","$370,274,604","$241,212,132","$611,486,736"
121,The Hangover Part II,2011,Comedy,R,102,Warner Bros.,"$80,000,000","$254,464,305","$332,300,000","$586,764,305"
2456,Fifty Shades of Grey,2015,Thriller,R,125,Universal Pictures,"$40,000,000","$166,167,230","$403,484,237","$569,651,467"
136,Ted,2012,Comedy,R,106,Universal Pictures,"$50,000,000","$218,815,487","$330,552,828","$549,368,315"
5,American Sniper,2014,War,R,133,Warner Bros.,"$58,800,000","$350,126,372","$197,300,000","$547,426,372"


In [34]:
# Subset of df_modified whose MPAA rating is exactly 'PG-13'
pg13_rated_movies = df_modified.loc[df_modified['MPAA-Rating'].eq('PG-13')]

# Show the first ten rows of that subset
pg13_rated_movies.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
187,Avengers: Endgame,2019,Sci-Fi,PG-13,181,Walt Disney Studios Motion Pictures,"$356,000,000","$858,373,000","$1,939,128,328","$2,797,501,328"
103,Avatar,2009,Sci-Fi,PG-13,162,Twentieth Century Fox,"$237,000,000","$749,766,139","$1,993,811,448","$2,743,577,587"
1,Star Wars: Episode VII - The Force Awakens,2015,Sci-Fi,PG-13,138,Walt Disney Studios Motion Pictures,"$245,000,000","$936,662,225","$1,131,561,399","$2,068,223,624"
0,Jurassic World,2015,Adventure,PG-13,124,Universal Pictures,"$150,000,000","$652,270,625","$1,018,130,012","$1,670,400,637"
128,The Avengers,2012,Action,PG-13,143,Walt Disney Studios Motion Pictures,"$220,000,000","$623,357,910","$895,455,078","$1,518,812,988"
4,Furious 7,2015,Thriller,PG-13,137,Universal Pictures,"$190,000,000","$353,007,020","$1,162,040,651","$1,515,047,671"
2,Avengers: Age of Ultron,2015,Adventure,PG-13,141,Walt Disney Studios Motion Pictures,"$250,000,000","$459,005,868","$943,800,000","$1,402,805,868"
167,Star Wars: Episode VIII - The Last Jedi,2017,Sci-Fi,PG-13,152,Walt Disney Studios Motion Pictures,"$317,000,000","$620,181,382","$712,358,507","$1,332,539,889"
180,Jurassic World: Fallen Kingdom,2018,Adventure,PG-13,128,Universal Pictures,"$170,000,000","$417,719,760","$890,748,184","$1,308,467,944"
176,The Fate of the Furious,2017,Crime,PG-13,136,Universal Pictures,"$250,000,000","$226,008,385","$1,009,996,733","$1,236,005,118"


In [35]:
# Subset of df_modified whose MPAA rating is exactly 'PG'
pg_rated_movies = df_modified.loc[df_modified['MPAA-Rating'].eq('PG')]

# Show the first ten rows of that subset
pg_rated_movies.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
188,The Lion King,2019,Animation,PG,118,Walt Disney Studios Motion Pictures,"$260,000,000","$543,638,043","$1,113,305,351","$1,656,943,394"
190,Frozen II,2019,Family,PG,103,Walt Disney Studios Motion Pictures,"$150,000,000","$477,373,578","$972,653,355","$1,450,026,933"
143,Frozen,2013,Adventure,PG,102,Walt Disney Studios Motion Pictures,"$150,000,000","$400,738,009","$880,064,273","$1,280,802,282"
168,Beauty and the Beast,2017,Musical,PG,129,Walt Disney Studios Motion Pictures,"$160,000,000","$504,014,165","$759,506,961","$1,263,521,126"
6,Minions,2015,Animation,PG,91,Universal Pictures,"$74,000,000","$336,045,770","$823,352,627","$1,159,398,397"
194,Aladdin,2019,Adventure,PG,128,Walt Disney Studios Motion Pictures,"$183,000,000","$355,559,216","$695,134,737","$1,050,693,953"
174,Despicable Me 3,2017,Comedy,PG,89,Universal Pictures,"$80,000,000","$264,624,300","$770,175,109","$1,034,799,409"
110,Alice in Wonderland,2010,Adventure,PG,108,Walt Disney Studios Motion Pictures,"$200,000,000","$334,191,110","$691,276,000","$1,025,467,110"
19,Harry Potter and the Sorcerer's Stone,2001,Adventure,PG,152,Warner Bros.,"$125,000,000","$317,575,550","$657,179,821","$974,755,371"
140,Despicable Me 2,2013,Comedy,PG,98,Universal Pictures,"$76,000,000","$368,065,385","$602,700,620","$970,766,005"


In [36]:
# Subset of df_modified whose MPAA rating is exactly 'G'
g_rated_movies = df_modified.loc[df_modified['MPAA-Rating'].eq('G')]

# Show the first ten rows of that subset
g_rated_movies.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
189,Toy Story 4,2019,Animation,G,100,Walt Disney Studios Motion Pictures,"$200,000,000","$434,038,008","$639,356,585","$1,073,394,593"
86,Ratatouille,2007,Adventure,G,111,Walt Disney Studios Motion Pictures,"$150,000,000","$206,445,654","$417,280,431","$623,726,085"
124,Cars 2,2011,Comedy,G,106,Walt Disney Studios Motion Pictures,"$200,000,000","$191,452,396","$368,400,000","$559,852,396"
21,"Monsters, Inc.",2001,Family,G,92,Walt Disney Studios Motion Pictures,"$115,000,000","$255,873,250","$272,900,000","$528,773,250"
2306,Rio 2,2014,Adventure,G,101,Twentieth Century Fox,"$103,000,000","$131,538,435","$367,242,682","$498,781,117"
1847,Alvin and the Chipmunks: Chipwrecked,2011,Animation,G,87,Twentieth Century Fox,"$75,000,000","$133,110,742","$209,584,693","$342,695,435"
857,Chicken Little,2005,Family,G,81,Walt Disney Studios Motion Pictures,"$150,000,000","$135,386,665","$179,046,172","$314,432,837"
1372,High School Musical 3: Senior Year,2008,Musical,G,112,Walt Disney Studios Motion Pictures,"$11,000,000","$90,559,416","$162,349,761","$252,909,177"
2463,The Peanuts Movie,2015,Comedy,G,88,Twentieth Century Fox,"$99,000,000","$130,178,411","$116,054,702","$246,233,113"
1846,The Lion King,2011,Musical,G,88,Walt Disney Studios Motion Pictures,"$45,000,000","$94,242,001","$91,300,000","$185,542,001"


In [37]:
# Titles whose extracted year lies in 2000..2009 (both ends included)
movies_2000_2009 = df_modified[df_modified['year'].between(2000, 2009)]

# Show the first ten rows of that decade's subset
movies_2000_2009.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
103,Avatar,2009,Sci-Fi,PG-13,162,Twentieth Century Fox,"$237,000,000","$749,766,139","$1,993,811,448","$2,743,577,587"
42,The Lord of the Rings: The Return of the King,2003,Adventure,PG-13,201,New Line Cinema,"$94,000,000","$377,027,325","$763,654,686","$1,140,682,011"
69,Pirates of the Caribbean: Dead Man's Chest,2006,Fantasy,PG-13,151,Walt Disney Studios Motion Pictures,"$225,000,000","$423,315,812","$642,863,913","$1,066,179,725"
89,The Dark Knight,2008,Crime,PG-13,152,Warner Bros.,"$185,000,000","$533,345,358","$469,700,000","$1,003,045,358"
19,Harry Potter and the Sorcerer's Stone,2001,Adventure,PG,152,Warner Bros.,"$125,000,000","$317,575,550","$657,179,821","$974,755,371"
82,Pirates of the Caribbean: At World's End,2007,Adventure,PG-13,169,Walt Disney Studios Motion Pictures,"$300,000,000","$309,420,425","$651,576,067","$960,996,492"
83,Harry Potter and the Order of the Phoenix,2007,Family,PG-13,138,Warner Bros.,"$150,000,000","$292,004,738","$649,672,105","$941,676,843"
34,The Lord of the Rings: The Two Towers,2002,Adventure,PG-13,179,New Line Cinema,"$94,000,000","$339,789,881","$596,899,854","$936,689,735"
100,Harry Potter and the Half-Blood Prince,2009,Fantasy,PG,153,Warner Bros.,"$250,000,000","$301,959,197","$632,000,000","$933,959,197"
49,Shrek 2,2004,Family,PG,93,DreamWorks Distribution,"$150,000,000","$441,226,247","$487,534,523","$928,760,770"


In [38]:
# Titles whose extracted year lies in 2010..2019 (both ends included)
movies_2010_2019 = df_modified[df_modified['year'].between(2010, 2019)]

# Show the first ten rows of that decade's subset
movies_2010_2019.head(10)

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$
187,Avengers: Endgame,2019,Sci-Fi,PG-13,181,Walt Disney Studios Motion Pictures,"$356,000,000","$858,373,000","$1,939,128,328","$2,797,501,328"
1,Star Wars: Episode VII - The Force Awakens,2015,Sci-Fi,PG-13,138,Walt Disney Studios Motion Pictures,"$245,000,000","$936,662,225","$1,131,561,399","$2,068,223,624"
0,Jurassic World,2015,Adventure,PG-13,124,Universal Pictures,"$150,000,000","$652,270,625","$1,018,130,012","$1,670,400,637"
188,The Lion King,2019,Animation,PG,118,Walt Disney Studios Motion Pictures,"$260,000,000","$543,638,043","$1,113,305,351","$1,656,943,394"
128,The Avengers,2012,Action,PG-13,143,Walt Disney Studios Motion Pictures,"$220,000,000","$623,357,910","$895,455,078","$1,518,812,988"
4,Furious 7,2015,Thriller,PG-13,137,Universal Pictures,"$190,000,000","$353,007,020","$1,162,040,651","$1,515,047,671"
190,Frozen II,2019,Family,PG,103,Walt Disney Studios Motion Pictures,"$150,000,000","$477,373,578","$972,653,355","$1,450,026,933"
2,Avengers: Age of Ultron,2015,Adventure,PG-13,141,Walt Disney Studios Motion Pictures,"$250,000,000","$459,005,868","$943,800,000","$1,402,805,868"
167,Star Wars: Episode VIII - The Last Jedi,2017,Sci-Fi,PG-13,152,Walt Disney Studios Motion Pictures,"$317,000,000","$620,181,382","$712,358,507","$1,332,539,889"
180,Jurassic World: Fallen Kingdom,2018,Adventure,PG-13,128,Universal Pictures,"$170,000,000","$417,719,760","$890,748,184","$1,308,467,944"
