In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


In [21]:
def make_graph(df,x,y,group):
    #plots x and y scatter plot, groups by certain category, adds hover box with title and data
    fig = px.scatter(df, y = y, x=x, color = group, symbol = group, hover_name="Title", hover_data = ["Year"])
    
    #adds titles and axis labels
    fig.update_layout(title='Budget and Worldwide Box Office Sales ($)', xaxis_title = str(x), yaxis_title = str(y))
    
    #adds line to visualize budget of each movie to see which other movies did better at the box office 
    fig.add_trace(go.Scatter(y=df["Worldwide"], x=df["sc_bud"] , name="Shang Chi Budget",line_shape='hv', marker = dict(color = 'gold')))
    
    fig.add_trace(go.Scatter(y=df["Worldwide"], x=df["bp_bud"] , name="Black Panther Budget",line_shape='hv', marker = dict(color = '#83b44b')))
    
    fig.add_trace(go.Scatter(y=df["Worldwide"], x=df["bh_bud"] , name="Big Hero Six Budget",line_shape='hv', marker = dict(color = '#03a9f4')))
    
    #adds stars to highlight Shang Chi, Black Panther, and Big Hero 6 points
    fig.add_trace(go.Scatter(x=shang_chi["Budget"], y=[432243292],name="Shang Chi", mode = 'markers',marker_symbol = 'star', marker = dict(color = '#FFA500', size = 15)))
    
    fig.add_trace(go.Scatter(x=black_panther["Budget"], y=[1346913161], name="Black Panther",mode = 'markers',marker_symbol = 'star', marker = dict(color='#378805', size = 15)))
    
    fig.add_trace(go.Scatter(x=big_hero["Budget"], y=[657827828],name="Big Hero Six", mode = 'markers',marker_symbol = 'star', marker = dict(color='blue', size = 15)))
    
    fig.show()
    
    

In [3]:
data = pd.read_csv("marvel_clean.csv")

In [4]:
#adds new columns that truncates release dates to just year and adds columns budget for each diverse movie to create line
data["Year"] = data["ReleaseDateUS"].str[0:4]
data["sc_bud"] = 150000000
data["bp_bud"] = 200000000
data["bh_bud"] = 165000000



In [5]:
#data for each movie with diverse cast
shang_chi = data[data["Title"] == "Shang-Chi and the Legend of the Ten Rings"]
black_panther = data[data["Title"] == "Black Panther"]
big_hero = data[data["Title"] == "Big Hero 6"]


In [22]:
make_graph(data,x = "Budget", y = "Worldwide", group = "Title")


In [7]:
data.shape

(64, 12)

In [8]:
#data for each movie with diverse cast
shang_chi = data[data["Title"] == "Shang-Chi and the Legend of the Ten Rings"]
black_panther = data[data["Title"] == "Black Panther"]
big_hero = data[data["Title"] == "Big Hero 6"]


In [9]:
shang_chi["Budget"]

60    150000000
Name: Budget, dtype: int64

In [10]:
#larger budget or equal to shang_chi
sc_budget_larger = data[data["Budget"] >= 150000000]

In [11]:
#worldwide profit
shang_chi["Worldwide"]

60    432243292
Name: Worldwide, dtype: int64

In [12]:
schw = sc_budget_larger[sc_budget_larger ["Worldwide"] >= 432243292] 

In [13]:
#shang chi only did more Worldwide sales than 6 movies of the same budget or more
sc_budget_larger.shape[0] - schw.shape[0]

6

In [14]:
#movies with a budget equal to or larger than shang chi that made more than or equal to worldwide
sc_budget_larger[sc_budget_larger ["Worldwide"] >= 432243292] 

Unnamed: 0,Title,Distributor,ReleaseDateUS,Budget,OpeningWeekendNorthAmerica,NorthAmerica,OtherTerritories,Worldwide,Year,sc_bud,bp_bud,bh_bud
9,Spider-Man 2,Sony Pictures,2004-06-30 00:00:00,200000000,88156227,373585825,415390628,788976453,2004,150000000,200000000,165000000
13,X-Men: The Last Stand,20th Century Fox,2006-05-26 00:00:00,210000000,102750665,234362462,224997093,459359555,2006,150000000,200000000,165000000
15,Spider-Man 3,Sony Pictures,2007-05-04 00:00:00,258000000,151116516,336530303,554341323,890871626,2007,150000000,200000000,165000000
21,Iron Man 2,Paramount Pictures,2010-05-07 00:00:00,200000000,128122480,312433331,311500000,623933331,2010,150000000,200000000,165000000
22,Thor,Paramount Pictures,2011-05-06 00:00:00,150000000,65723338,181030624,268295994,449326618,2011,150000000,200000000,165000000
26,The Avengers,Walt Disney Studios Motion Pictures,2012-05-04 00:00:00,220000000,207438708,623357910,895455078,1518812988,2012,150000000,200000000,165000000
27,The Amazing Spider-Man,Sony Pictures,2012-07-03 00:00:00,230000000,62004688,262030663,495900000,757930663,2012,150000000,200000000,165000000
28,Iron Man 3,Walt Disney Studios Motion Pictures,2013-05-03 00:00:00,200000000,174144585,409013994,805797258,1214811252,2013,150000000,200000000,165000000
30,Thor: The Dark World,Walt Disney Studios Motion Pictures,2013-11-08 00:00:00,170000000,85737841,206362140,438209262,644571402,2013,150000000,200000000,165000000
31,Captain America: The Winter Soldier,Walt Disney Studios Motion Pictures,2014-04-04 00:00:00,170000000,95023721,259766572,454497695,714264267,2014,150000000,200000000,165000000


In [15]:
#black panther data
black_panther = data[data["Title"] == "Black Panther"]

In [16]:
black_panther["Budget"]

48    200000000
Name: Budget, dtype: int64

In [17]:
bp_budget_larger = data[data["Budget"] >= 200000000]

In [18]:
bp_budget_larger

Unnamed: 0,Title,Distributor,ReleaseDateUS,Budget,OpeningWeekendNorthAmerica,NorthAmerica,OtherTerritories,Worldwide,Year,sc_bud,bp_bud,bh_bud
9,Spider-Man 2,Sony Pictures,2004-06-30 00:00:00,200000000,88156227,373585825,415390628,788976453,2004,150000000,200000000,165000000
13,X-Men: The Last Stand,20th Century Fox,2006-05-26 00:00:00,210000000,102750665,234362462,224997093,459359555,2006,150000000,200000000,165000000
15,Spider-Man 3,Sony Pictures,2007-05-04 00:00:00,258000000,151116516,336530303,554341323,890871626,2007,150000000,200000000,165000000
21,Iron Man 2,Paramount Pictures,2010-05-07 00:00:00,200000000,128122480,312433331,311500000,623933331,2010,150000000,200000000,165000000
26,The Avengers,Walt Disney Studios Motion Pictures,2012-05-04 00:00:00,220000000,207438708,623357910,895455078,1518812988,2012,150000000,200000000,165000000
27,The Amazing Spider-Man,Sony Pictures,2012-07-03 00:00:00,230000000,62004688,262030663,495900000,757930663,2012,150000000,200000000,165000000
28,Iron Man 3,Walt Disney Studios Motion Pictures,2013-05-03 00:00:00,200000000,174144585,409013994,805797258,1214811252,2013,150000000,200000000,165000000
32,The Amazing Spider-Man 2,Sony Pictures,2014-05-02 00:00:00,255000000,91608337,202853933,506128390,708982323,2014,150000000,200000000,165000000
33,X-Men: Days of Future Past,20th Century Fox,2014-05-23 00:00:00,200000000,90823660,233921534,513941241,747862775,2014,150000000,200000000,165000000
36,Avengers: Age of Ultron,Walt Disney Studios Motion Pictures,2015-05-01 00:00:00,250000000,191271109,459005868,946397826,1405403694,2015,150000000,200000000,165000000


In [19]:
black_panther["Worldwide"]

48    1346913161
Name: Worldwide, dtype: int64

In [20]:
#black panther did better than 13/19 movies of budgets the same or more
print(bp_budget_larger.shape[0] - bpw.shape[0])
print(bp_budget_larger.shape[0])


NameError: name 'bpw' is not defined

In [None]:
bpw = bp_budget_larger[sc_budget_larger ["Worldwide"] >= 1346913161] 

In [None]:
bp_budget_larger[sc_budget_larger ["Worldwide"] >= 1346913161] 

All of the movies above have a budget that is $20million and more above Black Panther's budget. All of the other movies are also apart of larger series, while Black Panther was a one off movie.  

In [None]:
#big hero 6
big_hero = data[data["Title"] == "Big Hero 6"]

In [None]:
big_hero["Budget"]

In [None]:
bh_budget_larger = data[data["Budget"] >=  165000000]

In [None]:
big_hero["Worldwide"]

In [None]:
bh_budget_larger[sc_budget_larger ["Worldwide"] >= 657827828] 