# Importing libraries and files

In [None]:
import pandas as pd
import numpy as np 
import plotly.graph_objects as go

In [None]:
!ls

# Reading in the file

In [None]:
movie_data_df = pd.read_csv('interim_data.csv')

In [None]:
movie_data_df.info()

# Cleanup

In [None]:
movie_data_df.columns

In [None]:
movie_data_df.head(2)

In [None]:
movie_data_df.columns[:10]

## Obtaining Clean Years

In [None]:
movie_data_df = movie_data_df.drop(labels = 'Unnamed: 0', axis = 1)

In [None]:
clean_list = ['Year']

In [None]:
type(movie_data_df['Year'][1]) == int

In [None]:
movie_data_df['Year'].unique()

In [None]:
# Convert every column named in clean_list to plain integers.  Values that are
# already integers (Python or NumPy) pass through unchanged; strings such as
# '2011–2016' have the en dash and spaces stripped before conversion, so a
# year range collapses into one large number (e.g. 20112016).
for column in clean_list:
    if column not in movie_data_df.columns:
        continue
    movie_data_df[column] = movie_data_df[column].map(
        lambda value: value
        if isinstance(value, (int, np.integer))
        else int(value.replace('–', '').replace(' ', ''))
    )
        
        

In [None]:
movie_data_df.groupby('Year').size()

In [None]:
# Keep 2015 onwards, but cap at four digits: year ranges such as '2011–2016'
# were collapsed into numbers like 20112016 during cleaning and would otherwise
# pass a plain ">= 2015" test.  between() is inclusive at both ends.
idx = movie_data_df['Year'].between(2015, 9999)

In [None]:
movie_data_df = movie_data_df[idx]

In [None]:
movie_data_df['Year'].unique()

In [None]:
idx = movie_data_df['Year'] == 20112016 

In [None]:
movie_data_df = movie_data_df[~idx]

In [None]:
idx = movie_data_df['Year'] == 20042011 

In [None]:
movie_data_df = movie_data_df[~idx]

In [None]:
idx = movie_data_df['Year'] == 20102015 

In [None]:
movie_data_df = movie_data_df[~idx]

In [None]:
movie_data_df['Year'].unique()

In [None]:
movie_data_df.groupby('Year').size()

## Obtaining Clean Runtimes

In [None]:
int(movie_data_df['Runtime'][0].replace('min','').replace(' ',''))

In [None]:
clean_list = ['Runtime']

In [None]:
empty_runtimes = movie_data_df['Runtime'].isna()

In [None]:
empty_runtimes.sum()

In [None]:
movie_data_df['Runtime'][empty_runtimes]

In [None]:
# Turn runtime strings such as '120 min' into integer minutes; missing
# runtimes become 0.  The column is rebuilt in a single assignment instead of
# writing through a chained index (df[col][mask] = ...), which pandas may
# apply to a temporary copy and silently discard.
for column in clean_list:
    if column not in movie_data_df.columns:
        continue
    movie_data_df[column] = movie_data_df[column].map(
        lambda value: 0
        if pd.isna(value)
        else int(str(value).replace('min', '').replace(' ', ''))
    )
        

In [None]:
movie_data_df['Runtime'].head(2)

## Obtaining Clean Metascores

In [None]:
empty_metascores = movie_data_df['Metascore'].isna()

In [None]:
empty_metascores.sum()

In [None]:
clean_list = ['Metascore']

In [None]:
# Replace missing values with 0 in every column named in clean_list.
# fillna() returns a new Series that is assigned back in one step, avoiding
# the chained-index write (df[col][mask] = 0) that pandas may apply to a copy.
for column in clean_list:
    if column not in movie_data_df.columns:
        continue
    movie_data_df[column] = movie_data_df[column].fillna(0)
        

In [None]:
empty_metascores = movie_data_df['Metascore'].isna()
empty_metascores.sum()

# Plotting Movie Count by Year

In [None]:
movies_with_years = movie_data_df[['Title', 'Year']]

In [None]:
movies_with_years.head(2)

In [None]:
movies_with_years_count = movies_with_years.groupby('Year').count().reset_index()

In [None]:
movies_with_years_count

In [None]:
x = list(movies_with_years_count['Year'])

In [None]:
y = list(movies_with_years_count['Title'])

In [None]:
import plotly.graph_objects as go

# Bar chart of the number of movies per year (x = years, y = title counts).
movie_count_bars = go.Bar(x=x, y=y, marker_color='#bfa878')
total_movies_plot = go.Figure(data=[movie_count_bars])

# Shared presentation style: ggplot2 template with a large Calibri font.
chart_font = dict(family="Calibri", size=24, color='#7a4b4b')
total_movies_plot.update_layout(template="ggplot2", font=chart_font)

total_movies_plot.show()

# Plotting Total Worldwide Revenue

In [None]:
movies_with_worldwide_revenue = movie_data_df[['Year', 'Worldwide']]

In [None]:
movies_with_worldwide_revenue_sum = movies_with_worldwide_revenue.groupby('Year').sum().reset_index()

In [None]:
movies_with_worldwide_revenue_sum

In [None]:
x_sum = list(movies_with_worldwide_revenue_sum['Year'])

In [None]:
y_sum = list(movies_with_worldwide_revenue_sum['Worldwide'])

In [None]:

# Bar chart of summed worldwide revenue per year, with default styling.
worldwide_revenue_bars = go.Bar(x=x_sum, y=y_sum)
worldwide_revenue_plot = go.Figure(data=[worldwide_revenue_bars])
worldwide_revenue_plot.show()

# Plotting the same with Domestic and Foreign Split

In [None]:
movies_with_Domestic_and_Foreign_revenue = movie_data_df[['Year', 'Domestic', 'Foreign']]

In [None]:
movies_with_Domestic_and_Foreign_revenue_sum = movies_with_Domestic_and_Foreign_revenue.groupby('Year').sum().reset_index()

In [None]:
movies_with_Domestic_and_Foreign_revenue_sum

In [None]:
y_sum_Domestic = list(movies_with_Domestic_and_Foreign_revenue_sum['Domestic'])
y_sum_Foreign = list(movies_with_Domestic_and_Foreign_revenue_sum['Foreign'])

In [None]:
# One bar series per revenue source, stacked per year.
domestic_bars = go.Bar(name='Domestic', x=x_sum, y=y_sum_Domestic, marker_color='#bfa878')
foreign_bars = go.Bar(name='Foreign', x=x_sum, y=y_sum_Foreign, marker_color='#d9cdb4')
stacked_worldwide_revenue_plot = go.Figure(data=[domestic_bars, foreign_bars])

# Styling and stacking mode are applied in a single layout update.
stacked_worldwide_revenue_plot.update_layout(
    barmode='stack',
    yaxis_title="Revenue (USD)",
    template="ggplot2",
    showlegend=False,
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)
stacked_worldwide_revenue_plot.show()

# Plotting Averages

In [None]:
movies_with_worldwide_revenue_and_title  = pd.concat([movies_with_years_count, movies_with_worldwide_revenue_sum['Worldwide']], axis = 1)

In [None]:
movies_with_worldwide_revenue_and_title

In [None]:
movies_with_worldwide_revenue_and_title['Average Revenue'] = movies_with_worldwide_revenue_and_title['Worldwide']/movies_with_worldwide_revenue_and_title['Title']

In [None]:
movies_with_worldwide_revenue_and_title

In [None]:
# One boolean row mask per release year, 2015 through 2019.
idx_2015, idx_2016, idx_2017, idx_2018, idx_2019 = (
    movie_data_df['Year'] == year for year in range(2015, 2020)
)

In [None]:
# Split the full table into one frame per year using the masks built above.
(
    movie_data_2015,
    movie_data_2016,
    movie_data_2017,
    movie_data_2018,
    movie_data_2019,
) = (
    movie_data_df[year_mask]
    for year_mask in (idx_2015, idx_2016, idx_2017, idx_2018, idx_2019)
)

In [None]:
# Keep only the title and the three revenue columns for each year's frame.
revenue_columns = ['Title', 'Worldwide', 'Domestic', 'Foreign']
(
    movie_data_2015_revenue,
    movie_data_2016_revenue,
    movie_data_2017_revenue,
    movie_data_2018_revenue,
    movie_data_2019_revenue,
) = (
    year_frame.filter(revenue_columns)
    for year_frame in (
        movie_data_2015,
        movie_data_2016,
        movie_data_2017,
        movie_data_2018,
        movie_data_2019,
    )
)

In [None]:
movie_data_2015_revenue

In [None]:
movie_data_2015_revenue['Worldwide'].quantile(0.25)

In [None]:
movie_data_2015_revenue['Worldwide'].median()

In [None]:
movie_data_2015_revenue['Worldwide'].std()

In [None]:
# Worldwide revenue of every movie, as a plain list per year, for the box plot.
(
    trace_2015_worldwide,
    trace_2016_worldwide,
    trace_2017_worldwide,
    trace_2018_worldwide,
    trace_2019_worldwide,
) = (
    list(revenue_frame['Worldwide'])
    for revenue_frame in (
        movie_data_2015_revenue,
        movie_data_2016_revenue,
        movie_data_2017_revenue,
        movie_data_2018_revenue,
        movie_data_2019_revenue,
    )
)

In [None]:
# Horizontal box plot of per-movie worldwide revenue, one box per year.
rev_plot_worldwide = go.Figure()
yearly_revenue_traces = [
    ('2015', trace_2015_worldwide),
    ('2016', trace_2016_worldwide),
    ('2017', trace_2017_worldwide),
    ('2018', trace_2018_worldwide),
    ('2019', trace_2019_worldwide),
]
for year_label, revenues in yearly_revenue_traces:
    rev_plot_worldwide.add_trace(go.Box(x=revenues, name=year_label))

rev_plot_worldwide.update_layout(
    xaxis_title="Revenue (USD)",
    template="ggplot2",
    showlegend=False,
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)

rev_plot_worldwide.show()

In [None]:
def calculate_stats(dataframe, column = 'Worldwide'):
    """Summarise one column as its mean plus the 1x and 2x standard-deviation bands.

    Args:
        dataframe: pandas DataFrame that contains ``column``.
        column: Name of the column to summarise.

    Returns:
        dict with the keys 'mean', 'std_x_1_lower', 'std_x_1_upper',
        'std_x_2_upper' and 'std_x_2_lower'.
    """
    values = dataframe[column]
    center = values.mean()
    spread = values.std()
    return {
        'mean': center,
        'std_x_1_lower': center - spread,
        'std_x_1_upper': center + spread,
        'std_x_2_upper': center + 2*spread,
        'std_x_2_lower': center - 2*spread,
    }

In [None]:
movie_data_2015_revenue['Worldwide'].mean()

In [None]:
year_dataframe_list = [movie_data_2015_revenue, movie_data_2016_revenue, movie_data_2017_revenue, movie_data_2018_revenue, movie_data_2019_revenue]

In [None]:
# One stats dict (mean and std-dev bands of 'Worldwide') per yearly frame.
Worldwide_line_plot_values = [
    calculate_stats(yearly_frame) for yearly_frame in year_dataframe_list
]

In [None]:
Worldwide_line_plot_values

In [None]:
# Pull each statistic out of the per-year stats dicts into its own list,
# ordered like Worldwide_line_plot_values.  The 2x lower band reads the
# 'std_x_2_lower' key (it previously duplicated the upper band).
mean_values_worldwide = [stats['mean'] for stats in Worldwide_line_plot_values]
std_x_1_lower_values_worldwide = [stats['std_x_1_lower'] for stats in Worldwide_line_plot_values]
std_x_1_upper_values_worldwide = [stats['std_x_1_upper'] for stats in Worldwide_line_plot_values]
std_x_2_upper_values_worldwide = [stats['std_x_2_upper'] for stats in Worldwide_line_plot_values]
std_x_2_lower_values_worldwide = [stats['std_x_2_lower'] for stats in Worldwide_line_plot_values]

In [None]:
years = ['2015', '2016', '2017', '2018', '2019']

In [None]:
mean_values_worldwide

In [None]:
from plotly.subplots import make_subplots

In [None]:

# Stacked Domestic/Foreign revenue bars on the primary axis, with the mean and
# the +/- 1 standard-deviation lines on a secondary axis.  The 2x
# standard-deviation series are not plotted.
revenue_bar_with_lines = make_subplots(specs=[[{"secondary_y": True}]])

line_marker = dict(color='#7a4b4b', size=10)
line_series = [
    ('Mean', mean_values_worldwide, dict(color='#7a4b4b', width=4)),
    ('1xSTD up', std_x_1_upper_values_worldwide, dict(color='#7a4b4b', width=4, dash='dash')),
    ('1xSTD down', std_x_1_lower_values_worldwide, dict(color='#7a4b4b', width=4, dash='dash')),
]
for series_name, series_values, line_style in line_series:
    revenue_bar_with_lines.add_trace(
        go.Scatter(x=x, y=series_values, name=series_name, line=line_style, marker=line_marker),
        secondary_y=True,
    )

bar_series = [
    ('Domestic', y_sum_Domestic, '#bfa878'),
    ('Foreign', y_sum_Foreign, '#d9cdb4'),
]
for series_name, series_values, bar_color in bar_series:
    revenue_bar_with_lines.add_trace(
        go.Bar(name=series_name, x=x_sum, y=series_values, marker_color=bar_color)
    )

revenue_bar_with_lines.update_layout(
    yaxis_title="Revenue (USD)",
    template="ggplot2",
    barmode='stack',
    showlegend=False,
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)
revenue_bar_with_lines.update_yaxes(title_text="Avg Revenue (USD)", secondary_y=True)

revenue_bar_with_lines.show()

# Plot by Runtime

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import statistics as stat

In [None]:
movies_with_Title_Worldwide_and_Runtime = movie_data_df[['Title', 'Worldwide', 'Runtime']]

In [None]:
movies_with_Title_Worldwide_and_Runtime.head()

In [None]:
# Sort by runtime and rebind the name.  Sorting in place on a frame that was
# selected out of movie_data_df triggers pandas' SettingWithCopyWarning.
movies_with_Title_Worldwide_and_Runtime = movies_with_Title_Worldwide_and_Runtime.sort_values(by = 'Runtime')

In [None]:
zero_runtime_indices = movies_with_Title_Worldwide_and_Runtime['Runtime'] == 0

In [None]:
movies_with_Title_Worldwide_and_Runtime = movies_with_Title_Worldwide_and_Runtime[~zero_runtime_indices]

In [None]:
movies_with_Title_Worldwide_and_Runtime.reset_index(inplace = True)

In [None]:
movies_with_Title_Worldwide_and_Runtime.drop(labels = ['index'], axis = 1, inplace = True)

In [None]:
movies_with_Title_Worldwide_and_Runtime

In [None]:
len(movies_with_Title_Worldwide_and_Runtime)/5

In [None]:
# Five runtime bins of 222 rows each, cut from the runtime-sorted table.
# The last bin is open-ended so that rows beyond position 1110 are kept
# instead of being silently dropped from the totals.
Runtime_bin_1 = movies_with_Title_Worldwide_and_Runtime[0:222]
Runtime_bin_2 = movies_with_Title_Worldwide_and_Runtime[222:444]
Runtime_bin_3 = movies_with_Title_Worldwide_and_Runtime[444:666]
Runtime_bin_4 = movies_with_Title_Worldwide_and_Runtime[666:888]
Runtime_bin_5 = movies_with_Title_Worldwide_and_Runtime[888:]

In [None]:
Runtime_list = [Runtime_bin_1, Runtime_bin_2, Runtime_bin_3, Runtime_bin_4, Runtime_bin_5]

In [None]:
movies_with_Title_Worldwide_and_Runtime['Runtime'].iloc[222]

In [None]:
# Runtime found at row 222, 444, 666 and 888 of the sorted table, i.e. at the
# first row of bins 2 through 5.
x_runtime = [
    movies_with_Title_Worldwide_and_Runtime['Runtime'].iloc[boundary_row]
    for boundary_row in range(222, 5 * 222, 222)
]

In [None]:
x_runtime

In [None]:
x_runtime = ['< 95', '< 104', '< 114', '< 125', '>= 125']

In [None]:
Runtime_bin_1['Worldwide'].sum()

In [None]:
y_runtime = []

In [None]:
# Total worldwide revenue per runtime bin.  The list is rebuilt from scratch
# so re-running this cell cannot append a second copy of the values.
y_runtime = [runtime_df['Worldwide'].sum() for runtime_df in Runtime_list]

In [None]:
y_runtime

In [None]:
plt.hist(x = list(movie_data_df['Runtime']), bins = 5)

In [None]:
# Bar chart of total worldwide revenue per runtime bin.
runtime_bars = go.Bar(x=x_runtime, y=y_runtime, marker_color='#bfa878')
runtime_plot = go.Figure(data=[runtime_bars])
runtime_plot.update_layout(
    template="ggplot2",
    xaxis_title='minutes',
    yaxis_title='Worldwide Revenue (USD)',
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)

runtime_plot.show()

# Plot by Metascore

In [None]:
movies_with_Title_Worldwide_and_Metascore = movie_data_df[['Title', 'Worldwide', 'Metascore']]

In [None]:
# Sort by Metascore and rebind the name.  Sorting in place on a frame that was
# selected out of movie_data_df triggers pandas' SettingWithCopyWarning.
movies_with_Title_Worldwide_and_Metascore = movies_with_Title_Worldwide_and_Metascore.sort_values(by = 'Metascore')

In [None]:
zero_metascore_indices = movies_with_Title_Worldwide_and_Metascore['Metascore'] == 0

In [None]:
movies_with_Title_Worldwide_and_Metascore = movies_with_Title_Worldwide_and_Metascore[~zero_metascore_indices]

In [None]:
movies_with_Title_Worldwide_and_Metascore.reset_index(inplace = True)

In [None]:
movies_with_Title_Worldwide_and_Metascore.drop(labels = ['index'], axis = 1, inplace = True)

In [None]:
movies_with_Title_Worldwide_and_Metascore

In [None]:
len(movies_with_Title_Worldwide_and_Metascore)

In [None]:
# Ten Metascore bins cut from the Metascore-sorted table: nine bins of 85 rows
# each, followed by an open-ended tenth bin holding every remaining row.
metascore_bin_size = 85
last_bin_start = 9 * metascore_bin_size
(
    Metascore_bin_1,
    Metascore_bin_2,
    Metascore_bin_3,
    Metascore_bin_4,
    Metascore_bin_5,
    Metascore_bin_6,
    Metascore_bin_7,
    Metascore_bin_8,
    Metascore_bin_9,
    Metascore_bin_10,
) = (
    movies_with_Title_Worldwide_and_Metascore[bin_start:bin_start + metascore_bin_size]
    if bin_start < last_bin_start
    else movies_with_Title_Worldwide_and_Metascore[bin_start:]
    for bin_start in range(0, 10 * metascore_bin_size, metascore_bin_size)
)

In [None]:
Metascore_list = [Metascore_bin_1, Metascore_bin_2, Metascore_bin_3, Metascore_bin_4, Metascore_bin_5,Metascore_bin_6, Metascore_bin_7, Metascore_bin_8, Metascore_bin_9, Metascore_bin_10 ]

In [None]:
movies_with_Title_Worldwide_and_Metascore['Metascore'].iloc[171]

In [None]:
# Metascore found at rows 85, 170, ..., 765 of the sorted table, i.e. at the
# first row of bins 2 through 10.
x_metascore = [
    movies_with_Title_Worldwide_and_Metascore['Metascore'].iloc[boundary_row]
    for boundary_row in range(85, 10 * 85, 85)
]

In [None]:
x_metascore

In [None]:
x_metascore = ['< 35', '< 42', '< 48', '< 53', '< 58', '< 63', '< 68', '< 74', '< 81', '>= 81']

In [None]:
y_metascore = []

In [None]:
# Total worldwide revenue per Metascore bin.  The list is rebuilt from scratch
# so re-running this cell cannot append a second copy of the values.
y_metascore = [metascore_df['Worldwide'].sum() for metascore_df in Metascore_list]

In [None]:
y_metascore

In [None]:
# Bar chart of total worldwide revenue per Metascore bin.
metascore_bars = go.Bar(x=x_metascore, y=y_metascore, marker_color='#bfa878')
metascore_plot = go.Figure(data=[metascore_bars])
metascore_plot.update_layout(
    template="ggplot2",
    xaxis_title='Metascore',
    yaxis_title='Worldwide Revenue (USD)',
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)

metascore_plot.show()

# Bar Chart by Genre

In [None]:
genre_action = movie_data_df[ movie_data_df['Genre_Action'] == 1]

In [None]:
genre_action[['Year', 'Worldwide']].groupby(['Year']).agg('sum')

In [None]:
sum(genre_action[['Year', 'Worldwide']].groupby(['Year']).agg('sum')['Worldwide'])

Now that we know how to do it for one genre, build a function that we can call for each movie genre. It returns total and count, so that the client can calculate average.

In [None]:
def group_by_genre(genre):
    """Return total worldwide revenue and movie count for one genre flag column.

    Args:
        genre: Name of a column in the module-level ``movie_data_df``; rows
            where that column equals 1 are treated as belonging to the genre.

    Returns:
        (total, count): the per-year 'Worldwide' sums added together, and the
        per-year row counts added together.
    """
    selected = movie_data_df[movie_data_df[genre] == 1]
    per_year = selected[['Year', 'Worldwide']].groupby(['Year'])
    yearly_revenue = per_year.agg('sum')['Worldwide']
    yearly_counts = per_year.size()
    return sum(yearly_revenue), sum(yearly_counts)
    

Test it for Action genre and make sure we get the same answer.

In [None]:
group_by_genre("Genre_Action")

Now build a loop that, for each genre, adds the total, the average, and the genre name to separate lists.

In [None]:
total_list = []
avg_list = []
genre_list = []

In [None]:
# Build three parallel lists (total revenue, average revenue per movie, genre
# name) for every genre flag column with positive revenue.  The lists are
# reset here so re-running the cell does not append duplicates.
total_list = []
avg_list = []
genre_list = []
genre_prefix = "Genre_"
for col in movie_data_df.columns:
    # Match on the prefix, since the name is recovered by slicing it off.
    if not col.startswith(genre_prefix):
        continue
    total, count = group_by_genre(col)
    if total > 0:
        print(total, count)
        total_list.append(total)
        avg_list.append(int(total/count))
        genre_list.append(col[len(genre_prefix):])

print(total_list)
print(avg_list)
print(genre_list)

Now determine the top x. This starts by creating a sorted list in descending order (large to small). top is the number of elements to show. 

In [None]:
top = 10
total_sorted = sorted(total_list, reverse=True)
total_sorted

Create empty top lists for total, average, and genre.

In [None]:
top_total = []
top_genres = []
top_avg = []

The loop below walks through the top x totals in turn. For each one, it finds the index of that total in the original lists and uses that index to append the matching entries to the top_xxx lists.

In [None]:
# Select the `top` genres by total revenue.  Positions are ranked with a
# stable sort, so genres with equal totals keep their original order, and the
# slice simply yields fewer entries when fewer than `top` genres exist
# (instead of raising IndexError).
ranked_positions = sorted(
    range(len(total_list)), key=lambda j: total_list[j], reverse=True
)[:top]
top_total = [total_list[j] for j in ranked_positions]
top_avg = [avg_list[j] for j in ranked_positions]
top_genres = [genre_list[j] for j in ranked_positions]

for j in ranked_positions:
    print(total_list[j], avg_list[j], genre_list[j])

In [None]:
top_total

In [None]:
top_avg

In [None]:
top_genres

In [None]:
#plt.bar(top_genres, top_total)

# Bar chart of total worldwide revenue for the top genres.
top_genre_bars = go.Bar(x=top_genres, y=top_total)
plot_top_genres_total = go.Figure(data=[top_genre_bars])
plot_top_genres_total.show()

In [None]:
#plt.plot(top_genres, top_avg)

# Line chart of average revenue per movie for the top genres.
top_genre_avg_line = go.Scatter(x=top_genres, y=top_avg)
plot_top_genres_avg = go.Figure(data=[top_genre_avg_line])
plot_top_genres_avg.show()

In [None]:
# Combining the above two plots
# Total revenue as bars on the primary axis, average revenue as a line on the
# secondary axis, both over the top genres.
plot_top_genres_total_and_avg = make_subplots(specs=[[{"secondary_y": True}]])

genre_total_bars = go.Bar(x=top_genres, y=top_total, name='Total Revenue', marker_color='#bfa878')
genre_avg_line = go.Scatter(x=top_genres, y=top_avg, name='Avg Revenue', marker_color='#7a4b4b')
plot_top_genres_total_and_avg.add_trace(genre_total_bars)
plot_top_genres_total_and_avg.add_trace(genre_avg_line, secondary_y=True)

plot_top_genres_total_and_avg.update_layout(
    yaxis_title="Revenue (USD)",
    showlegend=False,
    template="ggplot2",
    font=dict(family="Calibri", size=24, color='#7a4b4b'),
)
plot_top_genres_total_and_avg.update_yaxes(title_text="Avg Revenue (USD)", secondary_y=True)

plot_top_genres_total_and_avg.show()

## Chart Top Actors

Similar to genre categories, now determine the top x actors.

In [None]:
# Print actors credited in more than two movies, looking only at the first
# 100 actor flag columns.
actor_columns = [col for col in movie_data_df.columns if "Actor_" in col]
for count, col in enumerate(actor_columns):
    actor = movie_data_df[movie_data_df[col] == 1]
    if count < 100 and len(actor) > 2:
        print(col, len(actor))
# Leave `count` holding the number of actor columns visited.
count = len(actor_columns)
        

We will try to work with one actor first to ensure we get the logic to work properly. We will use Actor_Tom_Holland.

In [None]:
actor = movie_data_df[ movie_data_df['Actor_Tom_Holland'] == 1]

In [None]:
actor

In [None]:
actor[['Year', 'Worldwide']].groupby(['Year']).agg('sum')

In [None]:
sum(actor[['Year', 'Worldwide']].groupby(['Year']).agg('sum')['Worldwide'])

In [None]:
def group_by_actor(actor):
    """Return total worldwide revenue and movie count for one actor flag column.

    Args:
        actor: Name of a column in the module-level ``movie_data_df``; rows
            where that column equals 1 are treated as the actor's movies.

    Returns:
        (total, count): the per-year 'Worldwide' sums added together, and the
        per-year row counts added together.
    """
    appearances = movie_data_df[movie_data_df[actor] == 1]
    grouped = appearances[['Year', 'Worldwide']].groupby(['Year'])
    total = sum(grouped.agg('sum')['Worldwide'])
    count = sum(grouped.size())
    return total, count

In [None]:
group_by_actor('Actor_Tom_Holland')

In [None]:
total_list = []
avg_list = []
actor_list = []

In [None]:
# Build three parallel lists (total revenue, average revenue per movie, actor
# name) for every actor flag column with positive revenue.  The lists are
# reset here so re-running the cell does not append duplicates.
total_list = []
avg_list = []
actor_list = []
actor_prefix = "Actor_"
# Separate progress counter: `count` is overwritten by the movie count that
# group_by_actor returns, so it cannot also track how many actors were handled.
processed = 0
for col in movie_data_df.columns:
    if not col.startswith(actor_prefix):
        continue
    total, count = group_by_actor(col)
    if total > 0:
        # Progress output for every 50th actor kept.
        if processed % 50 == 0:
            print(total, count)
        total_list.append(total)
        avg_list.append(int(total/count))
        actor_list.append(col[len(actor_prefix):])
        processed += 1

print(total_list)
print(avg_list)
print(actor_list)

In [None]:
print(len(total_list), len(avg_list), len(actor_list))

In [None]:
count = 0
for col in movie_data_df.columns:
    if "Actor_" in col:
       count += 1
print(count)

In [None]:
top = 10
total_sorted = sorted(total_list, reverse=True)
total_sorted

In [None]:
top_total_actors = []
top_actors = []
top_avg_actors = []

In [None]:
top

In [None]:
# Select exactly the `top` actors by total revenue.  Positions are ranked with
# a stable sort, so actors with identical totals keep their original order
# and tied totals can no longer push the result past `top` entries.  The lists
# are rebuilt from scratch, so the cell is safe to re-run even after the
# underscores in top_actors have been replaced for display.
ranked_positions = sorted(
    range(len(total_list)), key=lambda j: total_list[j], reverse=True
)[:top]
top_total_actors = [total_list[j] for j in ranked_positions]
top_avg_actors = [avg_list[j] for j in ranked_positions]
top_actors = [actor_list[j] for j in ranked_positions]

for j in ranked_positions:
    print(total_list[j], avg_list[j], actor_list[j])
            

In [None]:
top_total_actors[0] > top_avg_actors[0]

In [None]:
print(top_actors, "\n", top_total_actors)

In [None]:
movie_data_df[movie_data_df['Actor_Robert_Downey_Jr.'] == 1]['Title']

In [None]:
movie_data_df[movie_data_df['Actor_Chris_Hemsworth'] == 1]['Title']

In [None]:
top_actors = list(map(lambda actor: actor.replace('_', ' '), top_actors))

In [None]:
# plt.bar(top_actors, top_total)

# Bar chart of total worldwide revenue for the top actors.
top_actor_bars = go.Bar(x=top_actors, y=top_total_actors)
plot_top_actors_total = go.Figure(data=[top_actor_bars])
plot_top_actors_total.show()

In [None]:
# plt.plot(top_actors, top_avg)

# Line chart of average revenue per movie for the top actors.  Uses the
# actor-specific averages (top_avg_actors); top_avg holds the genre averages.
plot_top_actors_avg = go.Figure([go.Scatter(x=top_actors, y=top_avg_actors)])
plot_top_actors_avg.show()

In [None]:
# Combining the above two plots
# Total revenue as bars on the primary axis, average revenue as a line on the
# secondary axis, both over the top actors.  The y-values come from the actor
# lists (top_total_actors / top_avg_actors); top_total and top_avg belong to
# the genre charts.
plot_top_actors_total_and_avg = make_subplots(specs=[[{"secondary_y": True}]])

plot_top_actors_total_and_avg.add_trace(go.Bar(x=top_actors, y=top_total_actors, marker_color='#bfa878'))
plot_top_actors_total_and_avg.add_trace(go.Scatter(x=top_actors, y=top_avg_actors, marker_color='#7a4b4b'), secondary_y=True)
plot_top_actors_total_and_avg.update_layout(
    yaxis_title="Revenue (USD)", showlegend = False,
    template="ggplot2",
    font=dict(
        family="Calibri",
        size=24,
        color='#7a4b4b'
    ))
plot_top_actors_total_and_avg.update_yaxes(title_text="Avg Revenue (USD)", secondary_y=True)
plot_top_actors_total_and_avg.show()

Done!