In [43]:
##Create a Bump chart to rank number of subscribers for different streaming services

import pandas as pd
import plotly
import plotly.express as px

In [51]:
## Read Sheet1 of the subscriber_counts workbook into a dataframe
## Data comes from webscraping of the Business of Apps website; the various
## webscraping results were combined by hand into this one file
df_subscriber_counts = pd.read_excel(
    "subscriber_counts.xlsx",
    sheet_name="Sheet1",
)

##preview the first five rows of the dataframe
df_subscriber_counts.head(n=5)

Unnamed: 0,Year,Streaming Service,Revenue ($bn),Number Subscribers (mm)
0,2011,Netflix,3.1,21.5
1,2012,Netflix,3.5,25.71
2,2013,Netflix,4.5,35.63
3,2014,Netflix,5.4,47.99
4,2015,Netflix,6.7,62.71


In [53]:
## Rank the streaming services within each year
## The bump chart needs a rank for every Service in every year
## Rank is based on Number of Subscribers (largest count gets rank 1)

#cast the subscriber column to float so it is ranked numerically
df_subscriber_counts = df_subscriber_counts.astype({"Number Subscribers (mm)": float})

#dense-rank subscribers within each Year, descending, and store as an integer Rank column
df_subscriber_counts["Rank"] = (
    df_subscriber_counts
    .groupby("Year")["Number Subscribers (mm)"]
    .rank(method="dense", ascending=False)
    .astype(int)
)

#Display the dataframe with the new Rank column
df_subscriber_counts

Unnamed: 0,Year,Streaming Service,Revenue ($bn),Number Subscribers (mm),Rank
0,2011,Netflix,3.1,21.5,1
1,2012,Netflix,3.5,25.71,1
2,2013,Netflix,4.5,35.63,1
3,2014,Netflix,5.4,47.99,1
4,2015,Netflix,6.7,62.71,1
5,2016,Netflix,8.8,79.9,1
6,2017,Netflix,11.6,99.04,1
7,2018,Netflix,15.7,124.35,1
8,2019,Netflix,20.1,151.56,1
9,2020,Netflix,24.9,192.95,1


In [54]:
#Order rows by year, then by rank within each year,
#and cast Year to string for plotting (sorting happens first, on the original Year values)
df_subscriber_counts = (
    df_subscriber_counts
    .sort_values(["Year", "Rank"])
    .astype({"Year": str})
)

#preview the first five rows of the sorted dataframe
df_subscriber_counts.head(n=5)

Unnamed: 0,Year,Streaming Service,Revenue ($bn),Number Subscribers (mm),Rank
0,2011,Netflix,3.1,21.5,1
14,2011,Hulu,0.4,1.0,2
1,2012,Netflix,3.5,25.71,1
15,2012,Hulu,0.6,2.0,2
2,2013,Netflix,4.5,35.63,1


In [55]:
## Custom color map for plotting: Streaming Service name -> hex color
## Colors were pulled from the Streaming Service logos using a color picker
color_dict = dict([
    ("Netflix", "#d70c1b"),
    ("Hulu", "#57e880"),
    ("Max", "#2f16e1"),
    ("Disney+", "#50b9ca"),
    ("Amazon Prime", "#48a8e2"),
    ("Tubi", "#fbff00"),
])

In [56]:
## Create the bump plot: one line (with markers) per Streaming Service,
## plotting its yearly Rank against Year
fig = px.line(
    df_subscriber_counts,
    x="Year",
    y="Rank",
    color="Streaming Service",
    color_discrete_map=color_dict,
    markers=True,
    hover_name="Streaming Service",
    hover_data=["Revenue ($bn)", "Number Subscribers (mm)"],
)

# Reverse the Rank axis so rank 1 is drawn at the top.
# Ranks are whole numbers, so pin the ticks to every integer starting at 1;
# auto-ticks may otherwise show fractional or skipped ranks depending on figure size.
fig.update_yaxes(
    autorange='reversed',
    title="Rank",
    visible=True,
    showticklabels=True,
    showgrid=False,
    tickmode="linear",
    tick0=1,
    dtick=1,
)
fig.update_xaxes(title="Year", visible=True, showticklabels=True, showgrid=False)

#Show plot to check how it looks so far
fig.show()

In [57]:
## Give the figure its title
## (the call is the cell's last expression, so its return value is what the cell displays)
fig.update_layout({"title": "Ranking Streaming Service by Number of Subscribers"})

In [38]:
## Dark theme: black plotting area on a light grey page,
## with the title set in black 20pt Arial
fig.update_layout({
    "plot_bgcolor": "black",
    "paper_bgcolor": "lightgrey",
    "title_font": {
        "size": 20,
        "color": "black",
        "family": "Arial",
    },
})

In [39]:
## Second color map: same colors as before, except Tubi is now purple
color_dict_2 = dict([
    ("Netflix", "#d70c1b"),
    ("Hulu", "#57e880"),
    ("Max", "#2f16e1"),
    ("Disney+", "#50b9ca"),
    ("Amazon Prime", "#48a8e2"),
    ("Tubi", "#6800c2"),
])

## Recreate the bump chart using color_dict_2: one line (with markers) per
## Streaming Service, plotting its yearly Rank against Year
fig_2 = px.line(
    df_subscriber_counts,
    x="Year",
    y="Rank",
    color="Streaming Service",
    color_discrete_map=color_dict_2,
    markers=True,
    hover_name="Streaming Service",
    hover_data=["Revenue ($bn)", "Number Subscribers (mm)"],
)

# Reverse the Rank axis so rank 1 is drawn at the top.
# Ranks are whole numbers, so pin the ticks to every integer starting at 1;
# auto-ticks may otherwise show fractional or skipped ranks depending on figure size.
fig_2.update_yaxes(
    autorange='reversed',
    title="Rank",
    visible=True,
    showticklabels=True,
    showgrid=False,
    tickmode="linear",
    tick0=1,
    dtick=1,
)
fig_2.update_xaxes(title="Year", visible=True, showticklabels=True, showgrid=False)

# Title the figure
fig_2.update_layout(title="Ranking Streaming Service by Number of Subscribers")

fig_2.show()