In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import datetime
import numpy as np
import itertools
import networkx as nx

Load data from CSV

In [4]:
# Color palette
theme = {
    "red": 'rgb(219,0,0)',
    "black": 'rgb(0,0,0)',
    "white": 'rgb(255,255,255)',
    "gray": 'rgb(86,77,77)',
    "dark_gray": 'rgb(30,30,30)',
    "light_gray": "rgb(150,150,150)",
    "light_red": 'rgb(131,16,16)',
}

In [5]:
dataframe = pd.read_csv('data.csv')
dataframe

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


### 1. Number of titles produced by each country added yearly on Netflix

First we remove productions without any country specified.

In [6]:
have_country = dataframe[pd.notnull(dataframe['country'])]
have_country

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...,...,...
7781,s7782,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [7]:
nb_total_productions = dataframe.shape[0]
nb_have_country = have_country.shape[0]
nb_without_country = nb_total_productions - nb_have_country
print(nb_total_productions)
print(nb_have_country)
print(nb_without_country)

7787
7280
507


In [96]:
labels = ['Country not specified', 'Specified country']
values = [nb_without_country, nb_have_country]

fig = go.Figure(data=[
    go.Pie(labels=labels,
           values=values,
           marker_colors=[theme['light_gray'], theme['red']])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Number of productions with missing country specification",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

We observe that some shows have multiple countries separated by `,`

In [97]:
collabs = have_country['country'].str.contains(",")
have_country[collabs]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
11,s12,TV Show,1983,,"Robert Więckiewicz, Maciej Musiał, Michalina O...","Poland, United States","November 30, 2018",2018,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Dramas","In this dark alt-history thriller, a naïve law..."
17,s18,Movie,22-Jul,Paul Greengrass,"Anders Danielsen Lie, Jon Øigarden, Jonas Stra...","Norway, Iceland, United States","October 10, 2018",2018,R,144 min,"Dramas, Thrillers","After devastating terror attacks in Norway, a ..."
48,s49,Movie,10 Days in Sun City,Adze Ugah,"Ayo Makun, Adesua Etomi, Richard Mofe-Damijo, ...","South Africa, Nigeria","October 18, 2019",2017,TV-14,87 min,"Comedies, International Movies, Romantic Movies",After his girlfriend wins the Miss Nigeria pag...
50,s51,Movie,"10,000 B.C.",Roland Emmerich,"Steven Strait, Camilla Belle, Cliff Curtis, Jo...","United States, South Africa","June 1, 2019",2008,PG-13,109 min,Action & Adventure,Fierce mammoth hunter D'Leh sets out on an imp...
54,s55,Movie,100 Meters,Marcel Barrena,"Dani Rovira, Karra Elejalde, Alexandra Jiménez...","Portugal, Spain","March 10, 2017",2016,TV-MA,109 min,"Dramas, International Movies, Sports Movies",A man who is diagnosed with multiple sclerosis...
...,...,...,...,...,...,...,...,...,...,...,...,...
7759,s7760,TV Show,Zak Storm,,"Michael Johnston, Jessica Gee-George, Christin...","United States, France, South Korea, Indonesia","September 13, 2018",2016,TV-Y7,3 Seasons,Kids' TV,Teen surfer Zak Storm is mysteriously transpor...
7761,s7762,Movie,Zapped,Peter DeLuise,"Zendaya, Chanelle Peloso, Spencer Boldman, Emi...","Canada, United States","February 1, 2017",2014,TV-Y,92 min,"Children & Family Movies, Comedies",A girl discovers a dog-training app that can g...
7770,s7771,Movie,Zinzana,Majid Al Ansari,"Ali Suliman, Saleh Bakri, Yasa, Ali Al-Jabri, ...","United Arab Emirates, Jordan","March 9, 2016",2015,TV-MA,96 min,"Dramas, International Movies, Thrillers",Recovering alcoholic Talal wakes up inside a s...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...


In [10]:
nb_collabs = np.sum(collabs)
nb_not_collabs = np.sum(~collabs)

print(nb_collabs)
print(nb_not_collabs)


1153
6127


In [99]:
labels = [
    'Productions made in country collaborations',
    'Productions made by individudal countries'
]
values = [nb_collabs, nb_not_collabs]

fig = go.Figure(data=[
    go.Pie(labels=labels,
           values=values,
           marker_colors=[theme['light_gray'], theme['red']])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Number of productions created in collaboration between countries",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
fig.show()

Let's try to create arrays from the comma separated country values.

In [12]:
by_country = have_country[['title', 'country', 'date_added',
                        'release_year']].copy()
by_country['country'] = by_country['country'].apply(lambda row_value: list(
    map(lambda country_name: country_name.strip(), str(row_value).split(","))))
by_country

Unnamed: 0,title,country,date_added,release_year
0,3%,[Brazil],"August 14, 2020",2020
1,7:19,[Mexico],"December 23, 2016",2016
2,23:59,[Singapore],"December 20, 2018",2011
3,9,[United States],"November 16, 2017",2009
4,21,[United States],"January 1, 2020",2008
...,...,...,...,...
7781,Zoom,[United States],"January 11, 2020",2006
7782,Zozo,"[Sweden, Czech Republic, United Kingdom, Denma...","October 19, 2020",2005
7783,Zubaan,[India],"March 2, 2019",2015
7785,Zumbo's Just Desserts,[Australia],"October 31, 2020",2019


Now we can use the `explode` method to expand the records which have multiple countries. 

In [13]:
expanded_countries = by_country.explode('country')

expanded_countries

Unnamed: 0,title,country,date_added,release_year
0,3%,Brazil,"August 14, 2020",2020
1,7:19,Mexico,"December 23, 2016",2016
2,23:59,Singapore,"December 20, 2018",2011
3,9,United States,"November 16, 2017",2009
4,21,United States,"January 1, 2020",2008
...,...,...,...,...
7783,Zubaan,India,"March 2, 2019",2015
7785,Zumbo's Just Desserts,Australia,"October 31, 2020",2019
7786,ZZ TOP: THAT LITTLE OL' BAND FROM TEXAS,United Kingdom,"March 1, 2020",2019
7786,ZZ TOP: THAT LITTLE OL' BAND FROM TEXAS,Canada,"March 1, 2020",2019


In [14]:
country_counts = expanded_countries.groupby(by='country').title.agg(
    ["count"]).sort_values(['count'], ascending=False).reset_index()
country_counts[:10]

Unnamed: 0,country,count
0,United States,3297
1,India,990
2,United Kingdom,723
3,Canada,412
4,France,349
5,Japan,287
6,Spain,215
7,South Korea,212
8,Germany,199
9,Mexico,154


In [101]:
fig = go.Figure(data=[
    go.Bar(x=country_counts[:10].country,
           y=country_counts[:10]['count'],
           marker_color=theme['red'])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Countries with most productions on Netflix",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

In [20]:
country_counts[country_counts['country'] == 'United States']['count'].sum()

3297

In [103]:
nb_us_movies = country_counts[country_counts['country'] ==
                              'United States']['count'].sum()
nb_int_movies = country_counts[
    country_counts['country'] != 'United States']['count'].sum()
labels = ['Productions made by US', 'Productions made by other countries']
values = [nb_us_movies, nb_int_movies]

fig = go.Figure(data=[
    go.Pie(labels=labels,
           values=values,
           marker_colors=[theme['light_gray'], theme['red']])
])
fig.update_layout(paper_bgcolor=theme['dark_gray'],
                  plot_bgcolor=theme['dark_gray'],
                  font_color="white",
                  title={
                      'text': "Percentage of productions made by US",
                      'y': 0.95,
                      'x': 0.45,
                      'xanchor': 'center',
                      'yanchor': 'top'
                  })
fig.show()

We need to extract the year in which each tv show/movie was added to Netflix from the `date_added` column

In [27]:
def parse_time(string_time):
    try:
        clean = string_time.strip()
        parsed = datetime.datetime.strptime(clean, '%B %d, %Y')
        return int(parsed.year)

    except Exception as e:
        return 0

In [28]:
expanded_countries['added_year'] = expanded_countries['date_added'].apply(
    parse_time)
with_year = expanded_countries[expanded_countries['added_year'] != 0]

with_year

Unnamed: 0,title,country,date_added,release_year,added_year
0,3%,Brazil,"August 14, 2020",2020,2020
1,7:19,Mexico,"December 23, 2016",2016,2016
2,23:59,Singapore,"December 20, 2018",2011,2018
3,9,United States,"November 16, 2017",2009,2017
4,21,United States,"January 1, 2020",2008,2020
...,...,...,...,...,...
7783,Zubaan,India,"March 2, 2019",2015,2019
7785,Zumbo's Just Desserts,Australia,"October 31, 2020",2019,2020
7786,ZZ TOP: THAT LITTLE OL' BAND FROM TEXAS,United Kingdom,"March 1, 2020",2019,2020
7786,ZZ TOP: THAT LITTLE OL' BAND FROM TEXAS,Canada,"March 1, 2020",2019,2020


We get all the added_year values available in our dataframe.

In [29]:
years = sorted(expanded_countries['added_year'].unique())

For each year, we count how many shows/movies are produced by each country and create one larger dataframe containing all aggregate data.

In [30]:
counts_by_year_and_country = pd.DataFrame()

for year in years:

    for_year = expanded_countries[
        expanded_countries['added_year'] < year].copy()

    for_year = for_year[['country', 'title']]

    counts_by_country = for_year.groupby(
        by='country', as_index=False).agg(number_of_titles=('title', 'count'))
    counts_by_country['added_year'] = str(year)
    counts_by_year_and_country = pd.concat(
        [counts_by_year_and_country, counts_by_country])

counts_by_year_and_country

Unnamed: 0,country,number_of_titles,added_year
0,Australia,1,2008
1,Japan,1,2008
2,United Kingdom,1,2008
3,United States,6,2008
0,Australia,1,2009
...,...,...,...
113,Vatican City,1,2021
114,Venezuela,3,2021
115,Vietnam,5,2021
116,West Germany,5,2021


In [31]:
us_movies = counts_by_year_and_country[counts_by_year_and_country['country'] == "United States"].reset_index(drop=True)
us_movies

Unnamed: 0,country,number_of_titles,added_year
0,United States,6,2008
1,United States,8,2009
2,United States,9,2010
3,United States,10,2011
4,United States,22,2012
5,United States,25,2013
6,United States,35,2014
7,United States,58,2015
8,United States,118,2016
9,United States,322,2017


In [104]:
fig = go.Figure(data=[
    go.Bar(x=us_movies['added_year'],
           y=us_movies['number_of_titles'],
           marker_color=theme['red'])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Netflix titles added every year produced by US",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

In [33]:
ro_movies = counts_by_year_and_country[counts_by_year_and_country['country'] == "Romania"].copy()
ro_movies['added_year'] = ro_movies['added_year'].astype(str)
ro_movies

Unnamed: 0,country,number_of_titles,added_year
60,Romania,1,2018
70,Romania,1,2019
75,Romania,7,2020
85,Romania,12,2021


In [105]:
fig = go.Figure(data=[
    go.Bar(x=ro_movies['added_year'],
           y=ro_movies['number_of_titles'],
           marker_color=theme['red'])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Netflix titles added every year produced by Romania",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

In [35]:
int_movies = counts_by_year_and_country[counts_by_year_and_country['country'] != "United States"]
int_movies

Unnamed: 0,country,number_of_titles,added_year
0,Australia,1,2008
1,Japan,1,2008
2,United Kingdom,1,2008
0,Australia,1,2009
1,Japan,1,2009
...,...,...,...
113,Vatican City,1,2021
114,Venezuela,3,2021
115,Vietnam,5,2021
116,West Germany,5,2021


In [36]:
int_movies.groupby(by='added_year').sum().reset_index()

int_movies

Unnamed: 0,country,number_of_titles,added_year
0,Australia,1,2008
1,Japan,1,2008
2,United Kingdom,1,2008
0,Australia,1,2009
1,Japan,1,2009
...,...,...,...
113,Vatican City,1,2021
114,Venezuela,3,2021
115,Vietnam,5,2021
116,West Germany,5,2021


In [37]:
us_int_counts = {'added_year': [], 'us_movies': [], 'int_movies': []}

for year in years[1:]:
    us_int_counts['added_year'].append(year)
    us_counts = us_movies[us_movies["added_year"] == str(
        year)].number_of_titles.sum()
    int_counts = int_movies[int_movies["added_year"] == str(
        year)].number_of_titles.sum()

    us_percentage = round(us_counts * 100 / (us_counts + int_counts), 2)
    int_percentage = round(int_counts * 100 / (us_counts + int_counts), 2)
    us_int_counts['us_movies'].append(us_percentage)
    us_int_counts['int_movies'].append(int_percentage)

us_int_counts = pd.DataFrame(us_int_counts)
us_int_counts

Unnamed: 0,added_year,us_movies,int_movies
0,2008,66.67,33.33
1,2009,72.73,27.27
2,2010,69.23,30.77
3,2011,71.43,28.57
4,2012,75.86,24.14
5,2013,75.76,24.24
6,2014,76.09,23.91
7,2015,69.88,30.12
8,2016,61.78,38.22
9,2017,43.28,56.72


In [90]:
fig = go.Figure()
fig.add_trace(
    go.Bar(y=us_int_counts["us_movies"],
           x=us_int_counts['added_year'],
           name="US Movies %",
           marker={"color": 'rgb(219,0,0)'}))
fig.add_trace(
    go.Bar(y=us_int_counts["int_movies"],
           x=us_int_counts['added_year'],
           name="International Movies %",
           marker={"color": 'rgb(86,77,77)'}))

fig.update_layout(yaxis={
    "title_text": "Productions added %",
    "ticktext": ["0%", "20%", "40%", "60%", "80%", "100%"],
    "tickvals": [0, 20, 40, 60, 80, 100],
    "tickmode": "array",
    "titlefont": {
        "size": 16
    },
},
                  autosize=False,
                  paper_bgcolor=theme['dark_gray'],
                  plot_bgcolor=theme['dark_gray'],
                  title={
                      'text':
                      "US vs International movie industry on Netflix %",
                      'y': 0.85,
                      'x': 0.5,
                      'xanchor': 'center',
                      'yanchor': 'top'
                  },
                  font_color='white',
                  barmode='stack')
fig.show()

In [39]:
last_year_counts = counts_by_year_and_country[counts_by_year_and_country['added_year'] == "2021"]
last_year_counts

Unnamed: 0,country,number_of_titles,added_year
0,,4,2021
1,Afghanistan,1,2021
2,Albania,1,2021
3,Algeria,2,2021
4,Angola,1,2021
...,...,...,...
113,Vatican City,1,2021
114,Venezuela,3,2021
115,Vietnam,5,2021
116,West Germany,5,2021


In [40]:
px.choropleth(last_year_counts,
              locations="country",
              color="number_of_titles",
              hover_name="country",
              hover_data={
                  "country": False,
                  "number_of_titles": True,
                  "added_year": False,
              },
              locationmode='country names',
              color_continuous_scale='reds',
              height=600)

In [41]:
px.choropleth(last_year_counts[last_year_counts['country'] != "United States"],
              locations="country",
              color="number_of_titles",
              hover_name="country",
              hover_data={
                  "country": False,
                  "number_of_titles": True,
                  "added_year": False,
              },
              locationmode='country names',
              color_continuous_scale='reds',
              height=600)

Now we can plot this data on a map and also use the `animation_frame` property to have it variable year.

In [42]:
px.choropleth(counts_by_year_and_country,
              locations="country",
              color="number_of_titles",
              hover_name="country",
              hover_data={
                  "country": False,
                  "number_of_titles": True,
                  "added_year": False,
              },
              locationmode='country names',
              animation_frame="added_year",
              color_continuous_scale='reds',
              height=600)

In [43]:
px.choropleth(counts_by_year_and_country[counts_by_year_and_country['country'] != "United States"],
              locations="country",
              color="number_of_titles",
              hover_name="country",
              hover_data={
                  "country": False,
                  "number_of_titles": True,
                  "added_year": False,
              },
              locationmode='country names',
              animation_frame="added_year",
              color_continuous_scale='reds',
              height=600)

### 2. Percentage of TV shows vs movies

#### Total

In [115]:
labels = ['TV Shows', 'Movies']
values = [
    dataframe[dataframe['type'] == 'TV Show'].shape[0],
    dataframe[dataframe['type'] == 'Movie'].shape[0]
]

fig = go.Figure(data=[
    go.Pie(labels=labels,
           values=values,
           marker_colors=[theme['light_gray'], theme['red']])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Total number of movies vs TV shows",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

#### Each year

In [44]:
percentages = dataframe.copy()

percentages['added_year'] = percentages['date_added'].apply(
    parse_time)
percentages

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,added_year
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...,2020
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...,2019
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast...",2020
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...,2020


In [45]:
years = sorted(percentages['added_year'].unique())

In [46]:
counts_by_year_percentage = {"Year": [], "TV Shows %": [], "Movies %": []}

for year in years:

    for_year = percentages[percentages['added_year'] == year]

    shows_count = for_year[for_year['type'] == 'TV Show'].shape[0]
    movies_count = for_year[for_year['type'] == 'Movie'].shape[0]
    shows_percentage = round(shows_count * 100 / for_year.shape[0], 2)
    movies_percentage = round(movies_count * 100 / for_year.shape[0], 2)
    
    counts_by_year_percentage['Year'].append(year)
    counts_by_year_percentage['TV Shows %'].append(shows_percentage)
    counts_by_year_percentage['Movies %'].append(movies_percentage)
    

counts_by_year_percentage= pd.DataFrame(counts_by_year_percentage)
counts_by_year_percentage = counts_by_year_percentage[counts_by_year_percentage['Year'] != 0]

In [47]:
counts_by_year_percentage

Unnamed: 0,Year,TV Shows %,Movies %
1,2008,50.0,50.0
2,2009,0.0,100.0
3,2010,0.0,100.0
4,2011,0.0,100.0
5,2012,0.0,100.0
6,2013,45.45,54.55
7,2014,24.0,76.0
8,2015,34.09,65.91
9,2016,41.76,58.24
10,2017,29.47,70.53


In [48]:
fig = go.Figure()
fig.add_trace(
    go.Bar(y=counts_by_year_percentage["TV Shows %"],
           x=counts_by_year_percentage.Year,
           name="TV Shows %",
           marker={"color": theme['red']}))
fig.add_trace(
    go.Bar(y=counts_by_year_percentage["Movies %"],
           x=counts_by_year_percentage.Year,
           name="Movies %",
           marker={"color": theme['light_gray']}))
fig.update_annotations(font_color='white')

fig.update_layout(yaxis={
    "title_text": "Productions added %",
    "ticktext": ["0%", "20%", "40%", "60%", "80%", "100%"],
    "tickvals": [0, 20, 40, 60, 80, 100],
    "tickmode": "array",
    "titlefont": {
        "size": 16,
    },
},
                  autosize=False,
                  paper_bgcolor=theme['dark_gray'],
                  plot_bgcolor=theme['dark_gray'],
                  font_color= "white",
                  title={
                      'text': "Total Netflix productions %",
                      'y': 0.85,
                      'x': 0.5,
                      'xanchor': 'center',
                      'yanchor': 'top'
                  },
                  barmode='stack')
fig.show()

### 3. Average movie duration

Extract productions that are of type movie

In [49]:
movies = dataframe[dataframe['type'] == 'Movie'].copy()
movies

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...
6,s7,Movie,122,Yasir Al Yasiri,"Amina Khalil, Ahmed Dawood, Tarek Lotfy, Ahmed...",Egypt,"June 1, 2020",2019,TV-MA,95 min,"Horror Movies, International Movies","After an awful accident, a couple admitted to ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
7781,s7782,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."


Convert the duration column from string `x min` to the number `x`

In [50]:
movies['duration'] = movies['duration'].apply(lambda x: int(x.split()[0]))

movies

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123,Dramas,A brilliant group of students become card-coun...
6,s7,Movie,122,Yasir Al Yasiri,"Amina Khalil, Ahmed Dawood, Tarek Lotfy, Ahmed...",Egypt,"June 1, 2020",2019,TV-MA,95,"Horror Movies, International Movies","After an awful accident, a couple admitted to ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
7781,s7782,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."


Group movies by their release year and compute average, minimum and maximum durations

In [51]:
duration_per_year = movies.groupby(by='release_year', as_index=False).agg(
    num_movies=('show_id', 'count'),
    average_duration=('duration', 'mean'),
    min_duration=('duration', 'min'),
    max_duration=('duration', 'max'))
duration_per_year

Unnamed: 0,release_year,num_movies,average_duration,min_duration,max_duration
0,1942,2,35.000000,18,52
1,1943,3,62.666667,45,82
2,1944,3,52.000000,40,76
3,1945,3,51.333333,32,63
4,1946,1,58.000000,58,58
...,...,...,...,...,...
67,2017,744,94.836022,11,166
68,2018,734,96.170300,12,312
69,2019,582,93.457045,10,209
70,2020,411,89.795620,5,190


Let's see the number of movies produced every year

In [116]:
fig = go.Figure(data=[
    go.Bar(x=duration_per_year['release_year'],
           y=duration_per_year['num_movies'],
           marker_color=theme['red'])
])
fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color="white",
    title={
        'text': "Release year of all movies available on Netflix",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
)
fig.show()

It looks like most of the movies are released after year 2000 so we will only use these ones in oroder to have correct metrics  

In [53]:
recent_movies = duration_per_year[duration_per_year['release_year'] > 2000].copy()

recent_movies

Unnamed: 0,release_year,num_movies,average_duration,min_duration,max_duration
51,2001,32,118.875,42,224
52,2002,39,118.871795,78,179
53,2003,40,117.125,37,201
54,2004,50,115.94,46,185
55,2005,61,114.360656,55,208
56,2006,72,114.541667,45,196
57,2007,72,113.069444,70,169
58,2008,102,106.666667,25,214
59,2009,103,108.621359,29,203
60,2010,136,104.426471,23,186


We can also compute the regression line of the average duration

In [56]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


x = recent_movies['release_year'].values.reshape(-1, 1)
y = recent_movies['average_duration'].values

model = make_pipeline(StandardScaler(), LinearRegression())

model.fit(x, y)

x_future = np.arange(2021, 2026)
y_future = model.predict(x_future.reshape(-1, 1))

Plot all the data

In [124]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(x=recent_movies['release_year'],
               y=recent_movies['average_duration'],
               name='Average duration',
               mode='lines+markers',
               line={
                   "color": theme['red'],
                   "width": 4,
               }))
fig.add_trace(
    go.Scatter(x=recent_movies['release_year'],
               y=recent_movies['min_duration'],
               name='Minimum duration',
               mode='lines+markers',
               line={
                   "color": 'royalblue',
                   "width": 4,
                   "dash": 'dot'
               }))

fig.add_trace(
    go.Scatter(x=recent_movies['release_year'],
               y=recent_movies['max_duration'],
               name='Maximum duration',
               mode='lines+markers',
               line={
                   "color": 'green',
                   "width": 4,
                   "dash": 'dot'
               }))

fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color='white',
    title='Duration of movies from the Netflix collection by release year',
    xaxis_title='Release year',
    yaxis_title='Duration (minutes)')

fig.show()

In [123]:
fig = go.Figure()

fig.add_traces(
    go.Scatter(x=x_future,
               y=y_future.reshape(-1, ),
               name='Regression Line',
               line={
                   "color": theme['red'],
                   "width": 2,
                   "dash": "dashdot"
               }))


fig.update_layout(
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
    font_color='white',
    title='Regression line for average movie duration for next 5 years',
    xaxis_title='Release year',
    yaxis_title='Duration (minutes)',
    yaxis_range=[60,120],
    xaxis_range=[2021,2025],
    xaxis_tickvals=np.arange(2021,2026),
)

fig.show()

In [59]:
recent_movies['average_duration'].values

array([118.875     , 118.87179487, 117.125     , 115.94      ,
       114.36065574, 114.54166667, 113.06944444, 106.66666667,
       108.62135922, 104.42647059, 102.82677165, 100.62025316,
        98.75247525, 100.26639344,  99.82722513,  95.11214953,
        94.83602151,  96.17029973,  93.45704467,  89.79562044,
        80.16666667])

In [60]:
recent_movies

Unnamed: 0,release_year,num_movies,average_duration,min_duration,max_duration
51,2001,32,118.875,42,224
52,2002,39,118.871795,78,179
53,2003,40,117.125,37,201
54,2004,50,115.94,46,185
55,2005,61,114.360656,55,208
56,2006,72,114.541667,45,196
57,2007,72,113.069444,70,169
58,2008,102,106.666667,25,214
59,2009,103,108.621359,29,203
60,2010,136,104.426471,23,186


### 4. Actors analysis

#### US only

From our dataset we will extract production from United States which have cast information

In [61]:
by_cast = dataframe[['title', 'country', 'cast']].copy()
by_cast = by_cast[(by_cast['cast'].notnull())
                  & (by_cast['country'] == 'United States')]

The cast column is made up of actor names separated by commas. We need to separate them and also remove whitespace.

In [62]:
by_cast['cast'] = by_cast['cast'].apply(lambda row_value: list(
    map(lambda actor_name: actor_name.strip(), str(row_value).split(","))))

Let's split the cast arrays into separate rows by using explode.

In [63]:
all_actors = by_cast.explode("cast")
all_actors

Unnamed: 0,title,country,cast
3,9,United States,Elijah Wood
3,9,United States,John C. Reilly
3,9,United States,Jennifer Connelly
3,9,United States,Christopher Plummer
3,9,United States,Crispin Glover
...,...,...,...
7781,Zoom,United States,Ryan Newman
7781,Zoom,United States,Michael Cassidy
7781,Zoom,United States,Spencer Breslin
7781,Zoom,United States,Rip Torn


Now we can count the number of productions each actor starred in

In [64]:
cast_counts = all_actors.groupby(by='cast', as_index=False).agg(number_prods=('cast', 'count'))
cast_counts

Unnamed: 0,cast,number_prods
0,2 Chainz,1
1,50 Cent,4
2,A Boogie Wit tha Hoodie,1
3,A.D. Miles,3
4,A.J. LoCascio,3
...,...,...
10664,Zydrunas Savickas,1
10665,k.d. lang,1
10666,vivienne Rutherford,1
10667,Álvaro Rodríguez,1


In [65]:
sorted_cast_counts = cast_counts.copy().sort_values(['number_prods'], ascending=False).reset_index(drop=True)
sorted_cast_counts

Unnamed: 0,cast,number_prods
0,Adam Sandler,19
1,Samuel L. Jackson,16
2,Fred Tatasciore,16
3,Erin Fitzgerald,14
4,Seth Rogen,14
...,...,...
10664,Iwan Rheon,1
10665,Iván Pohárnok,1
10666,Ivonne Coll,1
10667,Ivan Sergei,1


In [106]:
# fig = go.Figure(data=[
#     go.Bar(x=us_movies['added_year'],
#            y=us_movies['number_of_titles'],
#            marker_color=theme['red'])
# ])
# fig.update_layout(
#     paper_bgcolor=theme['dark_gray'],
#     plot_bgcolor=theme['dark_gray'],
#     font_color="white",

fig = go.Figure(data=[
    go.Bar(y=sorted_cast_counts[:30].cast,
           x=sorted_cast_counts[:30]['number_prods'],
           marker_color=theme['red'],
           orientation='h')
])
fig.update_layout(paper_bgcolor=theme['dark_gray'],
                  plot_bgcolor=theme['dark_gray'],
                  font_color="white",
                  title_text="Most popular actors from US",
                  width=800,
                  height=800)
fig.update_yaxes(autorange="reversed")
fig.update_xaxes(side="top")
fig.show()

Using cast counts, we can extract the actors which are part of at least 5 productions (in order to not have too much data).
We will also extract these actor names in an array and into a map of indices.

In [67]:
popular_actors = cast_counts[cast_counts['number_prods'] > 5]

popular_actors_list = popular_actors['cast'].values
popular_map = dict(zip(popular_actors_list, range(len(popular_actors_list))))
popular_map_inv = dict(zip(range(len(popular_actors_list)), popular_actors_list))



Now we need to compute a matrix of occurences that for each actor will say in how many movies he or she has player with all the other actors. It's size will be `num_of_actors * num_of_actors` 

In [68]:
# Create a new empty matrix for occurences between actors
matrix = np.zeros((len(popular_actors_list), len(popular_actors_list)))

for i in by_cast.index:
    # Get the current movie cast
    current_group = by_cast['cast'][i]

    # Extract actors which are part of popular list
    current_group = filter(lambda x: x in popular_actors_list, current_group)

    # Convert actor names into indices
    indices = list(map(lambda x: popular_map[x], current_group))

    # Get all the permutations of actors in pairs
    # Example: [x, y, z] => [[x, y], [x, z], [y, x], [y, z], [z, x], [z, y]]
    pairs = list(itertools.permutations(indices, 2))

    # Use each pair of actors to update the matrix of occurences
    for pair in pairs:
        matrix[pair[0], pair[1]] += 1
        
print(matrix.shape)
print(len(popular_actors_list))
matrix

(300, 300)
300


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [69]:
to_drop = []
for i in range(len(popular_actors_list)):
    nb_collaborations = np.sum(matrix[:, i])
    if nb_collaborations < 15:
        to_drop.append(i)

matrix = np.delete(matrix,to_drop, axis = 0)
matrix = np.delete(matrix,to_drop, axis = 1)
popular_actors_list = np.delete(popular_actors_list, to_drop, axis=0)

In [70]:
adj_matrix_with_names = pd.DataFrame(matrix, index=popular_actors_list, columns=popular_actors_list)
adj_matrix_with_names

Unnamed: 0,Adam Sandler,Adam Scott,Alex Heartman,Alison Brie,Allen Covert,America Ferrera,America Young,Amy Poehler,Amy Sedaris,Andrew Bachelor,...,Taye Diggs,Terrence Howard,Terry Crews,Tiffany Haddish,Tony Hale,Tracy Morgan,Will Arnett,Will Ferrell,Will Forte,Zach Galifianakis
Adam Sandler,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
Adam Scott,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
Alex Heartman,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alison Brie,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
Allen Covert,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tracy Morgan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0
Will Arnett,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
Will Ferrell,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Will Forte,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [111]:
def get_collaborations_for_actor(actor_name, adj_matrix, actors_map):
    actor_idx = actors_map[actor_name]
    collabs = adj_matrix[actor_idx]
    collabs_df = pd.DataFrame(list(zip(actors_map.keys(), collabs)),
                              columns=['actor_name', 'collabs'])
    collabs_df = collabs_df[collabs_df['collabs'] > 0].sort_values(
        by='collabs', ascending=False)

    return collabs_df


actor_name = "Andy Samberg"
collabs = get_collaborations_for_actor(actor_name, matrix, popular_map)

fig = go.Figure(data=[
    go.Bar(y=collabs['collabs'],
           x=collabs['actor_name'],
           marker_color=theme['red'])
])

fig.update_xaxes(tickangle=300,
                 title_text="Actor name",
                 title_font={"size": 20})
fig.update_yaxes(title_text="Number of collaborations",
                 title_font={"size": 20})
fig.update_layout(
    title={
        'text': f"Total collaborations of {actor_name}",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font_color='white',
    paper_bgcolor=theme['dark_gray'],
    plot_bgcolor=theme['dark_gray'],
)
fig.show()

In [72]:
graph = nx.from_pandas_adjacency(adj_matrix_with_names)
pos_ = nx.spring_layout(graph)

In [73]:
def make_edge(x, y, text, width):
    return go.Scatter(x=x,
                      y=y,
                      line=dict(width=width, color='red'),
                      hoverinfo='text',
                      text=([text]),
                      mode='lines')


edge_trace = []
for edge in graph.edges():

    if graph.edges()[edge]['weight'] > 0:
        char_1 = edge[0]
        char_2 = edge[1]

        x0, y0 = pos_[char_1]
        x1, y1 = pos_[char_2]

        text = char_1 + '--' + char_2 + ': ' + str(
            graph.edges()[edge]['weight'])

        trace = make_edge([x0, x1, None], [y0, y1, None], text,
                          0.5 * graph.edges()[edge]['weight'])

        edge_trace.append(trace)

In [74]:
node_trace = go.Scatter(x=[],
                        y=[],
                        text=[],
                        textposition="top center",
                        textfont_size=10,
                        mode='markers+text',
                        hoverinfo='none',
                        marker=dict(color=[], size=[], line=None))

for node in graph.nodes():
    x, y = pos_[node]
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    node_trace['marker']['color'] += tuple(['red'])
    node_trace['marker']['size'] += tuple([8])
    node_trace['text'] += tuple([node])

In [75]:
layout = go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

fig = go.Figure(layout=layout)

for trace in edge_trace:
    fig.add_trace(trace)

fig.add_trace(node_trace)
fig.update_layout(showlegend=False)
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)
fig.show()