In [5]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from dash import Dash, html, dcc


import matplotlib.pyplot as plt

import seaborn as sns
from pandas_profiling import ProfileReport

In [6]:
# Load the raw catalogue; the path is relative to the notebook's working directory
df = pd.read_csv('netflix_titles.csv')

In [8]:
# Preview the first rows to get a feel for the columns
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [4]:
# Column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [9]:
# List the column names
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

Data Cleaning

In [6]:
# Count the distinct values in each column (cardinality, not missing values)
df.nunique()

show_id         8807
type               2
title           8807
director        4528
cast            7692
country          748
date_added      1767
release_year      74
rating            17
duration         220
listed_in        514
description     8775
dtype: int64

In [7]:
df.isna().sum()

# Columns with missing values:

# director: probably not needed for the analysis (little information)
# cast: too many different values, so it carries little information
# country: an important variable, so we need a way to fill it (maybe the mode)
# date_added: only a few cases, so those rows are dropped
# rating: only a few cases; they are filled in by hand in a later cell
# duration: only a few cases, so those rows are dropped

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

Fix missing ratings

In [8]:
# List the distinct rating labels.
# NOTE(review): the output contains '74 min', '84 min' and '66 min', which look like
# duration strings stored in the rating column — presumably the same rows that have
# a missing duration; confirm before relying on them.
df['rating'].unique()

array(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R',
       'TV-G', 'G', 'NC-17', '74 min', '84 min', '66 min', 'NR', nan,
       'TV-Y7-FV', 'UR'], dtype=object)

In [9]:
# Inspect the rows whose rating is missing
df[df['rating'].isna()]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
5989,s5990,Movie,13TH: A Conversation with Oprah Winfrey & Ava ...,,"Oprah Winfrey, Ava DuVernay",,"January 26, 2017",2017,,37 min,Movies,Oprah Winfrey sits down with director Ava DuVe...
6827,s6828,TV Show,Gargantia on the Verdurous Planet,,"Kaito Ishikawa, Hisako Kanemoto, Ai Kayano, Ka...",Japan,"December 1, 2016",2013,,1 Season,"Anime Series, International TV Shows","After falling through a wormhole, a space-dwel..."
7312,s7313,TV Show,Little Lunch,,"Flynn Curry, Olivia Deeble, Madison Lu, Oisín ...",Australia,"February 1, 2018",2015,,1 Season,"Kids' TV, TV Comedies","Adopting a child's perspective, this show take..."
7537,s7538,Movie,My Honor Was Loyalty,Alessandro Pepe,"Leone Frisa, Paolo Vaccarino, Francesco Miglio...",Italy,"March 1, 2017",2015,,115 min,Dramas,"Amid the chaos and horror of World War II, a c..."


In [10]:
# Ratings for the rows with a missing rating, gathered by hand from several
# Internet sources. Hardcoded, but good enough for four rows.
# Keys are the row labels shown in the missing-rating listing.
rating_replacements = {
    5989: 'TV-PG',
    6827: 'TV-14',
    7312: 'TV-MA',
    7537: 'TV-MA',
}

# Address the cell by row label and column name instead of a positional
# column index, so the fix keeps working if columns are reordered or dropped.
for row_label, rating in rating_replacements.items():
    df.loc[row_label, 'rating'] = rating

# Should now be zero
df['rating'].isna().sum()

0

Drop director and cast columns

In [11]:
# director and cast are not used in the analysis, so remove them
df = df.drop(columns=['director', 'cast'])
df.columns

Index(['show_id', 'type', 'title', 'country', 'date_added', 'release_year',
       'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

Fix missing date_added, country and duration values

In [12]:
# Drop the few rows without date_added (it's hard to find this info on the net)
# or without duration.
df = df.dropna(subset=['date_added', 'duration'])

# Fill missing countries with the most frequent country among the remaining rows.
# assign() returns a new frame, which avoids writing into a filtered view
# (SettingWithCopyWarning) and keeps the cell safe to re-run.
df = df.assign(country=df['country'].fillna(df['country'].mode()[0]))


Generate new columns based on variables

In [13]:
# Extract the year (and, in the next cell, the month) from date_added so we can
# compare which months are the most valuable and how far apart a title's
# release and its arrival on Netflix are.

# The year is the last space-separated token of the date string
df['year_added'] = df['date_added'].str.split(" ").str[-1]
df['year_added'].head()

0    2021
1    2021
2    2021
3    2021
4    2021
Name: year_added, dtype: object

In [14]:
# The month is the first space-separated token of the date string.
# Strip surrounding whitespace first: a value with a leading space would
# otherwise yield an empty string as its "month".
df['month_added'] = df['date_added'].str.strip().str.split(" ").str[0]
df['month_added'].head()

0    September
1    September
2    September
3    September
4    September
Name: month_added, dtype: object

Target audience based on rating

In [15]:
# Map every rating to the audience it targets, to show which audience
# most productions are aimed at.

# Ratings grouped by audience; inverted below into a rating -> audience lookup
ratings_by_audience = {
    'Kids': ['TV-Y', 'TV-G', 'G'],
    'Older Kids': ['TV-PG', 'TV-Y7-FV', 'TV-Y7', 'PG'],
    'Teens': ['TV-14', 'PG-13'],
    'Adults': ['TV-MA', 'R', 'NR', 'UR', 'NC-17'],
}

ratings_ages = {
    rating: audience
    for audience, ratings in ratings_by_audience.items()
    for rating in ratings
}

In [16]:
# Translate each rating into its target audience via the ratings_ages lookup
df['target_ages'] = df['rating'].replace(ratings_ages)
# Sanity check: only the four audience labels should remain
df['target_ages'].unique()

array(['Teens', 'Adults', 'Older Kids', 'Kids'], dtype=object)

Country fixup

In [17]:
# Some entries list several comma-separated countries.
# Add a column holding just the first one so we can check which regions
# have more content.

# Strip stray leading/trailing commas and spaces before splitting, so a value
# that starts with a separator does not produce an empty principal country.
df['principal_country'] = (
    df['country']
    .str.strip(', ')
    .str.split(',')
    .str[0]
    .str.strip()
)
df['principal_country'].head()

0    United States
1     South Africa
2    United States
3    United States
4            India
Name: principal_country, dtype: object

Fix data types

In [18]:
# Check the current dtypes before converting
df.dtypes

show_id              object
type                 object
title                object
country              object
date_added           object
release_year          int64
rating               object
duration             object
listed_in            object
description          object
year_added           object
month_added          object
target_ages          object
principal_country    object
dtype: object

In [19]:
# type only takes a couple of values, so store it as a category
df['type'] = df['type'].astype('category')

# target_ages is a category too (4 classes), listed from youngest to oldest
audience_dtype = pd.CategoricalDtype(categories=['Kids', 'Older Kids', 'Teens', 'Adults'])
df['target_ages'] = df['target_ages'].astype(audience_dtype)

# year_added must be numeric so it can be compared with `release_year`
df['year_added'] = pd.to_numeric(df['year_added'])

In [20]:
# Confirm the conversions took effect
df.dtypes

show_id                object
type                 category
title                  object
country                object
date_added             object
release_year            int64
rating                 object
duration               object
listed_in              object
description            object
year_added              int64
month_added            object
target_ages          category
principal_country      object
dtype: object

Data Visualization

In [21]:
def _split_genres(listed_in):
    """Turn a comma-separated `listed_in` string into a list of genre names."""
    # Remove the single space that may sit on either side of a comma, then split
    compact = listed_in.replace(' ,', ',').replace(', ', ',')
    return compact.split(',')


df['genre'] = df['listed_in'].map(_split_genres)
df['genre'].head()

0                                      [Documentaries]
1    [International TV Shows, TV Dramas, TV Mysteries]
2    [Crime TV Shows, International TV Shows, TV Ac...
3                             [Docuseries, Reality TV]
4    [International TV Shows, Romantic TV Shows, TV...
Name: genre, dtype: object

In [22]:
# Split the catalogue by content type.
# Take explicit copies: later cells add columns to movie_df, and writing into a
# filtered view of df raises SettingWithCopyWarning.
movie_df = df[df['type'] == 'Movie'].copy()
show_df = df[df['type'] == 'TV Show'].copy()

show_df.head()

Unnamed: 0,show_id,type,title,country,date_added,release_year,rating,duration,listed_in,description,year_added,month_added,target_ages,principal_country,genre
1,s2,TV Show,Blood & Water,South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",2021,September,Adults,South Africa,"[International TV Shows, TV Dramas, TV Mysteries]"
2,s3,TV Show,Ganglands,United States,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,2021,September,Adults,United States,"[Crime TV Shows, International TV Shows, TV Ac..."
3,s4,TV Show,Jailbirds New Orleans,United States,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",2021,September,Adults,United States,"[Docuseries, Reality TV]"
4,s5,TV Show,Kota Factory,India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,2021,September,Adults,India,"[International TV Shows, Romantic TV Shows, TV..."
5,s6,TV Show,Midnight Mass,United States,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",The arrival of a charismatic young priest brin...,2021,September,Adults,United States,"[TV Dramas, TV Horror, TV Mysteries]"


In [23]:
# Share of Movies vs TV Shows in the catalogue.
# NOTE(review): this cell previously stated that TV shows outnumber movies —
# confirm that against the rendered chart before keeping the claim.

# Name the columns explicitly: the labels produced by value_counts().reset_index()
# differ between pandas 1.x ('index', 'type') and pandas 2.x ('type', 'count').
type_counts = df['type'].value_counts().rename_axis('type').reset_index(name='count')

movie_or_tv_show = px.pie(type_counts, values='count', names='type')
movie_or_tv_show.update_traces(textposition='inside', textinfo='percent+label')
movie_or_tv_show.update_layout(
    title_text="Distribution by Type of content",
    title_x=0.5,
    )
movie_or_tv_show.show()

In [24]:
def generate_rating_df(df):
    """Count titles per (rating, target audience) pair.

    Parameters
    ----------
    df : DataFrame with `rating`, `target_ages` and `show_id` columns.

    Returns
    -------
    DataFrame with columns `rating`, `target_ages` and `counts`, holding only
    the combinations that occur in `df`, sorted by `target_ages`.
    """
    # observed=True keeps only combinations present in the data when
    # target_ages is categorical, instead of building every rating x audience
    # pair and relying on the deprecated implicit default.
    rating_df = (
        df.groupby(['rating', 'target_ages'], observed=True)
        .agg(counts=('show_id', 'count'))
        .reset_index()
    )
    # A group whose show_id values are all missing still counts as zero
    rating_df = rating_df[rating_df['counts'] != 0]
    return rating_df.sort_values('target_ages')

In [25]:
# Titles per rating, coloured by the audience each rating targets
rating_df = generate_rating_df(df)
content_by_target = px.bar(
    rating_df,
    x='rating',
    y='counts',
    color='target_ages',
).update_layout(title_text="Distribution by Target age", title_x=0.5)
content_by_target.show()

In [26]:
# Audience breakdown for movies and for TV shows, drawn as two donuts side by side
movie_rating_df = generate_rating_df(movie_df)
show_rating_df = generate_rating_df(show_df)

rating_by_type_of_content = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "pie"}]])

# Movies go in the left column, TV shows in the right one
for column, audience_counts in enumerate((movie_rating_df, show_rating_df), start=1):
    donut = go.Pie(labels=audience_counts['target_ages'], values=audience_counts['counts'])
    rating_by_type_of_content.add_trace(donut, row=1, col=column)

rating_by_type_of_content.update_traces(textposition='inside', hole=.4, hoverinfo="label+percent+name")

# Labels placed in the centre of each donut
donut_labels = [
    dict(text='Movies', x=0.21, y=0.5, font_size=12, showarrow=False),
    dict(text='TV Shows', x=0.799, y=0.5, font_size=12, showarrow=False),
]
rating_by_type_of_content.update_layout(
    title_text="Target age distribution by Type of content",
    title_x=0.5,
    annotations=donut_labels,
)
rating_by_type_of_content.show()

In [27]:
# Titles per principal country. Columns are named explicitly because the labels
# produced by value_counts().reset_index() differ between pandas 1.x and 2.x.
country_df = (
    df['principal_country']
    .value_counts()
    .rename_axis('principal_country')
    .reset_index(name='count')
)
# Keep only countries holding more than 1% of the catalogue so the pie stays readable
country_df = country_df[country_df['count'] / country_df['count'].sum() > 0.01]

pie_per_country = px.pie(country_df, values='count', names='principal_country')
pie_per_country.update_traces(textposition='inside', textinfo='percent+label')
pie_per_country.update_layout(title_text="Distribution per Country (pie)", title_x=0.5)
pie_per_country.show()

In [28]:
# Same distribution as a histogram over every country, most frequent first
histogram_per_country = (
    px.histogram(df, x='principal_country')
    .update_xaxes(categoryorder='total descending')
    .update_layout(title_text="Distribution per Country (histogram)", title_x=0.5)
)
histogram_per_country.show()

In [29]:
# In the last few years there has been an increase in content production.
# Compare, per content type, how many titles were released each year with how
# many were added to Netflix each year (only years after 2010).

released_year_df = (
    df.loc[df['release_year'] > 2010]
    .groupby(['release_year', 'type'])
    .agg({'show_id': 'count'})
    .reset_index()
)
added_year_df = (
    df.loc[df['year_added'] > 2010]
    .groupby(['year_added', 'type'])
    .agg({'show_id': 'count'})
    .reset_index()
)

# One line per (yearly counts, content type):
# (counts frame, year column, content type, legend name, colour)
trace_specs = [
    (released_year_df, 'release_year', 'Movie', 'Movie: Released Year', 'green'),
    (released_year_df, 'release_year', 'TV Show', 'TV Show: Released Year', 'darkgreen'),
    (added_year_df, 'year_added', 'Movie', 'Movie: Year Added', 'orange'),
    (added_year_df, 'year_added', 'TV Show', 'TV Show: Year Added', 'darkorange'),
]

added_vs_released = go.Figure()
for counts_df, year_column, content_type, trace_name, color in trace_specs:
    # Select the rows once, then read both axes from the same subset
    subset = counts_df.loc[counts_df['type'] == content_type]
    added_vs_released.add_trace(go.Scatter(
        x=subset[year_column],
        y=subset['show_id'],
        mode='lines+markers',
        name=trace_name,
        marker_color=color,
    ))

added_vs_released.update_xaxes(categoryorder='total descending')
added_vs_released.update_layout(title_text="Date released vs Date added by Content type", title_x=0.5)
added_vs_released.show()

In [30]:
# Distribution of release years after 2005, with a fitted normal curve on top
recent_release_years = df.loc[df.release_year > 2005, 'release_year']
release_year_norm = ff.create_distplot(
    [recent_release_years],
    ['release_year'],
    curve_type='normal',
    show_rug=False,
)
release_year_norm.update_layout(title_text="Release year with Normal curve", title_x=0.5)
release_year_norm.show()

Genres Analysis

In [31]:
# Build dataframes for types of content

from sklearn.preprocessing import MultiLabelBinarizer 

In [32]:
def calculate_mlb(series):
    """One-hot encode a Series of label lists.

    Returns a DataFrame with one 0/1 column per distinct label found by
    MultiLabelBinarizer, indexed like `series`.
    """
    binarizer = MultiLabelBinarizer()
    indicator_matrix = binarizer.fit_transform(series)
    return pd.DataFrame(
        indicator_matrix,
        index=series.index,
        columns=binarizer.classes_,
    )

In [33]:
def top_genres(df, title='Top ones'):
    """Build a bar chart of how many titles in `df` carry each genre.

    `df` must have a `genre` column holding lists of genre names. Bars are
    ordered from the most to the least frequent genre; `title` is the chart title.
    Returns the plotly Figure without showing it.
    """
    # One indicator column per genre; the column sums are the title counts
    genre_totals = calculate_mlb(df['genre']).sum().sort_values(ascending=False)

    bars = go.Bar(x=genre_totals.index, y=genre_totals.values)
    fig = go.Figure(data=[bars])
    fig.update_xaxes(categoryorder='total descending')
    fig.update_layout(title=title, title_x=0.5)
    return fig

In [34]:
# Genre frequency among movies
top_movie_genres = top_genres(movie_df, title='Top Movie Genres')
top_movie_genres.show()

In [35]:
# Hand-written list of movie genres used later to filter the per-year breakdown.
# NOTE(review): presumably read off the "Top Movie Genres" chart — consider
# deriving it from the data so it cannot drift.
top_movies_genres = [
    'International Movies',
    'Dramas',
    'Comedies',
    'Documentaries',
    'Action & Adventure',
]

In [36]:
# Genre frequency among TV shows
top_tv_genres = top_genres(show_df, title='Top TV Show Genres')
top_tv_genres.show()

In [37]:
# Correlation between movie genres, shown as a lower-triangle annotated heatmap
genres_df = calculate_mlb(movie_df['genre'])

# Pairwise correlation of the genre indicator columns, rounded for the cell labels
movie_corr = genres_df.corr().round(decimals=2)

# Blank out the diagonal and the upper triangle (they only mirror the lower one)
movie_upper_triangle = np.triu(np.ones_like(movie_corr, dtype=bool))
genres_mask = movie_corr.mask(movie_upper_triangle)

genre_heatmap = ff.create_annotated_heatmap(
    z=genres_mask.to_numpy(),
    x=genres_mask.columns.tolist(),
    y=genres_mask.columns.tolist(),
    colorscale=px.colors.diverging.RdBu,
    showscale=True,
    ygap=1,
    xgap=1,
)

genre_heatmap.update_xaxes(side="bottom")

genre_heatmap.update_layout(
    title_text='Movie genre Heatmap',
    title_x=0.5,
    height=800,
    template='plotly_white',
    xaxis_showgrid=False,
    xaxis_zeroline=False,
    yaxis_showgrid=False,
    yaxis_zeroline=False,
    yaxis_autorange='reversed',
)

# Masked cells are annotated with the text 'nan'; clear those labels
for annotation in genre_heatmap.layout.annotations:
    if annotation.text == 'nan':
        annotation.text = ""

genre_heatmap.show()

In [38]:
# Correlation between TV show genres, shown as a lower-triangle annotated heatmap
genres_df = calculate_mlb(show_df['genre'])

# Pairwise correlation of the genre indicator columns, rounded for the cell labels
show_corr = genres_df.corr().round(decimals=2)

# Blank out the diagonal and the upper triangle (they only mirror the lower one)
show_upper_triangle = np.triu(np.ones_like(show_corr, dtype=bool))
show_mask = show_corr.mask(show_upper_triangle)

show_heatmap = ff.create_annotated_heatmap(
    z=show_mask.to_numpy(),
    x=show_mask.columns.tolist(),
    y=show_mask.columns.tolist(),
    colorscale=px.colors.diverging.RdBu,
    showscale=True,
    ygap=1,
    xgap=1,
)

show_heatmap.update_xaxes(side="bottom")

show_heatmap.update_layout(
    title_text='Show genre Heatmap',
    title_x=0.5,
    height=800,
    template='plotly_white',
    xaxis_showgrid=False,
    xaxis_zeroline=False,
    yaxis_showgrid=False,
    yaxis_zeroline=False,
    yaxis_autorange='reversed',
)

# Masked cells are annotated with the text 'nan'; clear those labels
for annotation in show_heatmap.layout.annotations:
    if annotation.text == 'nan':
        annotation.text = ""

show_heatmap.show()

Duration analysis

In [10]:
# Analyze series duration.
# These values are expressed in seasons, so there is not much to analyze.

# Most series have only 1 season, probably because they were cancelled or
# because they are new and the second season is still in production.
# NOTE: this cell needs show_df from the movie/TV split cell; the saved output
# (NameError, execution count 10) comes from running it out of order.

show_duration = px.histogram(x=show_df['duration'])
show_duration.update_xaxes(categoryorder="total descending")
show_duration.update_layout(
    title="Distribution of Show season durations",
    title_x=0.5,
    xaxis_title="Duration of the Show",
)
show_duration.show()

NameError: name 'show_df' is not defined

In [40]:
# Movie duration analysis
# Split the movie durations into three bins:

# - less than 1 h 30 min
# - at least 1 h 30 min and less than 2 h 30 min
# - 2 h 30 min or more
# Most movies fall between 1 h 30 min and 2 h 30 min.

# duration reads like "90 min": keep the leading number as an integer
temp_duration = movie_df['duration'].str.split(' ').str[0].astype('int64')

In [41]:
# Label every movie with its duration bin in a single pass.
# right=False makes the bins [..., 90), [90, 150), [150, ...), matching the
# "< 90", ">= 90 and < 150", ">= 150" rules. assign() returns a new frame, so
# nothing is written into a filtered view of df (no SettingWithCopyWarning).
movie_df = movie_df.assign(
    duration_bin=pd.cut(
        temp_duration,
        bins=[-np.inf, 90, 150, np.inf],
        right=False,
        labels=[
            'Less than 1.30 hours',
            'More than 1.30 - less than 2.30 hours',
            'More than 2.30 hours',
        ],
    ).astype(str)  # keep plain string labels rather than a categorical
)

In [42]:
# Check that the duration_bin column was added
movie_df.head()

Unnamed: 0,show_id,type,title,country,date_added,release_year,rating,duration,listed_in,description,year_added,month_added,target_ages,principal_country,genre,duration_bin
0,s1,Movie,Dick Johnson Is Dead,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",2021,September,Teens,United States,[Documentaries],More than 1.30 - less than 2.30 hours
6,s7,Movie,My Little Pony: A New Generation,United States,"September 24, 2021",2021,PG,91 min,Children & Family Movies,Equestria's divided. But a bright-eyed hero be...,2021,September,Older Kids,United States,[Children & Family Movies],More than 1.30 - less than 2.30 hours
7,s8,Movie,Sankofa,"United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s...",2021,September,Adults,United States,"[Dramas, Independent Movies, International Mov...",More than 1.30 - less than 2.30 hours
9,s10,Movie,The Starling,United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...,2021,September,Teens,United States,"[Comedies, Dramas]",More than 1.30 - less than 2.30 hours
12,s13,Movie,Je Suis Karl,"Germany, Czech Republic","September 23, 2021",2021,TV-MA,127 min,"Dramas, International Movies",After most of her family is murdered in a terr...,2021,September,Adults,Germany,"[Dramas, International Movies]",More than 1.30 - less than 2.30 hours


In [43]:
# Bar per duration bin, shown from the shortest to the longest bin
duration_bin_order = [
    'Less than 1.30 hours',
    'More than 1.30 - less than 2.30 hours',
    'More than 2.30 hours',
]

movie_duration = px.histogram(x=movie_df['duration_bin'], color=movie_df['duration_bin'])
movie_duration.update_xaxes(categoryorder="array", categoryarray=duration_bin_order)
movie_duration.update_layout(
    xaxis_title="Duration of the Movies",
    title="Distribution of Movie durations",
    title_x=0.5,
)
movie_duration.show()

In [44]:
def check_genre_contains(genres, allowed_genres=None):
    """Return True if any entry of `genres` appears in `allowed_genres`.

    Parameters
    ----------
    genres : iterable of genre names to test.
    allowed_genres : optional collection of accepted genre names. When omitted,
        the module-level `top_movies_genres` list is used, as before.

    Returns
    -------
    bool; False when `genres` is empty.
    """
    if allowed_genres is None:
        # Looked up at call time so the default follows the notebook's list
        allowed_genres = top_movies_genres
    return any(genre in allowed_genres for genre in genres)

In [45]:
# The first listed genre is treated as the movie's principal genre
movie_df['principal_genre'] = movie_df['genre'].str[0]
movie_df['principal_genre'].head()

0                Documentaries
6     Children & Family Movies
7                       Dramas
9                     Comedies
12                      Dramas
Name: principal_genre, dtype: object

Top Genres release by year

In [46]:
# As seen in the previous steps, a large number of titles arrived in the last
# few years, so here we explore how the top genres are distributed per year.

# A sunburst may not be the best plot for this, but variety of chart types is
# one of the goals of this project.

# Movies whose principal genre is one of the top genres, added in 2017 or later
is_top_genre = movie_df['principal_genre'].isin(top_movies_genres)
is_recent = movie_df['year_added'] >= 2017

year_genre_df = (
    movie_df[is_top_genre & is_recent]
    .groupby(['principal_genre', 'year_added'])
    .agg({'title': 'count'})
    .reset_index()
)
year_genre_df.columns = ['principal_genre', 'year_added', 'count']

# Inner ring: year added; outer ring: principal genre
genre_per_year = px.sunburst(year_genre_df, path=['year_added', 'principal_genre'], values='count')
genre_per_year.update_layout(title="Top Genres per Year", title_x=0.5, height = 650)
genre_per_year.show()

Creating Dash app 

In [47]:
# Single-page Dash app that stacks every figure built above under a header.
app = Dash(__name__)

# (component id, figure) pairs, in the order they appear on the page
dashboard_figures = [
    ('movie_or_tv_show', movie_or_tv_show),
    ('content_by_target', content_by_target),
    ('rating_by_type_of_content', rating_by_type_of_content),
    ('pie_per_country', pie_per_country),
    ('histogram_per_country', histogram_per_country),
    ('added_vs_released', added_vs_released),
    ('release_year_norm', release_year_norm),
    ('top_movie_genres', top_movie_genres),
    ('top_tv_genres', top_tv_genres),
    ('genre_heatmap', genre_heatmap),
    ('show_heatmap', show_heatmap),
    ('show_duration', show_duration),
    ('movie', movie_duration),
    ('genre_per_year', genre_per_year),
]

page_header = [
    html.H1(
        children='Aierizer Dániel - BI project 2023',
        style={'textAlign': 'center'},
    ),
    html.H2(
        children='Netflix analysis',
        style={'textAlign': 'center'},
    ),
]

page_graphs = [
    dcc.Graph(id=graph_id, figure=figure)
    for graph_id, figure in dashboard_figures
]

app.layout = html.Div(children=page_header + page_graphs)

# Start the development server when run as the main module
if __name__ == '__main__':
    app.run(port=44477)

Dash is running on http://127.0.0.1:44477/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:44477
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/deps/polyfill@7.v2_6_2m1667486089.12.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/deps/react-dom@16.v2_6_2m1667486089.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/dash-renderer/build/dash_renderer.v2_6_2m1667486089.min.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/dcc/dash_core_components.v2_6_2m1667486089.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/html/dash_html_components.v2_0_5m1667486089.min.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-component-suites/dash/dash_table/bundle.v5_1_6m1667486089.js HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 12:46:57] "GET /_dash-dependencies 