# Project blogpost team 12

**Pierce, Nikhil, Oskay, Adam, Wesley**

# 1. Setup

Set up the required packages for this project.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# 2. Data Setup

## 2.1 The Netflix data 

In [2]:
# Load the Netflix catalogue from its CSV file into the `netflix` DataFrame.
# The path is relative, so the file is looked up in the current working directory.
netflix = pd.read_csv('netflix_titles.csv')


In [3]:
# Preview the first rows to check that the data was loaded correctly.
netflix.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


**Transforming the data**

If we look at the dataset, we see a column called description. This column gives a brief summary of the movie/show. The column called cast lists the cast members starring in the movie/show. These columns aren't useful for our project, so we will remove them from the dataset.

In [4]:
# Drop the 'description' and 'cast' columns, which this analysis does not use.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise a KeyError.
netflix = netflix.drop(columns=['description', 'cast'], errors='ignore')

In [5]:
# Check that the desired result has been achieved: 'description' and 'cast' should be gone.
netflix.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries
1,s2,TV Show,Blood & Water,,South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries"
2,s3,TV Show,Ganglands,Julien Leclercq,,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
3,s4,TV Show,Jailbirds New Orleans,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV"
4,s5,TV Show,Kota Factory,,India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ..."


We want to separate the Movie and TV Show types so we can look at them separately. To achieve this we will make two new datasets: one containing all the movies (called netflix_movies) and one containing all the TV shows (called netflix_tvshow).

In [6]:
# Create two new datasets by splitting the original one on its 'type' column.
# .copy() makes each subset an independent DataFrame, so columns can be added
# or overwritten on it later without pandas raising SettingWithCopyWarning.
netflix_movies = netflix[netflix['type'] == 'Movie'].copy()
netflix_tvshow = netflix[netflix['type'] == 'TV Show'].copy()

In [7]:
# Preview the movie subset; every row should have type 'Movie'.
netflix_movies.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries
6,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha",,"September 24, 2021",2021,PG,91 min,Children & Family Movies
7,s8,Movie,Sankofa,Haile Gerima,"United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
9,s10,Movie,The Starling,Theodore Melfi,United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas"
12,s13,Movie,Je Suis Karl,Christian Schwochow,"Germany, Czech Republic","September 23, 2021",2021,TV-MA,127 min,"Dramas, International Movies"


In [8]:
# Preview the TV show subset; every row should have type 'TV Show'.
netflix_tvshow.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
1,s2,TV Show,Blood & Water,,South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries"
2,s3,TV Show,Ganglands,Julien Leclercq,,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
3,s4,TV Show,Jailbirds New Orleans,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV"
4,s5,TV Show,Kota Factory,,India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ..."
5,s6,TV Show,Midnight Mass,Mike Flanagan,,"September 24, 2021",2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"


## 2.2 The Disney+ data

In [9]:
# Load the Disney+ catalogue from its CSV file into the `disney` DataFrame.
# The path is relative, so the file is looked up in the current working directory.
disney = pd.read_csv('disney_plus_titles.csv')


In [10]:
# Preview the first rows to check that the data was loaded correctly.
disney.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...


**Transforming the data**

Just like before, we won't be needing the columns description and cast, so we will drop them from the data.

In [11]:
# Drop the 'description' and 'cast' columns, which this analysis does not use.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise a KeyError.
disney = disney.drop(columns=['description', 'cast'], errors='ignore')

In [12]:
# Check that the desired result has been achieved: 'description' and 'cast' should be gone.
disney.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family"
1,s2,Movie,Ernest Saves Christmas,John Cherry,,"November 26, 2021",1988,PG,91 min,Comedy
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family"
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,,"November 26, 2021",2021,TV-PG,41 min,Musical
4,s5,TV Show,The Beatles: Get Back,,,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music"


Again, we would like to separate the type into two different datasets so we can look at the movies and TV shows separately.

In [13]:
# Create two new datasets by splitting the original one on its 'type' column.
# .copy() makes each subset an independent DataFrame, so columns can be added
# or overwritten on it later without pandas raising SettingWithCopyWarning.
disney_movies = disney[disney['type'] == 'Movie'].copy()
disney_tvshow = disney[disney['type'] == 'TV Show'].copy()

In [14]:
# Preview the movie subset; every row should have type 'Movie'.
disney_movies.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family"
1,s2,Movie,Ernest Saves Christmas,John Cherry,,"November 26, 2021",1988,PG,91 min,Comedy
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family"
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,,"November 26, 2021",2021,TV-PG,41 min,Musical
5,s6,Movie,Becoming Cousteau,Liz Garbus,United States,"November 24, 2021",2021,PG-13,94 min,"Biographical, Documentary"


In [15]:
# Preview the TV show subset; every row should have type 'TV Show'.
disney_tvshow.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
4,s5,TV Show,The Beatles: Get Back,,,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music"
6,s7,TV Show,Hawkeye,,,"November 24, 2021",2021,TV-14,1 Season,"Action-Adventure, Superhero"
7,s8,TV Show,Port Protection Alaska,,United States,"November 24, 2021",2015,TV-14,2 Seasons,"Docuseries, Reality, Survival"
8,s9,TV Show,Secrets of the Zoo: Tampa,,United States,"November 24, 2021",2019,TV-PG,2 Seasons,"Animals & Nature, Docuseries, Family"
13,s14,TV Show,"Dr. Oakley, Yukon Vet",,United States,"November 17, 2021",2013,TV-PG,10 Seasons,"Action-Adventure, Animals & Nature, Docuseries"


## 2.3 The Amazon Prime data 

In [16]:
# Load the Amazon Prime catalogue from its CSV file into the `amazon` DataFrame.
# The path is relative, so the file is looked up in the current working directory.
amazon = pd.read_csv('amazon_prime_titles.csv')


In [17]:
# Preview the first rows to check that the data was loaded correctly.
amazon.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,The Grand Seduction,Don McKellar,"Brendan Gleeson, Taylor Kitsch, Gordon Pinsent",Canada,"March 30, 2021",2014,,113 min,"Comedy, Drama",A small fishing village must procure a local d...
1,s2,Movie,Take Care Good Night,Girish Joshi,"Mahesh Manjrekar, Abhay Mahajan, Sachin Khedekar",India,"March 30, 2021",2018,13+,110 min,"Drama, International",A Metro Family decides to fight a Cyber Crimin...
2,s3,Movie,Secrets of Deception,Josh Webber,"Tom Sizemore, Lorenzo Lamas, Robert LaSardo, R...",United States,"March 30, 2021",2017,,74 min,"Action, Drama, Suspense",After a man discovers his wife is cheating on ...
3,s4,Movie,Pink: Staying True,Sonia Anderson,"Interviews with: Pink, Adele, Beyoncé, Britney...",United States,"March 30, 2021",2014,,69 min,Documentary,"Pink breaks the mold once again, bringing her ..."
4,s5,Movie,Monster Maker,Giles Foster,"Harry Dean Stanton, Kieran O'Brien, George Cos...",United Kingdom,"March 30, 2021",1989,,45 min,"Drama, Fantasy",Teenage Matt Banting wants to work with a famo...


**Transforming the data**

Just like before, we won't be needing the columns description and cast, so we will drop them from the data.

In [18]:
# Drop the 'description' and 'cast' columns, which this analysis does not use.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise a KeyError.
amazon = amazon.drop(columns=['description', 'cast'], errors='ignore')

In [19]:
# Check that the desired result has been achieved: 'description' and 'cast' should be gone.
amazon.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,The Grand Seduction,Don McKellar,Canada,"March 30, 2021",2014,,113 min,"Comedy, Drama"
1,s2,Movie,Take Care Good Night,Girish Joshi,India,"March 30, 2021",2018,13+,110 min,"Drama, International"
2,s3,Movie,Secrets of Deception,Josh Webber,United States,"March 30, 2021",2017,,74 min,"Action, Drama, Suspense"
3,s4,Movie,Pink: Staying True,Sonia Anderson,United States,"March 30, 2021",2014,,69 min,Documentary
4,s5,Movie,Monster Maker,Giles Foster,United Kingdom,"March 30, 2021",1989,,45 min,"Drama, Fantasy"


In [20]:
# Create two new datasets by splitting the original one on its 'type' column.
# .copy() makes each subset an independent DataFrame, so columns can be added
# or overwritten on it later without pandas raising SettingWithCopyWarning.
amazon_movies = amazon[amazon['type'] == 'Movie'].copy()
amazon_tvshow = amazon[amazon['type'] == 'TV Show'].copy()

In [21]:
# Preview the movie subset; every row should have type 'Movie'.
amazon_movies.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,The Grand Seduction,Don McKellar,Canada,"March 30, 2021",2014,,113 min,"Comedy, Drama"
1,s2,Movie,Take Care Good Night,Girish Joshi,India,"March 30, 2021",2018,13+,110 min,"Drama, International"
2,s3,Movie,Secrets of Deception,Josh Webber,United States,"March 30, 2021",2017,,74 min,"Action, Drama, Suspense"
3,s4,Movie,Pink: Staying True,Sonia Anderson,United States,"March 30, 2021",2014,,69 min,Documentary
4,s5,Movie,Monster Maker,Giles Foster,United Kingdom,"March 30, 2021",1989,,45 min,"Drama, Fantasy"


In [22]:
# Preview the TV show subset; every row should have type 'TV Show'.
amazon_tvshow.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
17,s18,TV Show,Zoo Babies,,,,2008,ALL,1 Season,"Kids, Special Interest"
18,s19,TV Show,Zoë Coombs Marr: Bossy Bottom,,,,2020,18+,1 Season,"Comedy, Talk Show and Variety"
20,s21,TV Show,Zoboomafoo,,,,2001,TV-Y,1 Season,Kids
21,s22,TV Show,Zo Zo Zombie: Mini-Series,,,,2020,TV-Y7,1 Season,"Action, Anime, Comedy"
23,s24,TV Show,ZeroZeroZero,,Italy,,2020,18+,1 Season,TV Shows


## 2.4 The Hulu data 

In [23]:
# Load the Hulu catalogue from its CSV file into the `hulu` DataFrame.
# The path is relative, so the file is looked up in the current working directory.
hulu = pd.read_csv('hulu_titles.csv')

In [24]:
# Preview the first rows to check that the data was loaded correctly.
hulu.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Ricky Velez: Here's Everything,,,,"October 24, 2021",2021,TV-MA,,"Comedy, Stand Up",​Comedian Ricky Velez bares it all with his ho...
1,s2,Movie,Silent Night,,,,"October 23, 2021",2020,,94 min,"Crime, Drama, Thriller","Mark, a low end South London hitman recently r..."
2,s3,Movie,The Marksman,,,,"October 23, 2021",2021,PG-13,108 min,"Action, Thriller",A hardened Arizona rancher tries to protect an...
3,s4,Movie,Gaia,,,,"October 22, 2021",2021,R,97 min,Horror,A forest ranger and two survivalists with a cu...
4,s5,Movie,Settlers,,,,"October 22, 2021",2021,,104 min,"Science Fiction, Thriller",Mankind's earliest settlers on the Martian fro...


In [25]:
# Drop the 'description' and 'cast' columns, which this analysis does not use.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise a KeyError.
hulu = hulu.drop(columns=['description', 'cast'], errors='ignore')


In [26]:
# Check that the desired result has been achieved: 'description' and 'cast' should be gone.
hulu.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Ricky Velez: Here's Everything,,,"October 24, 2021",2021,TV-MA,,"Comedy, Stand Up"
1,s2,Movie,Silent Night,,,"October 23, 2021",2020,,94 min,"Crime, Drama, Thriller"
2,s3,Movie,The Marksman,,,"October 23, 2021",2021,PG-13,108 min,"Action, Thriller"
3,s4,Movie,Gaia,,,"October 22, 2021",2021,R,97 min,Horror
4,s5,Movie,Settlers,,,"October 22, 2021",2021,,104 min,"Science Fiction, Thriller"


In [27]:
# Create two new datasets by splitting the original one on its 'type' column.
# .copy() makes each subset an independent DataFrame, so columns can be added
# or overwritten on it later without pandas raising SettingWithCopyWarning.
hulu_movies = hulu[hulu['type'] == 'Movie'].copy()
hulu_tvshow = hulu[hulu['type'] == 'TV Show'].copy()

In [28]:
# Preview the movie subset; every row should have type 'Movie'.
hulu_movies.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Ricky Velez: Here's Everything,,,"October 24, 2021",2021,TV-MA,,"Comedy, Stand Up"
1,s2,Movie,Silent Night,,,"October 23, 2021",2020,,94 min,"Crime, Drama, Thriller"
2,s3,Movie,The Marksman,,,"October 23, 2021",2021,PG-13,108 min,"Action, Thriller"
3,s4,Movie,Gaia,,,"October 22, 2021",2021,R,97 min,Horror
4,s5,Movie,Settlers,,,"October 22, 2021",2021,,104 min,"Science Fiction, Thriller"


In [29]:
# Preview the TV show subset; every row should have type 'TV Show'.
hulu_tvshow.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
5,s6,TV Show,The Halloween Candy Magic Pet,,,"October 22, 2021",2021,,1 Season,"Family, Kids"
7,s8,TV Show,The Next Thing You Eat,,,"October 21, 2021",2021,,1 Season,"Cooking & Food, Documentaries, Lifestyle & Cul..."
8,s9,TV Show,Queens,,,"October 20, 2021",2021,TV-14,1 Season,"Drama, Music"
9,s10,TV Show,The Bachelorette,,United States,"October 20, 2021",2003,TV-14,3 Seasons,"Reality, Romance"
10,s11,TV Show,The Real Queens of Hip-Hop: The Women Who Chan...,,,"October 19, 2021",2021,,1 Season,"Music, News"


# 3. Visualisations

In [54]:
def _prepare_movie_durations(movies, count_col):
    """Return a copy of `movies` prepared for the duration line plot.

    Parameters
    ----------
    movies : pandas.DataFrame
        Movie rows of one streaming service; must contain a 'duration' column.
    count_col : str
        Name of the column that receives the number of rows per duration.

    Returns
    -------
    pandas.DataFrame
        A new frame, sorted by 'duration', in which 'duration' is a float
        (the first run of digits of the original value, e.g. '90 min' -> 90.0,
        NaN when the value contains no digits) and `count_col` holds how many
        rows share that duration.
    """
    # Work on a copy: the incoming frame may be a selection of a larger
    # DataFrame, and writing to such a selection triggers SettingWithCopyWarning.
    prepared = movies.copy()

    # Raw string, because '\d' inside a normal string literal is an invalid
    # escape sequence. expand=False returns a Series instead of a one-column frame.
    prepared['duration'] = (
        prepared['duration'].astype(str).str.extract(r'(\d+)', expand=False).astype(float)
    )

    # Count on the numeric value so that every duration maps to exactly one count.
    prepared[count_col] = prepared.groupby('duration')['duration'].transform('size')

    return prepared.sort_values(by='duration')


# Add the per-duration counts, convert 'duration' to minutes and sort, for each service.
netflix_movies = _prepare_movie_durations(netflix_movies, 'netflix_count')
disney_movies = _prepare_movie_durations(disney_movies, 'disney_count')
amazon_movies = _prepare_movie_durations(amazon_movies, 'amazon_count')
hulu_movies = _prepare_movie_durations(hulu_movies, 'hulu_count')

# Create a subplot figure for the line plots.
fig = make_subplots(rows=1, cols=1)

# Add one line per service: Netflix red, Disney+ blue, Amazon Prime Video orange, Hulu green.
for service_name, service_movies, count_col, line_color in (
    ('Netflix', netflix_movies, 'netflix_count', 'red'),
    ('Disney', disney_movies, 'disney_count', 'blue'),
    ('Amazon', amazon_movies, 'amazon_count', 'orange'),
    ('Hulu', hulu_movies, 'hulu_count', 'green'),
):
    # Every title with the same duration yields the same (duration, count) pair,
    # so one point per distinct duration draws the same line with far fewer points.
    # Rows without a usable duration are left out.
    points = service_movies[['duration', count_col]].dropna().drop_duplicates()
    fig.add_trace(go.Scatter(x=points['duration'], y=points[count_col],
                             name=service_name, line=dict(color=line_color)))

# Update the layout of the graph with a title, axis labels, and legend positioning.
fig.update_layout(title='Duration of movies on different streaming services',
                  xaxis_title='Duration (in minutes)',
                  yaxis_title='Number of movies',
                  legend=dict(x=1, y=1, traceorder='normal'))

# Add a range slider to the x-axis for interactive zooming and limit it so it can't go below zero
fig.update_xaxes(rangeslider_visible=True, rangemode='tozero')

# Display the interactive graph.
fig.show()


In [53]:
def _prepare_tvshow_durations(tvshows, count_col):
    """Return a copy of `tvshows` prepared for the duration line plot.

    Parameters
    ----------
    tvshows : pandas.DataFrame
        TV show rows of one streaming service; must contain a 'duration' column.
    count_col : str
        Name of the column that receives the number of rows per duration.

    Returns
    -------
    pandas.DataFrame
        A new frame, sorted by 'duration', in which 'duration' is a float
        (the first run of digits of the original value, e.g. '2 Seasons' -> 2.0,
        NaN when the value contains no digits) and `count_col` holds how many
        rows share that duration.
    """
    # Work on a copy: the incoming frame may be a selection of a larger
    # DataFrame, and writing to such a selection triggers SettingWithCopyWarning.
    prepared = tvshows.copy()

    # Raw string, because '\d' inside a normal string literal is an invalid
    # escape sequence. expand=False returns a Series instead of a one-column frame.
    prepared['duration'] = (
        prepared['duration'].astype(str).str.extract(r'(\d+)', expand=False).astype(float)
    )

    # Count on the numeric value so that every duration maps to exactly one count.
    prepared[count_col] = prepared.groupby('duration')['duration'].transform('size')

    return prepared.sort_values(by='duration')


# Add the per-duration counts, convert 'duration' to seasons and sort, for each service.
netflix_tvshow = _prepare_tvshow_durations(netflix_tvshow, 'netflix_count')
disney_tvshow = _prepare_tvshow_durations(disney_tvshow, 'disney_count')
amazon_tvshow = _prepare_tvshow_durations(amazon_tvshow, 'amazon_count')
hulu_tvshow = _prepare_tvshow_durations(hulu_tvshow, 'hulu_count')

# Create a subplot figure for the line plots.
fig = make_subplots(rows=1, cols=1)

# Add one line per service: Netflix red, Disney+ blue, Amazon Prime Video orange, Hulu green.
for service_name, service_tvshows, count_col, line_color in (
    ('Netflix', netflix_tvshow, 'netflix_count', 'red'),
    ('Disney', disney_tvshow, 'disney_count', 'blue'),
    ('Amazon', amazon_tvshow, 'amazon_count', 'orange'),
    ('Hulu', hulu_tvshow, 'hulu_count', 'green'),
):
    # Every title with the same duration yields the same (duration, count) pair,
    # so one point per distinct duration draws the same line with far fewer points.
    # Rows without a usable duration are left out.
    points = service_tvshows[['duration', count_col]].dropna().drop_duplicates()
    fig.add_trace(go.Scatter(x=points['duration'], y=points[count_col],
                             name=service_name, line=dict(color=line_color)))

# Update the layout of the graph with a title, axis labels, and legend positioning.
fig.update_layout(title='Duration of TV Shows on different streaming services',
                  xaxis_title='Duration (in seasons)',
                  yaxis_title='Number of series',
                  legend=dict(x=1, y=1, traceorder='normal'))

# Add a range slider to the x-axis for interactive zooming and limit it so it can't go below zero
fig.update_xaxes(rangeslider_visible=True, rangemode='tozero')

# Display the interactive graph.
fig.show()


In [55]:
# Per service: count how many titles each director has and keep the five
# most frequent ones (value_counts sorts by descending count and skips missing values).
director_count_netflix = netflix['director'].value_counts().head(5)
director_count_disney = disney['director'].value_counts().head(5)
director_count_amazon = amazon['director'].value_counts().head(5)

# One fixed colour per streaming service.
color_netflix = 'red'
color_disney = 'blue'
color_amazon = 'orange'

# One bar trace per service: director names on the x-axis, title counts on the y-axis.
trace_netflix = go.Bar(
    x=director_count_netflix.index,
    y=director_count_netflix.values,
    name='Netflix',
    marker_color=color_netflix,
)
trace_disney = go.Bar(
    x=director_count_disney.index,
    y=director_count_disney.values,
    name='Disney+',
    marker_color=color_disney,
)
trace_amazon = go.Bar(
    x=director_count_amazon.index,
    y=director_count_amazon.values,
    name='Amazon Prime Video',
    marker_color=color_amazon,
)

# Assemble the figure, adding the traces in the same order: Netflix, Disney+, Amazon.
fig = go.Figure()
for service_trace in (trace_netflix, trace_disney, trace_amazon):
    fig.add_trace(service_trace)

# Title, axis labels, and side-by-side (grouped) bars.
fig.update_layout(
    title='Top 5 directors on different streaming services',
    xaxis_title='Director',
    yaxis_title='Number of Movies / TV Shows Directed',
    barmode='group',
)

# Show the graph.
fig.show()