In [293]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import datetime
import numpy as np

In [317]:
dataframe = pd.read_csv('data.csv')
dataframe

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


### Preprocessing steps

Before applying different data visualization methods, some cleanup is necessary.

Because some movie titles are available in multiple countries and are listed as well, in multiple Netflix genres (listed_in column), by splitting these columns and adding new entries, we will have a better overview on the data. After this operation, there might be some residual initial white spaces, such as ' India' instead of 'India', therefore this case must be handled as well.
    

In [314]:
by_genre = dataframe[['title', 'country', 'date_added',
                        'release_year', 'listed_in']].copy()

by_genre = by_genre.sort_values(by=['country'])
by_genre = by_genre[by_genre['country'].notna()]
by_genre['country'] = by_genre['country'].apply(d)
by_genre = by_genre.explode('country')

by_genre['listed_in'] = by_genre['listed_in'].apply(
    lambda x: str(x).split(","))

by_genre = by_genre.explode('listed_in')
by_genre

Unnamed: 0,title,country,date_added,release_year,listed_in
4746,Palermo Hollywood,Argentina,"October 8, 2020",2004,Dramas
4746,Palermo Hollywood,Argentina,"October 8, 2020",2004,International Movies
4495,No Time for Shame,Argentina,"November 19, 2019",2019,International TV Shows
4495,No Time for Shame,Argentina,"November 19, 2019",2019,Reality TV
4495,No Time for Shame,Argentina,"November 19, 2019",2019,Spanish-Language TV Shows
...,...,...,...,...,...
2757,Hitler - A Career,West Germany,"February 10, 2017",1977,Documentaries
2757,Hitler - A Career,West Germany,"February 10, 2017",1977,International Movies
1493,Cook Off,Zimbabwe,"June 1, 2020",2017,Comedies
1493,Cook Off,Zimbabwe,"June 1, 2020",2017,International Movies


In [315]:
by_genre['country'] = by_genre['country'].astype(str)
for index, row in by_genre.iterrows():
    if(str(row['country']) != '' and str(row['country'])[0] == ' '):
        strr = row['country'][1:]
        row['country'] = strr
        by_genre.at[index, 'country'] = strr


In the next block of code, we will obtain a general overview of the movies and TV shows genres count, without taking into account their release date.



In [316]:
new_dataframe = pd.DataFrame(columns=[ 'listed_in','count'])
unique_countries = by_genre['country'].unique()
unique_by_genre = by_genre['listed_in'].unique()

for item in unique_by_genre:
    count = by_genre[by_genre['listed_in'] == item]['title'].count()    
    new_dataframe = new_dataframe.append({'listed_in': item, 'count': count}, ignore_index = True)
    
new_dataframe = new_dataframe.sort_values(by=['count'], ascending=False)
new_dataframe

Unnamed: 0,listed_in,count
1,International Movies,2906
0,Dramas,1840
5,Comedies,1187
14,Action & Adventure,977
16,Documentaries,945
...,...,...
54,Romantic Movies,3
72,Spanish-Language TV Shows,3
57,TV Sci-Fi & Fantasy,1
56,LGBTQ Movies,1


In the next pie chart we have the percentage of each genre, no matter the year of release, the most frequent ones being 'International Movies',  'Dramas', Comedies' and 'Action & Adventure' 

In [319]:
import plotly.express as px
fig = px.pie(new_dataframe, values='count', names='listed_in',
             title='Movie genre count',
             hover_data=['count'])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [299]:
first_genres = np.array(new_dataframe.iloc[0:4]['listed_in'])
first_genres

array([' International Movies', 'Dramas', 'Comedies',
       'Action & Adventure'], dtype=object)

For each of the most common genres, we will now plot the world map of these insights, for each country

In [320]:
def plot_by_genre_and_by_country(df, genre):
    expanded_by_specific_genre = df[df['listed_in'] == genre]
    country_titles = pd.DataFrame(columns=['country','number_of_titles'])
    by_specific_genre = expanded_by_specific_genre.groupby(by=['country']).count()
    by_specific_genre = by_specific_genre.reset_index()
   
    country_titles['country'] = by_specific_genre['country']
    country_titles['number_of_titles'] = by_specific_genre['title']

    fig = px.choropleth(country_titles,
              locations="country",
              title=genre,
              color="number_of_titles",
              hover_name="country",
              hover_data={
                  "country": False,
                  "number_of_titles": True,
                
              },
              locationmode='country names',
              color_continuous_scale='reds',
              height=700)
    fig.show()
    country_titles = country_titles[0:0]

In [321]:
plot_by_genre_and_by_country(by_genre, first_genres[0])

In [322]:
plot_by_genre_and_by_country(expanded_by_genre, first_genres[1])

In [323]:
plot_by_genre_and_by_country(expanded_by_genre, first_genres[2])

In [324]:
plot_by_genre_and_by_country(expanded_by_genre, first_genres[3])