In [1]:
import pandas as pd
import plotly.express as px
import ast

df = pd.read_csv('data/df_netflix_titles.csv')

In [2]:
df['listed_in'].value_counts()

listed_in
['dramas', ' international movies']                                     362
['documentaries']                                                       359
['stand-up comedy']                                                     334
['comedies', ' dramas', ' international movies']                        274
['dramas', ' independent movies', ' international movies']              252
                                                                       ... 
['classic & cult tv', ' tv horror', ' tv mysteries']                      1
['british tv shows', ' tv dramas', ' tv sci-fi & fantasy']                1
['action & adventure', ' faith & spirituality', ' sci-fi & fantasy']      1
['comedies', ' cult movies', ' sports movies']                            1
['cult movies', ' dramas', ' thrillers']                                  1
Name: count, Length: 514, dtype: int64

In [3]:
df_netflix_genre = df.copy(deep=True)
df_netflix_genre.dropna(subset='listed_in', inplace=True)
df_netflix_genre['listed_in'] = df_netflix_genre['listed_in'].apply(lambda x: str(x))
df_netflix_genre['listed_in'] = df_netflix_genre['listed_in'].apply(lambda x: x.split(','))
df_netflix_genre_exploded = df_netflix_genre.explode(column='listed_in')
df_netflix_genre_exploded['listed_in'] = df_netflix_genre_exploded['listed_in'].apply(lambda x: x.strip())
df_netflix_genre_exploded['listed_in'] = df_netflix_genre_exploded['listed_in'].apply(lambda x: x.replace('[','').replace(']','').replace("' ","").replace("'","").replace('" ','').replace('"',''))
df_netflix_genre_exploded['listed_in'].unique()
 

# df_netflix_genre_grouped = df_netflix_genre_exploded.groupby(['listed_in']).size()
# df_netflix_genre_grouped

array(['documentaries', 'international tv shows', 'tv dramas',
       'tv mysteries', 'crime tv shows', 'tv action & adventure',
       'docuseries', 'reality tv', 'romantic tv shows', 'tv comedies',
       'tv horror', 'children & family movies', 'dramas',
       'independent movies', 'international movies', 'british tv shows',
       'comedies', 'spanish-language tv shows', 'thrillers',
       'romantic movies', 'music & musicals', 'horror movies',
       'sci-fi & fantasy', 'tv thrillers', 'kidstv', 'action & adventure',
       'tv sci-fi & fantasy', 'classic movies', 'anime features',
       'sports movies', 'anime series', 'korean tv shows',
       'science & nature tv', 'teen tv shows', 'cult movies', 'tv shows',
       'faith & spirituality', 'lgbtq movies', 'stand-up comedy',
       'movies', 'stand-up comedy & talk shows', 'classic & cult tv'],
      dtype=object)

In [4]:
d = df_netflix_genre_exploded['listed_in'].unique()
d.sort()

In [5]:
df['listed_in'] = df['listed_in'].apply(ast.literal_eval)

In [6]:
def filter_genres(row, filter_values):
    # Check if any of the filter values are in the list of genres
    return any(item in filter_values for item in row)

In [7]:
# Example filter values
filter_values = 'documentaries'

# Apply the filter to the DataFrame
filtered_df = df[df['listed_in'].apply(lambda x: filter_genres(x, filter_values))]
filtered_df

Unnamed: 0,type,title,director,cast,country,date_added,release_year,duration,listed_in,description,title_slug,director_list,rating_imdb
0,movie,Dick Johnson Is Dead,Kirsten Johnson,,['United States'],"September 25, 2021",2020,90 min,[documentaries],"As her father nears the end of his life, filmm...",dick-johnson-is-dead,['kirsten johnson'],5.4
16,movie,Europe's Most Dangerous Man: Otto Skorzeny in ...,"Pedro de Echave García, Pablo Azorín Williams",,[''],"September 22, 2021",2020,67 min,"[documentaries, international movies]",Declassified documents reveal the post-WWII li...,europe-s-most-dangerous-man-otto-skorzeny-in-s...,"['pedro de echave garcía', ' pablo azorín will...",3.7
45,movie,My Heroes Were Cowboys,Tyler Greco,,[''],"September 16, 2021",2021,23 min,[documentaries],Robin Wiltshire's painful childhood was rescue...,my-heroes-were-cowboys,['tyler greco'],6.0
68,movie,Schumacher,"Hanns-Bruno Kammertöns, Vanessa Nöcker, Michae...",Michael Schumacher,[''],"September 15, 2021",2021,113 min,"[documentaries, international movies, sports...",Through exclusive interviews and archival foot...,schumacher,"['hanns-bruno kammertöns', ' vanessa nöcker', ...",5.8
88,movie,Blood Brothers: Malcolm X & Muhammad Ali,Marcus Clarke,"Malcolm X, Muhammad Ali",[''],"September 9, 2021",2021,96 min,"[documentaries, sports movies]","From a chance meeting to a tragic fallout, Mal...",blood-brothers-malcolm-x-muhammad-ali,['marcus clarke'],6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8739,movie,Why We Fight: The Battle of Russia,"Frank Capra, Anatole Litvak",,['United States'],"March 31, 2017",1943,82 min,[documentaries],This installment of Frank Capra's acclaimed do...,why-we-fight-the-battle-of-russia,"['frank capra', ' anatole litvak']",7.4
8744,movie,Williams,Morgan Matthews,,['United Kingdom'],"November 4, 2017",2017,105 min,"[documentaries, international movies, sports...",This inspiring documentary charts the birth of...,williams,['morgan matthews'],6.7
8746,movie,Winnie,Pascale Lamche,,"['France', ' Netherlands', ' South Africa', ' ...","February 26, 2018",2017,85 min,"[documentaries, international movies]",Winnie Mandela speaks about her extraordinary ...,winnie,['pascale lamche'],6.1
8756,movie,Woodstock,Barak Goodman,,['United States'],"August 13, 2019",2019,97 min,"[documentaries, music & musicals]",For the 50th anniversary of the legendary Wood...,woodstock,['barak goodman'],7.5


In [8]:
df_netflix_last_titles = df[df["release_year"] >= df['release_year'].max()-5]

fig = px.box(df_netflix_last_titles, 
             x='release_year', 
             y="rating_imdb", 
             color='type',
             width=700,
             title="Distribuição das notas no IMDB \nnos últimos 5 anos agrupadas por tipo",
             color_discrete_sequence=px.colors.qualitative.Dark24,
             labels={'release_year':'Ano de lançamento', 'rating_imdb':'Nota no IMDB', 'type':'Tipo'})
fig.show()

In [11]:
fig = px.histogram(df, 
                   x="rating_imdb", 
                   color='type',
                   labels={'rating_imdb':'Nota no IMDB', 'type':'Tipo'},
                   color_discrete_sequence=px.colors.qualitative.Dark24,
                   width=700,
                   nbins=20, 
                   barmode='stack',
                   title="Histograma da distribuição das notas no IMDB agrupadas por tipo")
fig.data = fig.data[::-1]
fig.layout.legend.traceorder = 'reversed'
fig.update_yaxes(title='')
fig.update_layout(bargap=0.1)
fig.show()