# Netflix data trends using plotly

Source = https://www.kaggle.com/shivamb/netflix-shows

Description = This dataset consists of TV shows and movies available on Netflix as of 2019. The dataset was collected from Flixable, a third-party Netflix search engine.

In [134]:
import pandas as pd
import numpy as np
import os
from collections import Counter
import ipywidgets

import plotly.graph_objs as go
import plotly.express as px

In [46]:
# Locate the dataset in the current working directory: every entry whose name
# starts with "netflix". os.listdir() returns entries in arbitrary order, so
# the matches are sorted to make the choice deterministic, with *.csv entries
# first so a CSV is preferred over any other file sharing the prefix.
filename = sorted(
    (i for i in os.listdir() if i.startswith('netflix')),
    key=lambda name: (not name.lower().endswith('.csv'), name),
)
if not filename:
    # Fail with an explicit message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"No file starting with 'netflix' found in {os.getcwd()!r}"
    )

netflix_df = pd.read_csv(filename[0])

In [140]:
## Clean data
def convert_to_list(x):
    """Normalise a comma-separated cell into a list of trimmed strings.

    Args:
        x: A cell value. Lists are returned unchanged, strings are split on
            commas, and any other value (e.g. NaN or None) is treated as
            holding no items.

    Returns:
        list: The individual items with surrounding whitespace removed.
        Empty items (from an empty string, or from leading, trailing or
        doubled commas) are dropped so they are never counted as values.
    """
    if isinstance(x, list):
        # Already converted (e.g. the cell was run twice); leave untouched.
        return x
    if not isinstance(x, str):
        return []
    stripped = (item.strip() for item in x.split(','))
    return [item for item in stripped if item]
    
# Turn each comma-separated text column into a column of Python lists.
for column in ('listed_in', 'country', 'cast'):
    netflix_df[column] = netflix_df[column].apply(convert_to_list)
# Parse the date strings one value at a time.
netflix_df['date_added'] = netflix_df['date_added'].apply(pd.to_datetime)

## Analysis of most popular category (listed_in)
# Flatten the per-title category lists and tally them. most_common() returns
# (category, count) pairs ordered from most to least frequent.
category_count = Counter(
    item for sublist in netflix_df.listed_in for item in sublist
).most_common()
# One row per category; built from category_count (the original referenced an
# undefined name here).
category_df = pd.DataFrame(category_count, columns=["type", "count"])
  
# Treemap with one tile per category, sized by the 'count' column.
fig = px.treemap(
    category_df,
    path=['type'],
    values='count',
    title='Treemap of Type',
)
# Drop the Plotly Express hover template and set the layout hover mode.
fig.update_traces(hovertemplate=None).update_layout(hovermode="x")
fig.show()

# Bar chart of the number of titles listed under each category.
fig = px.bar(category_df,
             x='type', y='count',
             title = 'Count of Categories',
             # Plotly Express keys `labels` by data-frame column name, not by
             # axis, so the columns are named here to get the intended titles.
             labels = {'type': 'category',
                       'count': 'count'})
# Drop the Plotly Express hover template and use x-axis hover mode.
fig.update_traces(hovertemplate=None)
fig.update_layout(hovermode="x")
fig.show()



## release_year 

## Average movie length

In [142]:
## Movie vs TV show by year
def response(change):
    """Refresh the histogram to match the release year chosen in the dropdown.

    Args:
        change: Notification passed by the widget observer. It is not used;
            the current selection is read from ``rel_year`` directly.
    """
    selected = rel_year.value
    if selected == 'ALL':
        # No filtering: show every title.
        subset = netflix_df
    else:
        subset = netflix_df[netflix_df.release_year == selected]
    # Apply the new data inside a single batched figure update.
    with fig.batch_update():
        fig.data[0].x = list(subset['type'])

# Dropdown offering 'ALL' followed by every release year, newest first.
year_choices = sorted(netflix_df['release_year'].unique(), reverse=True)
rel_year = ipywidgets.Dropdown(options=['ALL'] + year_choices, value='ALL')
rel_description = ipywidgets.HTML('Release Year: ')

# Call response() whenever the dropdown's value changes.
rel_year.observe(response, names="value")

# Histogram over the 'type' column, initially for the whole data set.
trace = go.Histogram(x=netflix_df['type'], name='Type')
fig = go.FigureWidget(data=[trace], layout=go.Layout(title='Media Type'))
fig.update_traces(hovertemplate=None).update_layout(hovermode="x")

# Label and dropdown side by side, with the figure underneath; the VBox is
# the cell's final expression so the notebook displays it.
container = ipywidgets.HBox([rel_description, rel_year])
ipywidgets.VBox([container, fig])

VBox(children=(HBox(children=(HTML(value='Release Year: '), Dropdown(options=('ALL', 2020, 2019, 2018, 2017, 2…