**Load in libraries**

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from textblob import TextBlob

**Import dataset**

In [2]:
df = pd.read_csv('netflix_titles.csv')

**Dataset information**

In [3]:
print(df.shape)
print(df.columns)

(8807, 12)
Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')


**Distribution of content ratings**

In [4]:
z = df.groupby(['rating']).size().reset_index(name = 'counts')
pieChart = px.pie(z, values = 'counts', names = 'rating',
                  title = 'Distribution of Content Ratings on Netflix',
                  color_discrete_sequence = px.colors.qualitative.Set3)
pieChart.show()

* The majority of Netflix content is categorized as 'TV-MA'

* Most of the content available is intended for adult audiences

**Top 5 Actors and Directors**

In [5]:
df['director'] = df['director'].fillna('No Director Specified')

filtered_directors = pd.DataFrame()
filtered_directors = df['director'].str.split(',', expand = True).stack()
filtered_directors = filtered_directors.to_frame()
filtered_directors.columns = ['Director']

directors = filtered_directors.groupby(['Director']).size().reset_index(name = 'Total Content')
directors = directors[directors.Director != 'No Director Specified']
directors = directors.sort_values(by = ['Total Content'], ascending = False)

directorsTop5 = directors.head()
directorsTop5 = directorsTop5.sort_values(by = ['Total Content'])

fig1 = px.bar(directorsTop5, x='Total Content', y='Director', title='Top 5 Directors on Netflix')
fig1.show()

The Top 5 Listed Directors are:
1.   Rhajiv Chilaka (Count = 22)
2.   Jan Suter (Count = 18)
3.   Raul Campos (Count = 18)
4.   Macus Raboy (Count = 16)
5.   Suhas Kadav (Count = 16)



In [6]:
df['cast'] = df['cast'].fillna('No Cast Specified')

filtered_cast = pd.DataFrame()
filtered_cast = df['cast'].str.split(',',expand = True).stack()
filtered_cast = filtered_cast.to_frame()
filtered_cast.columns = ['Actor']

actors = filtered_cast.groupby(['Actor']).size().reset_index(name = 'Total Content')
actors = actors[actors.Actor != 'No Cast Specified']
actors = actors.sort_values(by = ['Total Content'], ascending = False)

actorsTop5 = actors.head()
actorsTop5 = actorsTop5.sort_values(by = ['Total Content'])

fig2 = px.bar(actorsTop5, x='Total Content', y='Actor', title='Top 5 Actors on Netflix')
fig2.show()

The Top 5 Listed Actors are:
1.   Anupam Kher (Count = 39)
2.   Rupa Bhimani (Count = 31)
3.   Takahiro Sakurai (Count = 30)
4.   Julie Tejwani (Count = 28)
5.   Om Puri (Count = 27)



**Analyzing Netflix Content**

Production Trend

In [7]:
df1 = df[['type','release_year']]
df1 = df1.rename(columns = {'release_year': 'Release Year'})

df2 = df1.groupby(['Release Year', 'type']).size().reset_index(name = 'Total Content')
df2 = df2 [df2['Release Year']>= 2010]

fig3 = px.line(df2, x = 'Release Year', y = 'Total Content', color = 'type', title = 'Trend of Content Produced on Netflix over Time')
fig3.show()

There has been a decline in production of Movies since 2018 and TV shows since 2020

Netflix Content Review

In [8]:
df3 = df[['release_year', 'description']]
df3 = df3.rename(columns = {'release_year': 'Release Year'})

for index, row in df3.iterrows():
  z = row['description']
  testimonial = TextBlob(z)
  p = testimonial.sentiment.polarity

  if p==0:
    sent = 'Neutral'
  elif p>0:
    sent = 'Positive'
  else:
    sent = 'Negative'

  df3.loc[[index,2], 'Review'] = sent

In [9]:
df3 = df3.groupby(['Release Year', 'Review']).size().reset_index(name = 'Total Content')
df3 = df3[df3['Release Year']>= 2010]

fig4 = px.bar(df3, x = 'Release Year', y = 'Total Content', color = 'Review', title = 'Netflix Content Review')
fig4.show()

The content on Netflix overall received more positive reviews than neutral or negative reviews combined.