# 1. Importing Libraries

In [1]:
import pandas as pd 

# 2. Data Loading

In [2]:
df = pd.read_csv('netflix1.csv')
df

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
1,s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
2,s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
3,s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
4,s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...,...
8785,s8797,TV Show,Yunus Emre,Not Given,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
8786,s8798,TV Show,Zak Storm,Not Given,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
8787,s8801,TV Show,Zindagi Gulzar Hai,Not Given,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
8788,s8784,TV Show,Yoko,Not Given,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


# 3. Exploring Data

In [3]:
df.head()

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
1,s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
2,s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
3,s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
4,s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"


In [4]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in'],
      dtype='object')

In [5]:
df.dtypes

show_id         object
type            object
title           object
director        object
country         object
date_added      object
release_year     int64
rating          object
duration        object
listed_in       object
dtype: object

In [8]:
df.describe() 

Unnamed: 0,release_year
count,8790.0
mean,2014.183163
std,8.825466
min,1925.0
25%,2013.0
50%,2017.0
75%,2019.0
max,2021.0


In [9]:
df.shape

(8790, 10)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8790 entries, 0 to 8789
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8790 non-null   object
 1   type          8790 non-null   object
 2   title         8790 non-null   object
 3   director      8790 non-null   object
 4   country       8790 non-null   object
 5   date_added    8790 non-null   object
 6   release_year  8790 non-null   int64 
 7   rating        8790 non-null   object
 8   duration      8790 non-null   object
 9   listed_in     8790 non-null   object
dtypes: int64(1), object(9)
memory usage: 686.8+ KB


# 4. Data Cleaning

In [11]:
df.isnull().sum()

show_id         0
type            0
title           0
director        0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
dtype: int64

In [12]:
df.duplicated().sum()

np.int64(0)

# Total Content on Netflix

In [13]:
total_content = df.shape[0]
total_content

8790

#  Top 10 countries with Netflix Content

In [15]:
top_countries = df['country'].value_counts().head(10)

top_countries.reset_index()

Unnamed: 0,country,count
0,United States,3240
1,India,1057
2,United Kingdom,638
3,Pakistan,421
4,Not Given,287
5,Canada,271
6,Japan,259
7,South Korea,214
8,France,213
9,Spain,182


# Monthly Trend of Movie and TV Show Releases

In [16]:
df['date_added'] = pd.to_datetime(df['date_added'], errors='coerce')

df['Month'] = df['date_added'].dt.to_period('M')
monthly_trend = df.groupby(['Month', 'type']).size().reset_index(name='Total Releases')
monthly_trend

Unnamed: 0,Month,type,Total Releases
0,2008-01,Movie,1
1,2008-02,TV Show,1
2,2009-05,Movie,1
3,2009-11,Movie,1
4,2010-11,Movie,1
...,...,...,...
188,2021-07,TV Show,88
189,2021-08,Movie,117
190,2021-08,TV Show,61
191,2021-09,Movie,118


#  Yearly Trend of Movie and TV Show Releases

In [17]:
df['Year'] = df['date_added'].dt.year

yearly_trend = df.groupby(['Year', 'type']).size().reset_index(name='Total Releases')
yearly_trend


Unnamed: 0,Year,type,Total Releases
0,2008,Movie,1
1,2008,TV Show,1
2,2009,Movie,2
3,2010,Movie,1
4,2011,Movie,13
5,2012,Movie,3
6,2013,Movie,6
7,2013,TV Show,5
8,2014,Movie,19
9,2014,TV Show,5


# Top 10 Movies

In [19]:

movies_df = df[df['type'] == 'Movie']
top_10_movies = movies_df.sort_values(by='release_year', ascending=False).head(10)

top_10_movies[['title', 'director', 'country', 'release_year', 'duration']].reset_index()


Unnamed: 0,index,title,director,country,release_year,duration
0,6906,Creating The Queen's Gambit,Not Given,United States,2021,14 min
1,6910,Stuck Apart,Not Given,Turkey,2021,97 min
2,1135,Pagglait,Umesh Bist,India,2021,114 min
3,6922,The Minimalists: Less Is Now,Not Given,United States,2021,54 min
4,14,Bling Empire - The Afterparty,"Krysia Plonka, Kristian Mercado",United States,2021,36 min
5,15,Cobra Kai - The Afterparty,"Krysia Plonka, Kristian Mercado",United States,2021,34 min
6,16,The Circle - The Afterparty,"Krysia Plonka, Kristian Mercado",United States,2021,35 min
7,23,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha",Not Given,2021,91 min
8,1125,7 Yards: The Chris Norton Story,Jonathon Link,United States,2021,93 min
9,1126,Octonauts & the Ring of Fire,Blair Simmons,China,2021,73 min


# Top 10 TV Show

In [20]:

tv_shows_df = df[df['type'] == 'TV Show']


top_10_tv_shows = tv_shows_df.sort_values(by='release_year', ascending=False).head(10)


top_10_tv_shows[['title', 'director', 'country', 'release_year', 'duration']].reset_index()



Unnamed: 0,index,title,director,country,release_year,duration
0,1,Ganglands,Julien Leclercq,France,2021,1 Season
1,2,Midnight Mass,Mike Flanagan,United States,2021,1 Season
2,5,The Great British Baking Show,Andy Devonshire,United Kingdom,2021,9 Seasons
3,73,Nailed It,Not Given,United States,2021,6 Seasons
4,72,I Heart Arlo,Not Given,Pakistan,2021,1 Season
5,17,Jailbirds New Orleans,Not Given,Pakistan,2021,1 Season
6,50,The Ingenuity of the Househusband,Not Given,Pakistan,2021,1 Season
7,49,Hometown Cha-Cha-Cha,Not Given,Pakistan,2021,1 Season
8,48,Turning Point: 9/11 and the War on Terror,Not Given,Pakistan,2021,1 Season
9,18,Crime Stories: India Detectives,Not Given,Pakistan,2021,1 Season


#  Top_10_Directors

In [22]:

top_10_directors = df['director'].value_counts().head(10)


top_10_directors.reset_index()


Unnamed: 0,director,count
0,Not Given,2588
1,Rajiv Chilaka,20
2,Alastair Fothergill,18
3,"Raúl Campos, Jan Suter",18
4,Marcus Raboy,16
5,Suhas Kadav,16
6,Jay Karas,14
7,Cathy Garcia-Molina,13
8,Jay Chapman,12
9,Martin Scorsese,12
