In [6]:
import pandas as pd
import numpy as np

# Read netflix.csv (path is relative to the working directory) into a DataFrame
nfl_data = pd.read_csv("netflix.csv")


In [7]:
# preview five randomly chosen rows (no seed is set, so the rows differ between runs)
nfl_data.sample(n=5)

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
6587,s73,TV Show,Jack Whitehall: Travels with My Father,Not Given,United Kingdom,9/14/2021,2021,TV-MA,5 Seasons,"British TV Shows, Docuseries, International TV..."
6544,s8807,Movie,Zubaan,Mozez Singh,India,3/2/2019,2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals"
8246,s5708,TV Show,Gilmore Girls: A Year in the Life,Not Given,United States,11/25/2016,2016,TV-14,1 Season,"TV Comedies, TV Dramas, Teen TV Shows"
507,s5260,TV Show,Satu Hari,Not Given,Pakistan,9/29/2017,2014,TV-PG,1 Season,"International TV Shows, TV Dramas"
5876,s8007,Movie,Shopkins: Wild,"Adele K. Thomas, Richard Bailey",Australia,11/15/2018,2018,TV-Y,75 min,Movies


In [8]:
# count the null entries in every column
missing_values_count = nfl_data.isna().sum(axis=0)

# show the counts for (at most) the first ten columns
missing_values_count.iloc[:10]

show_id         0
type            0
title           0
director        0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
dtype: int64

In [9]:
# how many total missing values do we have?
# DataFrame.size is rows * columns; it replaces np.product(nfl_data.shape),
# because np.product was deprecated in NumPy 1.25 and removed in NumPy 2.0
total_cells = nfl_data.size
total_missing = missing_values_count.sum()

# percent of data that is missing
(total_missing/total_cells) * 100

0.0

In [10]:
# show a copy of the data without any row that has a missing value
# (the result is not assigned, so nfl_data itself is left unchanged)
nfl_data.dropna(axis=0, how="any")

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
1,s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
2,s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
3,s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
4,s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...,...
8785,s8797,TV Show,Yunus Emre,Not Given,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
8786,s8798,TV Show,Zak Storm,Not Given,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
8787,s8801,TV Show,Zindagi Gulzar Hai,Not Given,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
8788,s8784,TV Show,Yoko,Not Given,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


In [11]:
# drop every column that contains at least one missing value,
# then preview the first five rows of what is left
columns_with_na_dropped = nfl_data.dropna(axis="columns")
columns_with_na_dropped.head(5)

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
1,s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
2,s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
3,s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
4,s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"


In [12]:
# compare the column counts before and after dropping columns with missing values
print("Columns in original dataset: %d \n" % len(nfl_data.columns))
print("Columns with na's dropped: %d" % len(columns_with_na_dropped.columns))

Columns in original dataset: 10 

Columns with na's dropped: 10


In [15]:
# take the first five rows of the Netflix data, keeping only the
# label-sliced columns from 'type' through 'title' (both ends inclusive)
subset_nfl_data = nfl_data.head(5).loc[:, "type":"title"]
subset_nfl_data

Unnamed: 0,type,title
0,Movie,Dick Johnson Is Dead
1,TV Show,Ganglands
2,TV Show,Midnight Mass
3,Movie,Confessions of an Invisible Girl
4,Movie,Sankofa


In [16]:
# show the subset with every missing value replaced by 0
# (the result is not assigned, so subset_nfl_data is left unchanged)
subset_nfl_data.fillna(value=0)

Unnamed: 0,type,title
0,Movie,Dick Johnson Is Dead
1,TV Show,Ganglands
2,TV Show,Midnight Mass
3,Movie,Confessions of an Invisible Girl
4,Movie,Sankofa


In [17]:
# replace each NA with the value that comes directly after it in the same column,
# then replace all the remaining NA's with 0
# (DataFrame.bfill replaces fillna(method='bfill'), which is deprecated since
# pandas 2.1 and emits a FutureWarning)
subset_nfl_data.bfill(axis=0).fillna(0)

Unnamed: 0,type,title
0,Movie,Dick Johnson Is Dead
1,TV Show,Ganglands
2,TV Show,Midnight Mass
3,Movie,Confessions of an Invisible Girl
4,Movie,Sankofa
