In [3]:
# Count missing
# Missing values occur when some data is not recorded (e.g., missing director names).
# We can either drop these rows (dropna) or fill them with default values (fillna).


import pandas as pd

# Step 2: Load Netflix dataset
df = pd.read_csv("netflix_titles.csv")  # Make sure this CSV file is in same folder

print(df.isnull().sum())

# Drop missing in specific column
df_cleaned = df.dropna(subset=['director'])

# Fill missing
df['cast'].fillna("Not Available", inplace=True)


show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['cast'].fillna("Not Available", inplace=True)


In [5]:
# Check
# Duplicates are rows with repeated data that add no value.
# We remove them using drop_duplicates() to avoid biased analysis.

print(df.duplicated().sum())

# Remove duplicates
df = df.drop_duplicates()


0


In [7]:
# One-hot encoding on 'type' (Movie/TV Show)
# Encoding is used to convert categorical (text) data into numeric form.
# For example, 'type' column with 'Movie' or 'TV Show' can be encoded into binary columns using one-hot encoding.
# This is needed for machine learning models that only understand numbers.

encoded_df = pd.get_dummies(df, columns=['type'])
print(encoded_df.head())


  show_id                  title         director  \
0      s1   Dick Johnson Is Dead  Kirsten Johnson   
1      s2          Blood & Water              NaN   
2      s3              Ganglands  Julien Leclercq   
3      s4  Jailbirds New Orleans              NaN   
4      s5           Kota Factory              NaN   

                                                cast        country  \
0                                      Not Available  United States   
1  Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...   South Africa   
2  Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...            NaN   
3                                      Not Available            NaN   
4  Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...          India   

           date_added  release_year rating   duration  \
0  September 25, 2021          2020  PG-13     90 min   
1  September 24, 2021          2021  TV-MA  2 Seasons   
2  September 24, 2021          2021  TV-MA   1 Season   
3  September 24, 2021     