In [None]:
import pandas as pd

def clean_netflix_titles(raw):
    """Return a cleaned copy of a Netflix titles table.

    Steps, in order:
      1. Normalize column headers (strip, lowercase, spaces -> underscores).
      2. Drop rows missing 'title' or 'type'.
      3. Fill missing 'country' / 'director' with placeholders.
      4. Strip 'country'; strip and lowercase 'type'.
      5. Parse 'date_added' to datetime (unparseable values become NaT).
      6. Drop rows that are exact duplicates after normalization.

    Args:
        raw: DataFrame containing at least the columns title, type, country,
            director and date_added (header case/whitespace may vary).

    Returns:
        A new DataFrame; ``raw`` itself is left unmodified.

    Raises:
        KeyError: if one of the required columns is absent after header
            normalization.
    """
    cleaned = raw.copy()

    # Headers are normalized first so that every later step can safely
    # address columns by their snake_case names.
    cleaned.columns = cleaned.columns.str.strip().str.lower().str.replace(' ', '_')

    # Rows without a title or type are unusable downstream.
    cleaned = cleaned.dropna(subset=['title', 'type'])

    cleaned['country'] = cleaned['country'].fillna('Unknown').str.strip()
    cleaned['director'] = cleaned['director'].fillna('No Director')
    cleaned['type'] = cleaned['type'].str.strip().str.lower()  # movie, tv show

    # Strip surrounding whitespace before parsing: with errors='coerce' a value
    # such as " August 4, 2017" can otherwise be silently turned into NaT when
    # it does not match the format inferred from the other rows.
    date_text = cleaned['date_added'].map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
    cleaned['date_added'] = pd.to_datetime(date_text, errors='coerce')

    # De-duplicate last, so rows that differed only by whitespace or letter
    # case are recognized as duplicates.
    cleaned = cleaned.drop_duplicates()

    return cleaned


# In a notebook cell or when run as a script __name__ is "__main__", so the
# pipeline below still executes; the guard only keeps it from running (and
# requiring the CSV) when this code is imported.
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv("netflix_titles.csv")

    # Report missing values in the raw data
    print("Missing values:\n", df.isnull().sum())

    # Clean the data (see clean_netflix_titles for the individual steps)
    df = clean_netflix_titles(df)

    # Inspect the resulting data types
    print(df.dtypes)

    # Save the cleaned dataset
    df.to_csv("netflix_cleaned.csv", index=False)

    print("Dataset cleaned and saved as 'netflix_cleaned.csv'")

Missing values:
 show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64
show_id                 object
type                    object
title                   object
director                object
cast                    object
country                 object
date_added      datetime64[ns]
release_year             int64
rating                  object
duration                object
listed_in               object
description             object
dtype: object
Dataset cleaned and saved as 'netflix_cleaned.csv'
