In [None]:
import pandas as pd
import os # Import the os module

# ---------------------------------------------------------------------------
# Load, clean, and save the Netflix titles dataset.
#
# Steps: read the raw CSV, preview it, report missing values, drop incomplete
# and duplicate rows, normalise column names, parse `date_added` as datetime,
# then write the cleaned frame to `output_dir`.
# ---------------------------------------------------------------------------

# Load the dataset
df = pd.read_csv("/content/netflix_titles.csv")

# Display first few rows of the raw data (before any cleaning)
print("Original Data:")
print(df.head())

# Per-column count of missing values in the raw data
print("\nMissing Values:")
print(df.isnull().sum())

# Drop every row that has a missing value in ANY column, then drop exact
# duplicate rows. Reassigning (instead of inplace=True) keeps each step an
# explicit, chainable transformation.
df = df.dropna().drop_duplicates()

# Rename column headers: trimmed, lowercase, spaces replaced by underscores
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# Convert date_added to datetime.
# Surrounding whitespace is stripped first: a value such as " August 4, 2017"
# would otherwise fail to match the inferred date format and be silently
# coerced to NaT by errors='coerce'.
if 'date_added' in df.columns:
    date_text = df['date_added'].astype(str).str.strip()
    # .assign returns a new frame, avoiding SettingWithCopyWarning on the
    # filtered result above.
    df = df.assign(date_added=pd.to_datetime(date_text, errors='coerce'))

    # Surface any values that still could not be parsed, since they are now
    # NaT in a frame that was otherwise stripped of missing values.
    unparsed_count = int(df['date_added'].isna().sum())
    if unparsed_count:
        print(f"\nWarning: {unparsed_count} 'date_added' value(s) could not be parsed and were set to NaT")

# Display data types after cleaning
print("\nData Types After Cleaning:")
print(df.dtypes)

# Define the directory path and create it if it does not exist
output_dir = "/mnt/data"
os.makedirs(output_dir, exist_ok=True)

# Build the destination path once so the write and the message cannot drift apart
output_path = os.path.join(output_dir, "cleaned_netflix_titles.csv")

# Save the cleaned dataset (index omitted: it carries no information here)
df.to_csv(output_path, index=False)

print(f"\nCleaned dataset saved as '{output_path}'")

Original Data:
  show_id     type                  title         director  \
0      s1    Movie   Dick Johnson Is Dead  Kirsten Johnson   
1      s2  TV Show          Blood & Water              NaN   
2      s3  TV Show              Ganglands  Julien Leclercq   
3      s4  TV Show  Jailbirds New Orleans              NaN   
4      s5  TV Show           Kota Factory              NaN   

                                                cast        country  \
0                                                NaN  United States   
1  Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...   South Africa   
2  Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...            NaN   
3                                                NaN            NaN   
4  Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...          India   

           date_added  release_year rating   duration  \
0  September 25, 2021          2020  PG-13     90 min   
1  September 24, 2021          2021  TV-MA  2 Seasons   
2  September 2

In [None]:
from google.colab import files

# Download the cleaned dataset produced by the cleaning cell (saved under
# /mnt/data), rather than the untouched raw input in /content.
files.download("/mnt/data/cleaned_netflix_titles.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>