# In[8]:
import pandas as pd

def _as_nullable_int(values: pd.Series) -> pd.Series:
    """Coerce *values* to numbers, keeping whole numbers as integers.

    Entries that cannot be parsed become missing values
    (``errors='coerce'``). A single missing value makes ``pd.to_numeric``
    return float64, which ``to_csv`` would write as ``42420.0``. When every
    remaining value is a whole number the result is cast to pandas' nullable
    ``Int64`` dtype instead, so missing entries stay missing and the rest
    are written as plain integers. Results that are already integers, or
    that contain real fractions, are returned unchanged.
    """
    numeric = pd.to_numeric(values, errors='coerce')
    if pd.api.types.is_float_dtype(numeric):
        present = numeric.dropna()
        # float64 represents integers exactly only below 2**53; anything
        # larger (or inf) is left as float rather than risking a bad cast.
        whole = (present % 1 == 0) & (present.abs() < 2 ** 53)
        if whole.all():
            return numeric.astype('Int64')
    return numeric


def clean_superstore(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a Superstore-style orders table.

    Steps, in order (numbering continues from step 1, the missing-value
    count performed by the caller):

    2. Drop rows that are exact duplicates.
    3. Title-case and strip whitespace from 'State'.
    4. Parse 'Order Date' and 'Ship Date' as datetimes; values that cannot
       be parsed become ``NaT``.
    5. Strip and lowercase column names and replace spaces with underscores.
       Only spaces are replaced, so 'Sub-Category' becomes 'sub-category'.
    6. If a postal code column is present, convert it to a numeric dtype
       (integers whenever every value is a whole number).

    Args:
        df: Raw table. Must contain 'State', 'Order Date' and 'Ship Date'.
            It is not modified.

    Returns:
        A new DataFrame with the cleaning steps applied.

    Raises:
        KeyError: If 'State', 'Order Date' or 'Ship Date' is missing.
    """
    # 2. Drop duplicates (explicit copy: the caller's frame is never touched)
    cleaned = df.drop_duplicates().copy()

    # 3. Standardize text fields (example: 'State' column)
    cleaned['State'] = cleaned['State'].str.title().str.strip()

    # 4. Convert date format; errors='coerce' maps bad values to NaT
    for date_column in ('Order Date', 'Ship Date'):
        cleaned[date_column] = pd.to_datetime(
            cleaned[date_column], errors='coerce'
        )

    # 5. Rename columns to lowercase with underscores
    cleaned.columns = (
        cleaned.columns.str.strip().str.lower().str.replace(' ', '_')
    )

    # 6. Check and fix data types. A plain to_numeric() would turn the whole
    #    column into floats if any postal code were missing.
    if 'postal_code' in cleaned.columns:
        cleaned['postal_code'] = _as_nullable_int(cleaned['postal_code'])

    return cleaned


# True both when run as a script and inside a notebook cell, so `df` and
# `missing_values` remain available as globals afterwards.
if __name__ == "__main__":
    # Load the file
    df = pd.read_csv("Sample - Superstore.csv", encoding='ISO-8859-1', sep=',')

    # 1. Identify missing values (per-column null counts of the raw data)
    missing_values = df.isnull().sum()

    # 2-6. Deduplicate, standardize, convert and rename
    df = clean_superstore(df)

    # Save cleaned file
    df.to_csv("cleaned_superstore.csv", index=False)