In [1]:
import pandas as pd
import numpy as np
from io import StringIO

data = """customer_id,name,email,age,city
1,John Doe,john.doe@example.com,28,New York
2,Jane Smith,janesmithexample.com,34,Los Angeles
3,Bob Lee,bob.lee@example.com,0,Chicago
4,Alice Wong,alice.wong@example.com,27,San Francisco
5,John Doe,john.doe@example.com,28,New York
6,NaN,susan.park@,45,Miami
7,Tom Hill,tom.hill@example.com,,Seattle
8,Emily Stone,emily.stone@example.com,22,NaN
9,Chris King,,30,Boston
10,Mary Clark,mary.clark@example.com,NaN,Denver
"""

df = pd.read_csv(StringIO(data))

df["name"].fillna("Unknown", inplace=True)
df["email"].fillna("Unknown", inplace=True)
df["city"].fillna("Unknown", inplace=True)

df["email"] = df["email"].apply(lambda x: x if "@" in str(x) and "." in str(x) else "Invalid Email")

df["age"] = pd.to_numeric(df["age"], errors="coerce")
df.loc[df["age"] <= 0, "age"] = np.nan
df["age"].fillna(df["age"].median(), inplace=True)

df.drop_duplicates(inplace=True)

df["city"] = df["city"].str.title().str.strip()

df["customer_id"] = df["customer_id"].astype(int)

print(df)

output_path = "customer_data_cleaned.xlsx"
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Cleaned Data", index=False)

print(f"Cleaned data saved to {output_path}")


   customer_id         name                    email   age           city
0            1     John Doe     john.doe@example.com  28.0       New York
1            2   Jane Smith            Invalid Email  34.0    Los Angeles
2            3      Bob Lee      bob.lee@example.com  28.0        Chicago
3            4   Alice Wong   alice.wong@example.com  27.0  San Francisco
4            5     John Doe     john.doe@example.com  28.0       New York
5            6      Unknown              susan.park@  45.0          Miami
6            7     Tom Hill     tom.hill@example.com  28.0        Seattle
7            8  Emily Stone  emily.stone@example.com  22.0        Unknown
8            9   Chris King            Invalid Email  30.0         Boston
9           10   Mary Clark   mary.clark@example.com  28.0         Denver
Cleaned data saved to customer_data_cleaned.xlsx
