# In [None]:
import pandas as pd
from scipy.stats import zscore

# Load the raw dataset and show it before any cleaning is applied.
df = pd.read_csv("Book1.csv")

print("\nOriginal Dataset:")
print(df)

# Missing-value overview: the boolean mask, per-column counts, and the
# complementary "present" mask.
missing_mask = df.isnull()

print("\nMissing Values (True = Missing):")
print(missing_mask)

print("\nMissing Count Per Column:")
print(missing_mask.sum())

print("\nNon-Missing (False = Missing):")
print(df.notnull())

# Fill missing Occupation with the most frequent value. mode() comes back
# empty when the column has no non-missing values, in which case the column
# is left untouched.
occupation_modes = df["Occupation"].mode()
if len(occupation_modes) > 0:
    df["Occupation"] = df["Occupation"].fillna(occupation_modes.iloc[0])

# Fill missing SL with the column mean.
sl_mean = df["SL"].mean()
df["SL"] = df["SL"].fillna(sl_mean)

# Rows with at least 6 non-missing values.
# NOTE(review): nothing in the visible part of this script reads df_cleaned;
# every later step keeps operating on df. Confirm whether that is intended.
df_cleaned = df.dropna(thresh=6)

print("\nDataset After Filling Missing Occupation & SL:")
print(df)

def convert_sl(value, threshold=0.7):
    """Classify a satisfaction level as ``"High"`` or ``"Low"``.

    Args:
        value: Numeric satisfaction level to classify.
        threshold: Cut-off for the "High" label. The comparison is strict,
            so a value exactly equal to the threshold is "Low".
            Defaults to 0.7.

    Returns:
        ``"High"`` if ``value > threshold``, otherwise ``"Low"``.
    """
    # NaN compares False against any number, so a missing value is
    # labelled "Low" rather than raising.
    if value > threshold:
        return "High"
    return "Low"

# Label every satisfaction level as "High"/"Low" using convert_sl.
sl_labels = df["SL"].apply(convert_sl)
df["SL_Binary"] = sl_labels

print("\nSatisfaction Level Converted to Binary:")
print(df.loc[:, ["SL", "SL_Binary"]])

# Encode the purchase-history codes as ordinal integers. Values that are
# not keys of the mapping become NaN in the new column.
mapping = dict(h=2, m=1, l=0)
ph_codes = df["PH"].map(mapping)
df["Purchase_History_Num"] = ph_codes

print("\nPurchase History Mapped to Numbers:")
print(df.loc[:, ["PH", "Purchase_History_Num"]])

# --- Z-score outlier detection on Income ---------------------------------
# Income is not imputed earlier in this script, so it may still contain
# NaN. With zscore's default nan_policy="propagate" a single missing value
# turns every z-score into NaN and no outlier is ever reported.
# nan_policy="omit" computes mean/std from the non-missing values only;
# rows with missing Income simply get a NaN z-score.
df["Income_Z"] = zscore(df["Income"], nan_policy="omit")

# Flag rows more than 3 standard deviations from the mean. NaN z-scores
# compare False and are therefore never flagged.
outliers_z = df[df["Income_Z"].abs() > 3]
print("\nZ-score Outliers in Income:")
print(outliers_z[["Income", "Income_Z"]])

# --- IQR outlier detection on Income --------------------------------------
# quantile() skips missing values, so no extra NaN handling is needed here.
Q1 = df["Income"].quantile(0.25)
Q3 = df["Income"].quantile(0.75)
IQR = Q3 - Q1

# Tukey fences: anything beyond 1.5 * IQR outside the quartiles.
lower = Q1 - 1.5 * IQR
upper = Q3 + 1.5 * IQR

outliers_iqr = df[(df["Income"] < lower) | (df["Income"] > upper)]

print("\nIQR Outliers in Income:")
print(outliers_iqr["Income"])

# Report how many Years values are missing before imputing them.
years_missing = df["Years"].isna().sum()
print("\nMissing Values in Years Before Fill:")
print(years_missing)

# Replace missing Years with the column median.
years_median = df["Years"].median()
df["Years"] = df["Years"].fillna(years_median)

print("\nYears After Median Imputation:")
print(df["Years"])

print("\nFinal Cleaned Dataset:")
print(df)

# Persist the working frame (including the derived columns added above)
# without the index column.
df.to_csv("cleaned_output.csv", index=False)