In [1]:
import pandas as pd

def clean_column_names(df):
    """Normalize the column labels of *df* in place and return it.

    Labels are lower-cased, spaces become underscores, and a label that is
    exactly ``st`` (after the previous steps) is renamed to ``state``.
    """
    lowered = df.columns.str.lower()
    underscored = lowered.str.replace(' ', '_')
    # Anchored pattern: only a label that is exactly "st" is renamed.
    df.columns = underscored.str.replace(r'^st$', 'state', regex=True)
    return df

def clean_gender(df):
    """Collapse the spelling variants in ``df['gender']`` to ``'F'`` / ``'M'``.

    Values are stripped of surrounding whitespace and upper-cased before the
    lookup; values with no entry in the mapping are left upper-cased as-is.
    The column is overwritten on *df*, which is also returned.
    """
    gender_codes = {
        'FEMALE': 'F', 'FEMAL': 'F', 'F': 'F',
        'MALE': 'M', 'M': 'M'
    }
    normalized = df['gender'].str.strip().str.upper()
    df['gender'] = normalized.replace(gender_codes)
    return df

def replace_state_names(df):
    """Expand the abbreviated entries of ``df['state']`` to full state names.

    Only exact matches of the keys below are rewritten; every other value is
    kept unchanged. The column is overwritten on *df*, which is returned.
    """
    full_names = {
        'AZ': 'Arizona',
        'Cali': 'California',
        'WA': 'Washington'
    }
    states = df['state']
    df['state'] = states.replace(full_names)
    return df

def fix_customer_lifetime_value(df):
    """Convert ``df['customer_lifetime_value']`` to ``float64`` in place.

    Any ``%`` characters are removed from string values before parsing.
    Values that cannot be parsed as a number become ``NaN`` instead of
    raising. A column that is already numeric is only cast to float.

    Returns the same *df* object.
    """
    values = df['customer_lifetime_value']
    # The .str accessor is only valid on string-like columns; skip the
    # stripping step when the column has already been parsed.
    if not pd.api.types.is_numeric_dtype(values):
        values = values.str.replace('%', '', regex=False)
    # Coerce first so a malformed entry yields NaN rather than aborting the
    # whole conversion, then pin the dtype to float64 (to_numeric alone would
    # return an integer dtype for all-integer input).
    df['customer_lifetime_value'] = pd.to_numeric(values, errors='coerce').astype(float)
    return df

def standardize_education(df):
    """Rewrite ``'Bachelors'`` as ``'Bachelor'`` in ``df['education']``.

    Only whole-value matches are replaced. The column is overwritten on *df*,
    which is returned.
    """
    education = df['education']
    df['education'] = education.replace({'Bachelors': 'Bachelor'})
    return df

def standardize_vehicle_class(df):
    """Merge several values of ``df['vehicle_class']`` into ``'Luxury'``.

    Values not listed in the mapping are left untouched. The column is
    overwritten on *df*, which is returned.
    """
    # NOTE(review): the first key is 'Sport Car' (singular) - confirm this
    # matches the spelling actually present in the data.
    luxury_aliases = {
        'Sport Car': 'Luxury',
        'Luxury SUV': 'Luxury',
        'Luxury Car': 'Luxury'
    }
    vehicle_class = df['vehicle_class']
    df['vehicle_class'] = vehicle_class.replace(luxury_aliases)
    return df

def extract_middle_number(val):
    """Return an integer parsed from *val*, or ``pd.NA`` if that fails.

    For a string containing ``/`` the second ``/``-separated field is used
    when it consists only of digits (e.g. ``'1/2/00'`` -> ``2``). In every
    other case the whole value is passed to ``int()``.

    Only conversion failures are mapped to ``pd.NA``; unrelated exceptions
    such as ``KeyboardInterrupt`` propagate to the caller.
    """
    try:
        if isinstance(val, str) and '/' in val:
            # A string containing '/' always splits into at least two parts.
            middle = val.split('/')[1]
            if middle.isdigit():
                return int(middle)
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / pd.NA; ValueError: bad text or NaN;
        # OverflowError: infinite floats.
        return pd.NA

def clean_open_complaints(df):
    """Parse ``df['number_of_open_complaints']`` into a nullable ``Int64`` column.

    Each value is passed through ``extract_middle_number``; the result is
    stored back on *df*, which is returned.
    """
    parsed = df['number_of_open_complaints'].apply(extract_middle_number)
    df['number_of_open_complaints'] = parsed.astype('Int64')
    return df

def convert_numeric_columns(df):
    """Cast the income, premium and claim columns of *df* to numeric dtypes.

    ``income`` and ``total_claim_amount`` become ``float``;
    ``monthly_premium_auto`` becomes nullable ``Int64``. Columns are
    overwritten on *df*, which is returned.
    """
    target_dtypes = (
        ('income', float),
        ('monthly_premium_auto', 'Int64'),
        ('total_claim_amount', float),
    )
    for column, dtype in target_dtypes:
        df[column] = df[column].astype(dtype)
    return df

def handle_nulls(df):
    """Fill missing values in *df* in place and return it.

    Numeric columns (``float64``, ``Int64``, ``int``) are filled with their
    median; ``object`` and ``category`` columns are filled with their first
    mode. A column with no non-null values has no median or mode and is left
    unchanged instead of raising.
    """
    # Fill numeric nulls with median
    num_cols = df.select_dtypes(include=['float64', 'Int64', 'int']).columns
    for col in num_cols:
        column = df[col]
        median = column.median()
        if pd.isna(median):
            # Column is entirely null: nothing meaningful to fill with.
            continue
        if pd.api.types.is_integer_dtype(column.dtype):
            # An integer column cannot hold a fractional median (e.g. 0.5),
            # so round it to keep the dtype; fillna would raise otherwise.
            median = int(round(median))
        df[col] = column.fillna(median)

    # Fill categorical nulls with mode
    cat_cols = df.select_dtypes(include=['object', 'category']).columns
    for col in cat_cols:
        modes = df[col].mode()
        if modes.empty:
            # mode() is empty for an all-null column; indexing it would fail.
            continue
        df[col] = df[col].fillna(modes.iloc[0])

    return df

def remove_duplicates(df):
    """Return a copy of *df* without repeated rows, re-indexed from zero.

    The first occurrence of each duplicated row is the one that is kept.
    """
    deduplicated = df.drop_duplicates(keep='first')
    return deduplicated.reset_index(drop=True)

def main_cleaning_process(df):
    """Run every cleaning step on *df* in a fixed order and return the result.

    Each step receives the frame returned by the previous one. Column names
    are normalized first because the later steps look columns up by their
    cleaned names.
    """
    pipeline = (
        clean_column_names,
        clean_gender,
        replace_state_names,
        fix_customer_lifetime_value,
        standardize_education,
        standardize_vehicle_class,
        clean_open_complaints,
        convert_numeric_columns,
        handle_nulls,
        remove_duplicates,
    )
    for step in pipeline:
        df = step(df)
    return df
