In [5]:
# In the deployment script
import joblib
import pandas as pd
import numpy as np

def prepare_features(data, categorical_cols, encoding_maps, global_mean):
    """
    Build the engineered feature frame from raw customer records.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        One record as a dict, or a frame of records. It must provide the
        'age', 'bmi' and 'smoker' columns plus every column listed in
        ``categorical_cols``. The input object is not modified: a dict is
        wrapped in a new one-row frame and a frame is copied.
    categorical_cols : iterable of str
        Columns to encode; each produces a ``<col>_encoded`` column.
    encoding_maps : dict
        ``{column name: {category: encoded value}}``. A column with no entry
        is treated as having an empty map, so every row falls back to
        ``global_mean``.
    global_mean : scalar
        Value used for categories that are missing from the encoding map.

    Returns
    -------
    pandas.DataFrame
        The copied input plus, in this order: 'age_risk', 'bmi_risk',
        'smoker_risk', 'health_risk_score', 'smoker_binary', 'age_smoker',
        'bmi_smoker', 'bmi_age', 'bmi_category', 'age_group' and one
        ``<col>_encoded`` column per entry of ``categorical_cols``.

    Raises
    ------
    KeyError
        If a required column is absent from ``data``.
    """
    if isinstance(data, dict):
        data = pd.DataFrame([data])

    df = data.copy()

    age = df['age']
    bmi = df['bmi']
    is_smoker = df['smoker'] == 'yes'

    # BMI bands are shared by the risk score and the category label, so the
    # masks are computed once. Rows matching no band (18.5 <= bmi < 25, or a
    # missing bmi) keep the defaults: risk 0 / category 'normal'.
    underweight = bmi < 18.5
    overweight = (bmi >= 25) & (bmi < 30)
    obese = bmi >= 30

    # 1. Health risk score = age component + BMI component + smoker component
    df['age_risk'] = (age - 18) / 10

    df['bmi_risk'] = 0
    df.loc[underweight | overweight, 'bmi_risk'] = 1
    df.loc[obese, 'bmi_risk'] = 2

    df['smoker_risk'] = np.where(is_smoker, 3, 0)

    df['health_risk_score'] = df['age_risk'] + df['bmi_risk'] + df['smoker_risk']

    # 2. Feature interactions
    df['smoker_binary'] = np.where(is_smoker, 1, 0)
    df['age_smoker'] = age * df['smoker_binary']
    df['bmi_smoker'] = bmi * df['smoker_binary']
    df['bmi_age'] = bmi * age / 100

    # 3. BMI categories
    df['bmi_category'] = 'normal'
    df.loc[underweight, 'bmi_category'] = 'underweight'
    df.loc[overweight, 'bmi_category'] = 'overweight'
    df.loc[obese, 'bmi_category'] = 'obese'

    # 4. Age groups
    df['age_group'] = 'middle'
    df.loc[age < 35, 'age_group'] = 'young'
    df.loc[age >= 50, 'age_group'] = 'senior'

    # 5. Apply the pre-computed encoding maps
    for col in categorical_cols:
        encoding_map = encoding_maps.get(col, {})
        encoded = df[col].map(encoding_map)

        # Unseen categories map to NaN. Assign the filled Series back
        # explicitly: `df[...].fillna(..., inplace=True)` acts on a temporary
        # object and does not update `df` under pandas Copy-on-Write.
        if encoded.isna().any():
            encoded = encoded.fillna(global_mean)

        df[f'{col}_encoded'] = encoded

    return df

def predict_insurance_charges(data, components_path='insurance_model_components.joblib'):
    """
    Predict insurance charges for one or more customer records.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        Raw customer record(s); forwarded unchanged to ``prepare_features``.
    components_path : str or path-like, optional
        Location of the saved components bundle. Defaults to
        'insurance_model_components.joblib' in the working directory.
        The bundle must contain the keys 'model', 'preprocessor',
        'categorical_cols', 'encoding_maps', 'log_transform' and
        'global_mean'.

    Returns
    -------
    scalar or array-like
        The single predicted value when the model returns exactly one
        prediction, otherwise the full prediction array.

    Raises
    ------
    KeyError
        If the loaded bundle lacks one of the required keys.
    """
    # NOTE(security): joblib.load unpickles arbitrary Python objects, so
    # `components_path` must only ever point at a trusted artifact.
    components = joblib.load(components_path)
    model = components['model']
    preprocessor = components['preprocessor']
    categorical_cols = components['categorical_cols']
    encoding_maps = components['encoding_maps']
    log_transform = components['log_transform']
    global_mean = components['global_mean']

    # Rebuild the engineered features, then apply the saved preprocessing step.
    df_prepared = prepare_features(data, categorical_cols, encoding_maps, global_mean)
    X_transformed = preprocessor.transform(df_prepared)

    raw_pred = model.predict(X_transformed)

    # expm1 is the inverse of log1p; presumably the target was log1p-transformed
    # at training time when `log_transform` is set — confirm against training code.
    pred = np.expm1(raw_pred) if log_transform else raw_pred

    # Unwrap a lone prediction so single-record callers get a scalar.
    return pred[0] if len(pred) == 1 else pred

In [6]:
# Example usage: score a single customer described as a plain dict.
new_customer = dict(
    age=35,
    sex='male',
    bmi=27.5,
    children=2,
    smoker='no',
    region='northeast',
)

# A one-record input yields a single value, which is formatted as currency below.
predicted_charge = predict_insurance_charges(new_customer)
print(f"Predicted insurance charge: ${predicted_charge:.2f}")

Predicted insurance charge: $5966.75
