In [None]:
# Income Prediction Deployment Script
import joblib
import pandas as pd
import numpy as np

def predict_income(data, components=None):
    """
    Make income predictions using the trained decision tree model

    Parameters:
    -----------
    data : dict or pandas.DataFrame
        Dictionary with features for prediction. Must include every name
        listed in the saved ``feature_names``, typically:
        - age: int
        - workclass: str (e.g., 'Private', 'Self-emp-not-inc', 'Federal-gov')
        - fnlwgt: int
        - education_num: int
        - marital_status: str (e.g., 'Married-civ-spouse', 'Never-married')
        - occupation: str (e.g., 'Prof-specialty', 'Craft-repair')
        - relationship: str (e.g., 'Husband', 'Not-in-family')
        - race: str (e.g., 'White', 'Black')
        - sex: str (e.g., 'Male', 'Female')
        - capital_gain: int
        - capital_loss: int
        - hours_per_week: int
        - native_country: str (e.g., 'United-States', 'Mexico')
        A DataFrame is also accepted; it is copied, and only the prediction
        for its first row is returned.
    components : dict, optional
        Already-loaded model components with the keys 'model',
        'encoding_maps' and 'feature_names'. When omitted, they are loaded
        from 'income_prediction_components.joblib' on every call; pass them
        explicitly to avoid re-reading the file for repeated predictions.

    Returns:
    --------
    dict
        Dictionary containing:
        - prediction: int (encoded class predicted by the model)
        - prediction_label: str (label mapped back through
          ``encoding_maps['income']``, e.g. '<=50K' or '>50K')
        - probability: float (model probability of the predicted class)

    Raises:
    -------
    KeyError
        If a feature required by the model is missing from ``data``.
    ValueError
        If a categorical feature holds a value that has no entry in its
        encoding map (it would otherwise silently become NaN).
    """
    # Load model components unless the caller supplied them.
    # NOTE: joblib.load unpickles the file, which can execute arbitrary
    # code -- only load artifacts from a trusted source.
    if components is None:
        components = joblib.load('income_prediction_components.joblib')

    # Get needed components
    model = components['model']
    encoding_maps = components['encoding_maps']
    feature_names = list(components['feature_names'])

    # Convert to DataFrame for easier processing (never mutate caller's data)
    if isinstance(data, dict):
        df = pd.DataFrame([data])
    else:
        df = data.copy()

    # Fail early, naming exactly which model features are absent
    missing = [name for name in feature_names if name not in df.columns]
    if missing:
        raise KeyError(f"Missing required feature(s): {missing}")

    # Keep only the features the model was trained on, in training order
    df_for_pred = df[feature_names].copy()

    # Apply encodings to each categorical feature column
    for column in feature_names:
        if column in encoding_maps and column != 'income':
            encoded = df_for_pred[column].map(encoding_maps[column])
            # A non-null input that maps to NaN is a category the encoder
            # never saw; report it instead of passing NaN to the model.
            unseen = df_for_pred.loc[
                encoded.isna() & df_for_pred[column].notna(), column
            ]
            if not unseen.empty:
                raise ValueError(
                    f"Unknown value(s) for '{column}': "
                    f"{unseen.unique().tolist()}"
                )
            df_for_pred[column] = encoded

    # Make prediction
    prediction = model.predict(df_for_pred)[0]
    probabilities = model.predict_proba(df_for_pred)[0]

    # predict_proba columns follow model.classes_, so look up the position
    # of the predicted class rather than indexing by the class value itself.
    class_index = list(model.classes_).index(prediction)

    # Get income label (mapping from numeric prediction back to label)
    income_map_inverse = {v: k for k, v in encoding_maps['income'].items()}
    prediction_label = income_map_inverse[prediction]

    return {
        'prediction': int(prediction),
        'prediction_label': prediction_label,
        'probability': float(probabilities[class_index])
    }



In [None]:
# Demo cell: run the predictor on a profile expected to fall in the <=50K class
if __name__ == "__main__":
    # Feature values for a young, never-married sales worker with no capital gains
    example = dict(
        age=25,
        workclass='Private',
        fnlwgt=226802,
        education_num=7,
        marital_status='Never-married',
        occupation='Sales',
        relationship='Not-in-family',
        race='White',
        sex='Male',
        capital_gain=0,
        capital_loss=0,
        hours_per_week=40,
        native_country='United-States',
    )

    # Score the profile with the saved model
    pred = predict_income(example)

    # Report the predicted class and the probability returned for it
    print(
        "Prediction for low income example:",
        f"Class: {pred['prediction_label']}",
        f"Probability: {pred['probability']:.4f}",
        sep="\n",
    )
    

In [None]:
# Demo cell: run the predictor on a profile expected to fall in the >50K class
if __name__ == "__main__":
    # Feature values for a married, self-employed executive with large capital gains
    example = dict(
        age=45,
        workclass='Self-emp-inc',
        fnlwgt=181342,
        education_num=13,
        marital_status='Married-civ-spouse',
        occupation='Exec-managerial',
        relationship='Husband',
        race='White',
        sex='Male',
        capital_gain=15024,
        capital_loss=0,
        hours_per_week=60,
        native_country='United-States',
    )

    # Score the profile with the saved model
    pred = predict_income(example)

    # Report the predicted class and the probability returned for it
    print(
        "\nPrediction for high income example:",
        f"Class: {pred['prediction_label']}",
        f"Probability: {pred['probability']:.4f}",
        sep="\n",
    )