In [7]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from autogluon.tabular import TabularPredictor

# Data Preparation
## Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country", "income"
]
data = pd.read_csv(url, names=columns, header=None, skipinitialspace=True, na_values="?")
data.dropna(inplace=True)

In [8]:
## Convert the income to binary
data['income'] = data['income'].apply(lambda x: 1 if x.strip().endswith('>50K') else 0)

## Split the data
X = data.drop('income', axis=1)
y = data['income']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Model Training: XGBoost with Default Parameters
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_cols),
        ('num', 'passthrough', [col for col in X.columns if col not in categorical_cols])
    ])
xgb_default_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier(use_label_encoder=False, eval_metric='logloss'))
])
xgb_default_pipeline.fit(X_train, y_train)

In [None]:
# Model training: XGBoost with Hypertuned Parameters
parameters = {
    'classifier__max_depth': [3, 5, 7],
    'classifier__learning_rate': [0.01, 0.1, 0.2],
    'classifier__n_estimators': [100, 200],
    'classifier__subsample': [0.8, 1]
}
xgb_hypertuned_pipeline = GridSearchCV(xgb_default_pipeline, parameters, cv=3, scoring='accuracy')
xgb_hypertuned_pipeline.fit(X_train, y_train)

In [None]:
# model training: AutoGluon
ag_model = TabularPredictor(label='income').fit(train_data=X_train.join(y_train), presets='medium_quality_faster_train')

In [12]:
def evaluate_model(model, X_test, y_test, model_name=""):
    # Predictions
    predictions = model.predict(X_test)

    # Handling probability prediction for AUC computation
    try:
        if hasattr(model, 'predict_proba'):
            probabilities = model.predict_proba(X_test)[:, 1]  # Standard way for most models
        elif 'AutoGluon' in model_name:
            # AutoGluon specific: getting the positive class probabilities
            probabilities = model.predict_proba(X_test).iloc[:, 1]
        else:
            probabilities = None

        if probabilities is not None:
            auc_score = roc_auc_score(y_test, probabilities)
        else:
            auc_score = 'N/A'
    except Exception as e:
        print(f"Error computing AUC for {model_name}: {e}")
        auc_score = 'N/A'

    # Metrics calculation
    metrics = {
        'Accuracy': accuracy_score(y_test, predictions),
        'Precision': precision_score(y_test, predictions),
        'Recall': recall_score(y_test, predictions),
        'F1 Score': f1_score(y_test, predictions),
        'AUC Score': auc_score
    }
    return metrics


In [13]:
# present results
results = pd.DataFrame({
    'XGBoost Default': evaluate_model(xgb_default_pipeline, X_test, y_test),
    'XGBoost Hypertuned': evaluate_model(xgb_hypertuned_pipeline.best_estimator_, X_test, y_test),
    'AutoGluon': evaluate_model(ag_model, X_test, y_test)
}).T

print("Performance Comparison:")
print(results)

Error computing AUC for : '(slice(None, None, None), 1)' is an invalid key
Performance Comparison:
                    Accuracy Precision    Recall  F1 Score AUC Score
XGBoost Default     0.863749  0.763975  0.655126  0.705376  0.923941
XGBoost Hypertuned  0.866733  0.780096  0.647137  0.707424  0.925195
AutoGluon           0.865407   0.77512  0.647137   0.70537       N/A
