In [None]:
# !pip install autogluon  imodels seaborn

In [None]:
# AutoGluon Tabular Classification Exercise: Exploring Presets

#  Objective:
# Use AutoGluon to train classification models on the Titanic dataset.
# Compare model performance using different presets and interpret models using SHAP.

#  Step 1: Install and Import Packages
# Uncomment and run the following line if AutoGluon or SHAP is not installed.
# !pip install autogluon shap

from autogluon.tabular import TabularPredictor
import pandas as pd
import seaborn as sns
import time
import matplotlib.pyplot as plt




In [3]:

#  Step 2: Load and Prepare Data
df = sns.load_dataset("titanic")
df = df.dropna(subset=["age", "fare", "embarked"])  # Drop rows with missing essential data

# Drop columns that leak or aren't useful
columns_to_drop = ["deck", "embark_town", "alive", "who", "class", "adult_male"]
df = df.drop(columns=columns_to_drop)

# Split data into train and test
train_data = df.sample(frac=0.8, random_state=42)
test_data = df.drop(train_data.index)
label = 'survived'


In [7]:

# Step 3: Define Presets to Test

# Available Presets: [‘best_quality’, ‘high_quality’, ‘good_quality’, ‘medium_quality’, ‘experimental_quality’, ‘optimize_for_deployment’, ‘interpretable’, ‘ignore_text’]
presets_list = [
    #'best_quality',
    #'experimental_quality',
    #'medium_quality',
    'interpretable',
]

# Step 4: Train and Evaluate Models with Different Presets
results = []

for preset in presets_list:
    print(f"\nTraining with preset: {preset}")
    save_path = f"AutogluonModels_{preset}"

    # Start timer
    start_time = time.time()

    predictor = TabularPredictor(label=label, path=save_path)
    predictor.fit(train_data=train_data, presets=preset, verbosity=0)

    duration = time.time() - start_time

    # Evaluate on test data
    performance = predictor.evaluate(test_data, silent=True)

    # Save result
    results.append({
        "Preset": preset,
        "Accuracy": performance['accuracy'],
        "Log Loss": performance.get('log_loss', None),
        "Training Time (s)": round(duration, 2)
    })






Training with preset: interpretable




In [None]:
# Step 5: Compare Results
results_df = pd.DataFrame(results)
print("\nSummary of Results:")
print(results_df)

# Step 6: Leaderboard (optional)
# Choose one preset to explore the leaderboard
# Fill in the chosen preset to view its leaderboard
chosen_preset = "medium_quality"  # e.g., "best_quality"

if chosen_preset:
    path = f"AutogluonModels_{chosen_preset}"
    predictor = TabularPredictor.load(path)
    lb = predictor.leaderboard(test_data, silent=True)
    display(lb)

    # 🔍 Step 7: Model Interpretation using SHAP
    # Get the best model from leaderboard
    best_model = lb.iloc[0]['model']





Summary of Results:
          Preset  Accuracy Log Loss  Training Time (s)
0  interpretable  0.753521     None               17.1


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM,0.802817,0.850877,accuracy,0.005999,0.022402,1.296077,0.005999,0.022402,1.296077,1,True,4
1,CatBoost,0.802817,0.842105,accuracy,0.034901,0.003,16.032039,0.034901,0.003,16.032039,1,True,7
2,LightGBMLarge,0.795775,0.815789,accuracy,0.044577,0.016043,6.345482,0.044577,0.016043,6.345482,1,True,13
3,NeuralNetTorch,0.795775,0.850877,accuracy,0.104777,0.027998,15.633821,0.104777,0.027998,15.633821,1,True,12
4,WeightedEnsemble_L2,0.788732,0.868421,accuracy,0.07562,0.071864,3.454425,0.005001,0.003814,0.344009,2,True,14
5,NeuralNetFastAI,0.78169,0.833333,accuracy,0.042491,0.019161,1.800509,0.042491,0.019161,1.800509,1,True,10
6,XGBoost,0.774648,0.824561,accuracy,0.109221,0.015043,1.034152,0.109221,0.015043,1.034152,1,True,11
7,ExtraTreesGini,0.774648,0.754386,accuracy,0.228264,0.11501,1.063673,0.228264,0.11501,1.063673,1,True,8
8,ExtraTreesEntr,0.774648,0.754386,accuracy,0.265697,0.088524,0.908339,0.265697,0.088524,0.908339,1,True,9
9,RandomForestEntr,0.767606,0.798246,accuracy,0.138856,0.161029,1.689661,0.138856,0.161029,1.689661,1,True,6


AttributeError: 'TabularPredictor' object has no attribute 'explain'

#  Step 8: Questions (to be answered by participant)
# 1. Which preset gave the best accuracy?
# 2. Which preset had the shortest training time?
# 3. Is there a trade-off between training time and accuracy?
# 4. Which models are most commonly used across presets (check leaderboard)?
# 5. Try adding another preset from the documentation and compare!
# 6. Based on SHAP, which features are most important for predicting survival?