https://auto.gluon.ai/stable/tutorials/tabular/tabular-indepth.html

In [1]:
# Need to do this for each autogluon notebook...
!pip install autogluon

Collecting autogluon
  Downloading autogluon-1.1.1-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.core==1.1.1 (from autogluon.core[all]==1.1.1->autogluon)
  Downloading autogluon.core-1.1.1-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.features==1.1.1 (from autogluon)
  Downloading autogluon.features-1.1.1-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.1.1 (from autogluon.tabular[all]==1.1.1->autogluon)
  Downloading autogluon.tabular-1.1.1-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.multimodal==1.1.1 (from autogluon)
  Downloading autogluon.multimodal-1.1.1-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.timeseries==1.1.1 (from autogluon.timeseries[all]==1.1.1->autogluon)
  Downloading autogluon.timeseries-1.1.1-py3-none-any.whl.metadata (12 kB)
Collecting scikit-learn<1.4.1,>=1.3.0 (from autogluon.core==1.1.1->autogluon.core[all]==1.1.1->autogluon)
  Downloading scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_

In [2]:
from autogluon.tabular import TabularDataset, TabularPredictor

import numpy as np

# --- Task definition -------------------------------------------------------
label = 'occupation'  # column to predict in this first part of the notebook
metric = 'accuracy'  # we specify eval-metric just for demo (unnecessary as it's the default)

# --- Training data ---------------------------------------------------------
# Load the training CSV and immediately keep a fixed-seed random subsample so the demo runs
# quickly; try setting subsample_size to much larger values.
subsample_size = 1000
train_data = TabularDataset(
    'https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv'
).sample(n=subsample_size, random_state=0)
print(train_data.head())
print("Summary of occupation column: \n", train_data[label].describe())

# --- Test data -------------------------------------------------------------
# Keep the full test frame, plus a features-only copy and the ground-truth labels.
test_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')
test_data_nolabel = test_data.drop(columns=[label])  # same frame without the label column
y_test = test_data[label]

Loaded data from: https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv | Columns = 15 / 15 | Rows = 39073 -> 39073


       age workclass  fnlwgt      education  education-num  \
6118    51   Private   39264   Some-college             10   
23204   58   Private   51662           10th              6   
29590   40   Private  326310   Some-college             10   
18116   37   Private  222450        HS-grad              9   
33964   62   Private  109190      Bachelors             13   

            marital-status        occupation    relationship    race      sex  \
6118    Married-civ-spouse   Exec-managerial            Wife   White   Female   
23204   Married-civ-spouse     Other-service            Wife   White   Female   
29590   Married-civ-spouse      Craft-repair         Husband   White     Male   
18116        Never-married             Sales   Not-in-family   White     Male   
33964   Married-civ-spouse   Exec-managerial         Husband   White     Male   

       capital-gain  capital-loss  hours-per-week  native-country   class  
6118              0             0              40   United-State

Loaded data from: https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv | Columns = 15 / 15 | Rows = 9769 -> 9769


In [3]:
from autogluon.common import space

# Hyperparameter search setup. Plain values are fixed settings; `space.*` objects describe a
# range or set of choices to be searched during hyperparameter optimization (HPO).

nn_options = {  # specifies non-default hyperparameter values for neural network models
    'num_epochs': 10,  # number of training epochs (controls training time of NN models)
    'learning_rate': space.Real(1e-4, 1e-2, default=5e-4, log=True),  # learning rate used in training (real-valued hyperparameter searched on log-scale)
    # NOTE(review): this previously offered 'softrelu', an MXNet-era activation name. The torch
    # network appears to recognise only 'relu', 'elu' and 'tanh' and to fall back to an identity
    # (i.e. no) activation for any other string, so 'softrelu' trials would silently train a
    # linear network. Replaced by 'elu' -- confirm against the installed AutoGluon version.
    'activation': space.Categorical('relu', 'elu', 'tanh'),  # activation function used in NN (categorical hyperparameter, default = first entry)
    'dropout_prob': space.Real(0.0, 0.5, default=0.1),  # dropout probability (real-valued hyperparameter)
}

gbm_options = {  # specifies non-default hyperparameter values for lightGBM gradient boosted trees
    'num_boost_round': 100,  # number of boosting rounds (controls training time of GBM models)
    'num_leaves': space.Int(lower=26, upper=66, default=36),  # number of leaves in trees (integer hyperparameter)
}

# Hyperparameters of each model type. When a key is missing from this dict, no models of that
# type are trained.
hyperparameters = {
    'GBM': gbm_options,
    'NN_TORCH': nn_options,  # NOTE: comment this line out if you get errors on Mac OSX
}

time_limit = 2 * 60  # train various models for ~2 min
num_trials = 5  # try at most 5 different hyperparameter configurations for each type of model
search_strategy = 'auto'  # to tune hyperparameters using random search routine with a local scheduler

# HPO is not performed unless hyperparameter_tune_kwargs is specified.
# Refer to TabularPredictor.fit docstring for all valid values.
hyperparameter_tune_kwargs = {
    'num_trials': num_trials,
    'scheduler': 'local',
    'searcher': search_strategy,
}

predictor = TabularPredictor(label=label, eval_metric=metric).fit(
    train_data,
    time_limit=time_limit,
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
)

Fitted model: NeuralNetTorch/fe365c4c ...
	0.355	 = Validation score   (accuracy)
	5.58s	 = Training   runtime
	0.02s	 = Validation runtime
Fitted model: NeuralNetTorch/88c95096 ...
	0.34	 = Validation score   (accuracy)
	7.6s	 = Training   runtime
	0.02s	 = Validation runtime
Fitted model: NeuralNetTorch/0f6517f5 ...
	0.355	 = Validation score   (accuracy)
	7.63s	 = Training   runtime
	0.02s	 = Validation runtime
Fitted model: NeuralNetTorch/4323e8db ...
	0.345	 = Validation score   (accuracy)
	7.23s	 = Training   runtime
	0.02s	 = Validation runtime
Fitted model: NeuralNetTorch/6cafd479 ...
	0.345	 = Validation score   (accuracy)
	4.36s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 119.84s of the 75.41s of remaining time.
	Ensemble Weights: {'NeuralNetTorch/0f6517f5': 0.235, 'NeuralNetTorch/4323e8db': 0.235, 'LightGBM/T1': 0.176, 'LightGBM/T3': 0.118, 'NeuralNetTorch/88c95096': 0.118, 'LightGBM/T4': 0.059, 'NeuralNe

In [4]:
# Predict labels for the test rows (label column removed) and preview the first five.
y_pred = predictor.predict(test_data_nolabel)
print("Predictions:  ", y_pred.iloc[:5].tolist())

# Score the predictor on the labeled test frame, with auxiliary metrics switched off.
perf = predictor.evaluate(test_data, auxiliary_metrics=False)

Predictions:   [' Other-service', ' Farming-fishing', ' Exec-managerial', ' Sales', ' Other-service']


In [5]:
# Print a summary of the fit() run (including the per-model performance table shown below)
# and keep the returned summary object in `results`.
results = predictor.fit_summary()

*** Summary of fit() ***
Estimated performance of each model:
                      model  score_val eval_metric  pred_time_val   fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0       WeightedEnsemble_L2      0.420    accuracy       0.114586  31.764129                0.001265           0.159561            2       True         11
1               LightGBM/T3      0.375    accuracy       0.007501   0.625779                0.007501           0.625779            1       True          3
2               LightGBM/T5      0.375    accuracy       0.010853   0.875510                0.010853           0.875510            1       True          5
3               LightGBM/T1      0.370    accuracy       0.006100   1.984071                0.006100           1.984071            1       True          1
4               LightGBM/T4      0.360    accuracy       0.015625   0.956767                0.015625           0.956767            1       True          4
5       

In [6]:
# Switch tasks: now predict the "class" column (binary classification).
label = 'class'
y_test = test_data[label]
test_data_nolabel = test_data.drop(columns=[label])
save_path = 'agModels-predictClass'  # folder where to store trained models (not passed to the fit below)

# Tiny training budgets, just for a quick demo here; omit this argument in real applications.
quick_demo_hyperparameters = {
    'NN_TORCH': {'num_epochs': 2},
    'GBM': {'num_boost_round': 20},
}

# Fit with explicit bagging / stacking settings instead of the defaults.
predictor = TabularPredictor(label=label, eval_metric=metric).fit(
    train_data,
    num_bag_folds=5,
    num_bag_sets=1,
    num_stack_levels=1,
    hyperparameters=quick_demo_hyperparameters,
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240703_060817"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Tue Dec 19 13:14:11 UTC 2023
CPU Count:          4
Memory Avail:       29.40 GB / 31.36 GB (93.7%)
Disk Space Avail:   19.46 GB / 19.52 GB (99.7%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon

In [7]:
# Refit on the same label, this time optimizing the "f1" metric and saving models to save_path.
# The short time limit and the reduced hyperparameters are for a quick demo only; omit them in
# real applications.
f1_demo_hyperparameters = {
    'FASTAI': {'num_epochs': 10},
    'GBM': {'num_boost_round': 200},
}

predictor = TabularPredictor(label=label, eval_metric='f1', path=save_path).fit(
    train_data,
    auto_stack=True,
    time_limit=30,
    hyperparameters=f1_demo_hyperparameters,
)

# Last expression of the cell: show the leaderboard scored on the test data.
predictor.leaderboard(test_data)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Tue Dec 19 13:14:11 UTC 2023
CPU Count:          4
Memory Avail:       29.09 GB / 31.36 GB (92.8%)
Disk Space Avail:   19.46 GB / 19.52 GB (99.7%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=5
Beginning

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,NeuralNetFastAI_BAG_L1,0.648383,0.689243,f1,3.262553,0.359432,23.021436,3.262553,0.359432,23.021436,1,True,2
1,WeightedEnsemble_L2,0.648383,0.689243,f1,3.264732,0.364669,23.20556,0.002179,0.005237,0.184124,2,True,3
2,LightGBM_BAG_L1,0.629437,0.68559,f1,0.578959,0.225939,13.089781,0.578959,0.225939,13.089781,1,True,1


In [8]:
# Step 1: evaluate on the test data using the predictor's current decision threshold.
# The statement order in this cell matters: `scores` must be computed before the threshold
# is changed so that it serves as the "before" baseline.
print(f'Prior to calibration (predictor.decision_threshold={predictor.decision_threshold}):')
scores = predictor.evaluate(test_data)

# Step 2: compute a calibrated threshold and store it on the predictor.
calibrated_decision_threshold = predictor.calibrate_decision_threshold()
predictor.set_decision_threshold(calibrated_decision_threshold)

# Step 3: evaluate again on the same test data to get the "after" scores.
print(f'After calibration (predictor.decision_threshold={predictor.decision_threshold}):')
scores_calibrated = predictor.evaluate(test_data)

Prior to calibration (predictor.decision_threshold=0.5):


Calibrating decision threshold to optimize metric f1 | Checking 51 thresholds...
Calibrating decision threshold via fine-grained search | Checking 38 thresholds...
	Base Threshold: 0.500	| val: 0.6892
	Best Threshold: 0.500	| val: 0.6892


After calibration (predictor.decision_threshold=0.5):


In [9]:
# Side-by-side report of the test scores computed before (`scores`) and after
# (`scores_calibrated`) the decision threshold was calibrated, one entry per metric.
# The threshold stored on the predictor is the same for every metric, so read it once
# instead of on every iteration.
decision_threshold = predictor.decision_threshold
for metric_name, metric_score in scores.items():
    metric_score_calibrated = scores_calibrated[metric_name]
    print(f'decision_threshold={decision_threshold:.3f}\t| metric="{metric_name}"'
          f'\n\ttest_score uncalibrated: {metric_score:.4f}'
          f'\n\ttest_score   calibrated: {metric_score_calibrated:.4f}'
          f'\n\ttest_score        delta: {metric_score_calibrated-metric_score:.4f}')

decision_threshold=0.500	| metric="f1"
	test_score uncalibrated: 0.6484
	test_score   calibrated: 0.6484
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="accuracy"
	test_score uncalibrated: 0.8465
	test_score   calibrated: 0.8465
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="balanced_accuracy"
	test_score uncalibrated: 0.7604
	test_score   calibrated: 0.7604
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="mcc"
	test_score uncalibrated: 0.5545
	test_score   calibrated: 0.5545
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="roc_auc"
	test_score uncalibrated: 0.8941
	test_score   calibrated: 0.8941
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="precision"
	test_score uncalibrated: 0.7100
	test_score   calibrated: 0.7100
	test_score        delta: 0.0000
decision_threshold=0.500	| metric="recall"
	test_score uncalibrated: 0.5966
	test_score   calibrated: 0.5966
	test_score        delta: 0.0

In [10]:
# For several metrics, calibrate a decision threshold for that metric and compare the test
# score at the default threshold against the test score at the calibrated threshold.
predictor.set_decision_threshold(0.5)  # Reset decision threshold

# The baseline does not change inside the loop: the predictor's own threshold stays at 0.5
# (the calibrated value is only passed per call to evaluate), so evaluate the test data once
# here rather than once per metric.
uncalibrated_scores = predictor.evaluate(test_data, silent=True)

for metric_name in ['f1', 'balanced_accuracy', 'mcc']:
    metric_score = uncalibrated_scores[metric_name]
    calibrated_decision_threshold = predictor.calibrate_decision_threshold(metric=metric_name, verbose=False)
    metric_score_calibrated = predictor.evaluate(
        test_data, decision_threshold=calibrated_decision_threshold, silent=True
    )[metric_name]
    print(f'decision_threshold={calibrated_decision_threshold:.3f}\t| metric="{metric_name}"'
          f'\n\ttest_score uncalibrated: {metric_score:.4f}'
          f'\n\ttest_score   calibrated: {metric_score_calibrated:.4f}'
          f'\n\ttest_score        delta: {metric_score_calibrated-metric_score:.4f}')

decision_threshold=0.500	| metric="f1"
	test_score uncalibrated: 0.6484
	test_score   calibrated: 0.6484
	test_score        delta: 0.0000
decision_threshold=0.484	| metric="balanced_accuracy"
	test_score uncalibrated: 0.7604
	test_score   calibrated: 0.7643
	test_score        delta: 0.0039
decision_threshold=0.500	| metric="mcc"
	test_score uncalibrated: 0.5545
	test_score   calibrated: 0.5545
	test_score        delta: 0.0000
