# AutoGluon Tabular with SageMaker

[AutoGluon](https://github.com/awslabs/autogluon) automates machine learning tasks enabling you to easily achieve strong predictive performance in your applications. With just a few lines of code, you can train and deploy high-accuracy deep learning models on tabular, image, and text data.
This notebook shows how to use AutoGluon-Tabular with Amazon SageMaker by creating custom containers.

## Prerequisites

If using a SageMaker hosted notebook, select kernel `conda_mxnet_p36`.

In [1]:
# Make sure docker compose is set up properly for local mode.
# Runs the setup.sh script that sits next to this notebook; the checks it
# performs are reported in the cell output.
!./setup.sh

The user has root access.
SageMaker instance route table setup is ok. We are good to go.
SageMaker instance routing for Docker is ok. We are good to go!


In [2]:
# Imports
import os
import boto3
import sagemaker
from time import sleep
from collections import Counter
import pandas as pd
from sagemaker import get_execution_role, local, Model, utils, fw_utils, s3
from sagemaker.estimator import Estimator
from sagemaker.predictor import RealTimePredictor, csv_serializer, StringDeserializer
from sklearn.metrics import accuracy_score, classification_report
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Print settings
# Display the value of every expression statement in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 10)

# Account/s3 setup
# `session` talks to the SageMaker service; `local_session` is used for local-mode endpoints
session = sagemaker.Session()
local_session = local.LocalSession()
bucket = session.default_bucket()
# Key prefix under which this notebook stores its data and transform output
prefix = 'sagemaker/autogluon-tabular'
region = session.boto_region_name
role = get_execution_role()
# STS is only used to look up the AWS account id of the caller
client = boto3.client('sts')
account = client.get_caller_identity()['Account']
# Prefix of the ECR image URIs built later as f'{ecr_uri_prefix}/<name>:latest'
ecr_uri_prefix = utils.get_ecr_image_uri_prefix(account, region)
# Registry id handed to the image build scripts.
# NOTE(review): `_registry_id` is a private SDK helper (leading underscore) and
# may change between SDK versions -- confirm when upgrading sagemaker.
registry_id = fw_utils._registry_id(region, 'mxnet', 'py3', account, '1.6.0')

### Build docker images

First, build the AutoGluon package directory that will be copied into the Docker images.

In [3]:
if not os.path.exists('package'):
    !pip install PrettyTable -t package
    !pip install bokeh -t package
    !pip install --pre autogluon==0.0.6 -t package
    !pip install numpy==1.16.1 -t package    
    !pip install --upgrade boto3 -t package
    !pip install bokeh -t package
    !pip install --upgrade matplotlib -t package

Collecting PrettyTable
Installing collected packages: PrettyTable
Successfully installed PrettyTable-0.7.2
[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting bokeh
Collecting tornado>=5 (from bokeh)
Collecting typing-extensions>=3.7.4 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/0c/0e/3f026d0645d699e7320b59952146d56ad7c374e9cd72cd16e7c74e657a0f/typing_extensions-3.7.4.2-py3-none-any.whl
Collecting python-dateutil>=2.1 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
Collecting pillow>=4.0 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/ba/90/8a24e6220cfcf6a3a0162535d5b926e774117e384ff921908e07e4c92bda/Pillow-7.1.1-cp36-cp36m-manylinux1_x86_64.whl
Collecting Jinja2>=2.7 (from bokeh)
  Using cached https:/

Collecting s3transfer<0.4.0,>=0.3.0 (from boto3->autogluon==0.0.6)
  Using cached https://files.pythonhosted.org/packages/69/79/e6afb3d8b0b4e96cefbdc690f741d7dd24547ff1f94240c997a26fa908d3/s3transfer-0.3.3-py2.py3-none-any.whl
Collecting botocore<1.16.0,>=1.15.39 (from boto3->autogluon==0.0.6)
[?25l  Downloading https://files.pythonhosted.org/packages/dc/fb/f78a0e09965c156fea9160713705af688ec4f18af4249b3095949c930f77/botocore-1.15.39-py2.py3-none-any.whl (6.1MB)
[K    100% |████████████████████████████████| 6.1MB 11.4MB/s ta 0:00:01
[?25hCollecting jmespath<1.0.0,>=0.7.1 (from boto3->autogluon==0.0.6)
  Using cached https://files.pythonhosted.org/packages/a3/43/1e939e1fcd87b827fe192d0c9fc25b48c5b3368902bfb913de7754b0dc03/jmespath-0.9.5-py2.py3-none-any.whl
Collecting importlib-metadata>=0.12; python_version < "3.8" (from pytest->autogluon==0.0.6)
  Using cached https://files.pythonhosted.org/packages/ad/e4/891bfcaf868ccabc619942f27940c77a8a4b45fd8367098955bb7e152fb1/importlib_metada

  Using cached https://files.pythonhosted.org/packages/ae/e7/d9c3a176ca4b02024debf82342dab36efadfc5776f9c8db077e8f6e71821/pycparser-2.20-py2.py3-none-any.whl
Collecting retrying>=1.3.3 (from plotly->catboost->autogluon==0.0.6)
[31mawscli 1.18.25 has requirement botocore==1.15.25, but you'll have botocore 1.15.39 which is incompatible.[0m
[31maws-mxnet-mkl 1.6.0 has requirement graphviz<0.9.0,>=0.8.1, but you'll have graphviz 0.13.2 which is incompatible.[0m
Installing collected packages: Pillow, typing, numpy, cython, pyparsing, ConfigSpace, urllib3, six, python-dateutil, docutils, jmespath, botocore, s3transfer, boto3, zipp, importlib-metadata, more-itertools, attrs, wcwidth, py, packaging, pluggy, pytest, tqdm, idna, certifi, chardet, requests, portalocker, cycler, kiwisolver, matplotlib, scipy, gluoncv, tornado, click, msgpack, psutil, tblib, pyyaml, sortedcontainers, toolz, dask, cloudpickle, heapdict, zict, distributed, gluonnlp, joblib, scikit-learn, lightgbm, pycparser, cffi

Successfully installed boto3-1.12.39 botocore-1.15.39 docutils-0.15.2 jmespath-0.9.5 python-dateutil-2.8.1 s3transfer-0.3.3 six-1.14.0 urllib3-1.25.8
[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting bokeh
Collecting PyYAML>=3.10 (from bokeh)
Collecting python-dateutil>=2.1 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
Collecting pillow>=4.0 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/ba/90/8a24e6220cfcf6a3a0162535d5b926e774117e384ff921908e07e4c92bda/Pillow-7.1.1-cp36-cp36m-manylinux1_x86_64.whl
Collecting packaging>=16.8 (from bokeh)
  Using cached https://files.pythonhosted.org/packages/62/0a/34641d2bf5c917c96db0ded85ae4da25b6cd922d6b794648d4e7e07c88e5/packaging-20.3-py2.py3-none-any.whl
Collecting typing-extensions>

[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting matplotlib
  Using cached https://files.pythonhosted.org/packages/93/4b/52da6b1523d5139d04e02d9e26ceda6146b48f2a4e5d2abfdf1c7bac8c40/matplotlib-3.2.1-cp36-cp36m-manylinux1_x86_64.whl
Collecting python-dateutil>=2.1 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
Collecting kiwisolver>=1.0.1 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/ae/23/147de658aabbf968324551ea22c0c13a00284c4ef49a77002e91f79657b7/kiwisolver-1.2.0-cp36-cp36m-manylinux1_x86_64.whl
Collecting pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/8a/bb/488841f56197b13700afd5658fc279a2025a39e22449b7cf29864669b15d/pyparsing-2.4.7-py2.py3-n

Now build the training and inference images and push them to ECR.

In [4]:
# Image names: passed to the build/push scripts and used later to form the
# image URIs as f'{ecr_uri_prefix}/<name>:latest'
training_algorithm_name = 'autogluon-sagemaker-training'
inference_algorithm_name = 'autogluon-sagemaker-inference'

In [6]:
# Build the training image and the inference image and push them.
# The {name} placeholders are filled in from the notebook variables defined in earlier cells.
!./container-training/build_push_training.sh {account} {region} {training_algorithm_name} {ecr_uri_prefix} {registry_id}
!./container-inference/build_push_inference.sh {account} {region} {inference_algorithm_name} {ecr_uri_prefix} {registry_id}

./container-training/build_push_training.sh: line 1: B#!/bin/bash: No such file or directory
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  688.8MB
Step 1/10 : ARG REGION=us-east-1
Step 2/10 : ARG REGISTRY_ID=763104351884
Step 3/10 : FROM ${REGISTRY_ID}.dkr.ecr.${REGION}.amazonaws.com/mxnet-training:1.6.0-cpu-py3
 ---> 129a8893d865
Step 4/10 : RUN pip install --upgrade pip
 ---> Using cache
 ---> d94b2ee91d31
Step 5/10 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> 40261fdfe01e
Step 6/10 : COPY package/ /opt/ml/code/package/
 ---> d4202130f2e9
Step 7/10 : COPY container-training/train.py /opt/ml/code/train.py
 ---> 9e97ef553e07
Step 8/10 : COPY container-training/inference.py /opt/ml/code/inference.py
 ---> a0e8ccd0931f
Step 9/10 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Running in e1f

### Get the data

In this example we'll use the direct-marketing dataset to build a binary classification model that predicts whether customers will accept or decline a marketing offer.  
First we'll download the data and split it into train and test sets. AutoGluon does not require a separate validation set (it uses bagged k-fold cross-validation).

In [7]:
# Download and unzip the data
# Fetch the archive quietly, extract it (overwriting any earlier extraction),
# then remove the archive since only the extracted CSV is needed.
!wget -N https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip --quiet
!unzip -qq -o bank-additional.zip
!rm bank-additional.zip

# Load the full dataset into a DataFrame
local_data_path = './bank-additional/bank-additional-full.csv'
data = pd.read_csv(local_data_path)

# Name of the target column
label = 'y'

# Sample 70% of the rows for training (fixed seed for a reproducible split);
# every row that was not sampled becomes test data.
train = data.sample(frac=0.7, random_state=42)
test = data.loc[~data.index.isin(train.index)]

# Separate the held-out features from their labels
X_test = test.drop(label, axis=1)
y_test = test[label]

##### Check the data

In [8]:
# Preview the first rows and the shape of each split. Every bare expression is
# displayed because the notebook sets ast_node_interactivity = "all".
train.head(3)
train.shape

test.head(3)
test.shape

X_test.head(3)
X_test.shape

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
32884,57,technician,married,high.school,no,no,yes,cellular,may,mon,371,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
3169,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,285,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
32206,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,52,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no


(28832, 21)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
9,25,services,single,high.school,no,yes,no,telephone,may,mon,50,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
10,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,55,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


(12356, 21)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
9,25,services,single,high.school,no,yes,no,telephone,may,mon,50,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
10,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,55,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0


(12356, 20)

Upload the data to S3.

In [9]:
# Local file names for the three splits
train_file = 'train.csv'
test_file = 'test.csv'
X_test_file = 'X_test.csv'

# Write each split as CSV without the DataFrame index column
for frame, csv_name in ((train, train_file), (test, test_file), (X_test, X_test_file)):
    frame.to_csv(csv_name, index=False)

# Upload all files under the same <prefix>/data key prefix
data_key_prefix = '{}/data'.format(prefix)
train_s3_path = session.upload_data(train_file, key_prefix=data_key_prefix)
test_s3_path = session.upload_data(test_file, key_prefix=data_key_prefix)
X_test_s3_path = session.upload_data(X_test_file, key_prefix=data_key_prefix)

## Train

The minimum requirement for hyperparameters is a target label.

In [10]:
# Minimal configuration: only the name of the target column
hyperparameters = dict(label='y')

##### (Optional) hyperparameters can be passed to the `autogluon.task.TabularPrediction.fit` function.  

The example below uses AutoGluon hyperparameters from the tutorial [Predicting Columns in a Table - In Depth](https://autogluon.mxnet.io/tutorials/tabular_prediction/tabular-indepth.html#model-ensembling-with-stacking-bagging). Please see [fit parameters](https://autogluon.mxnet.io/api/autogluon.task.html?highlight=eval_metric#autogluon.task.TabularPrediction.fit) for further information.


Here's a more in depth example from the above tutorial that shows how to provide hyperparameter ranges and additional settings:

```python
# Neural-network settings; search spaces are written as strings
nn_options = dict(
    num_epochs='10',
    learning_rate="ag.space.Real(1e-4, 1e-2, default=5e-4, log=True)",
    activation="ag.space.Categorical('relu', 'softrelu', 'tanh')",
    layers="ag.space.Categorical([100],[1000],[200,100],[300,200,100])",
    dropout_prob="ag.space.Real(0.0, 0.5, default=0.1)",
)

# Gradient-boosting settings
gbm_options = dict(
    num_boost_round='100',
    num_leaves="ag.space.Int(lower=26, upper=66, default=36)",
)

# Per-model options keyed by model type
model_hps = dict(NN=nn_options, GBM=gbm_options)

# Full set of arguments: the target label plus the optional fit settings
hyperparameters = dict(
    label='y',
    time_limits=2 * 60,
    hyperparameters=model_hps,
    auto_stack=False,
    hyperparameter_tune=True,
    search_strategy='skopt',
)
```
**Note:** Your hyperparameter choices may affect the size of the model package, which could result in additional time taken to upload your model and complete training.

<br>

For local training, set `train_instance_type` to `local`.  
For non-local training, the recommended instance type is `ml.m5.2xlarge`.

In [12]:
%%time

instance_type = 'ml.m5.2xlarge'
instance_type = 'local'

ecr_image = f'{ecr_uri_prefix}/{training_algorithm_name}:latest'

estimator = Estimator(image_name=ecr_image,
                      role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      hyperparameters=hyperparameters)

estimator.fit(train_s3_path)

Creating tmpuyhgdq1h_algo-1-c1p7l_1 ... 
[1BAttaching to tmpuyhgdq1h_algo-1-c1p7l_12mdone[0m
[36malgo-1-c1p7l_1  |[0m 2020-04-10 08:54:39,913 sagemaker-containers INFO     Imported framework sagemaker_mxnet_container.training
[36malgo-1-c1p7l_1  |[0m 2020-04-10 08:54:39,915 sagemaker-containers INFO     Failed to parse hyperparameter label value y to Json.
[36malgo-1-c1p7l_1  |[0m Returning the value itself
[36malgo-1-c1p7l_1  |[0m 2020-04-10 08:54:39,917 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-c1p7l_1  |[0m 2020-04-10 08:54:39,932 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-c1p7l"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"label":"y"}', 'SM_USER_ENTRY_POINT': 'train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-c1p7l","hosts":["algo-1-c1p7l"]}', 'SM_INPUT_DATA_CONFIG': '{"training":{"TrainingInputMode":"File"}}', 'SM_OUTPUT_DATA_DI

[36malgo-1-c1p7l_1  |[0m   Optimizer.opt_registry[name].__name__))
[36malgo-1-c1p7l_1  |[0m 
[36malgo-1-c1p7l_1  |[0m hosts,  type: <class 'list'>,  value: ['algo-1-c1p7l']
[36malgo-1-c1p7l_1  |[0m current_host,  type: <class 'str'>,  value: algo-1-c1p7l
[36malgo-1-c1p7l_1  |[0m num_gpus,  type: <class 'int'>,  value: 0
[36malgo-1-c1p7l_1  |[0m model_dir,  type: <class 'str'>,  value: /opt/ml/model
[36malgo-1-c1p7l_1  |[0m train,  type: <class 'str'>,  value: /opt/ml/input/data/training
[36malgo-1-c1p7l_1  |[0m test,  type: <class 'str'>,  value: 
[36malgo-1-c1p7l_1  |[0m label,  type: <class 'str'>,  value: y
[36malgo-1-c1p7l_1  |[0m problem_type,  type: <class 'NoneType'>,  value: None
[36malgo-1-c1p7l_1  |[0m eval_metric,  type: <class 'NoneType'>,  value: None
[36malgo-1-c1p7l_1  |[0m stopping_metric,  type: <class 'NoneType'>,  value: None
[36malgo-1-c1p7l_1  |[0m auto_stack,  type: <class 'bool'>,  value: False
[36malgo-1-c1p7l_1  |[0m hyperparameter_t

[36malgo-1-c1p7l_1  |[0m 2020-04-10 08:58:04,471 sagemaker-containers INFO     Reporting training SUCCESS
[36mtmpuyhgdq1h_algo-1-c1p7l_1 exited with code 0
[0mAborting on container exit...
===== Job Complete =====
CPU times: user 4min 33s, sys: 1.16 s, total: 4min 34s
Wall time: 8min 1s


### Create Model

In [13]:
# Create predictor object
class AutoGluonTabularPredictor(RealTimePredictor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, content_type='text/csv', 
                         serializer=csv_serializer, 
                         deserializer=StringDeserializer(), **kwargs)

In [14]:
# Inference image pushed to ECR by the build step
ecr_image = f'{ecr_uri_prefix}/{inference_algorithm_name}:latest'

if instance_type == 'local':
    # Reuse the estimator's model artifacts. Pass the custom predictor class so
    # that deploy() returns the same CSV-aware predictor as the non-local branch.
    model = estimator.create_model(image=ecr_image, role=role,
                                   predictor_cls=AutoGluonTabularPredictor)
else:
    # S3 URIs always use '/', so join explicitly rather than with os.path.join,
    # whose separator depends on the operating system.
    model_uri = '/'.join([estimator.output_path.rstrip('/'),
                          estimator._current_job_name,
                          'output',
                          'model.tar.gz'])
    model = Model(model_uri, ecr_image, role=role, sagemaker_session=session, predictor_cls=AutoGluonTabularPredictor)

### Batch Transform

For local mode, either `s3://<bucket>/<prefix>/output/` or `file:///<absolute_local_path>` can be used as outputs.

By including the label column in the test data, you can also evaluate prediction performance (in this case, pass `test_s3_path` instead of `X_test_s3_path`).

In [15]:
# Where the transform results are written (swap in the file:// line for local output)
output_path = f's3://{bucket}/{prefix}/output/'
# output_path = f'file://{os.getcwd()}'

# Transformer configuration, gathered in one place
transformer_settings = dict(
    instance_count=1,
    instance_type=instance_type,
    strategy='SingleRecord',
    max_payload=100,
    max_concurrent_transforms=1,
    output_path=output_path,
)
transformer = model.transformer(**transformer_settings)

# Run the job on the labelled test CSV and block until it finishes
transformer.transform(test_s3_path, content_type='text/csv')
transformer.wait()

Attaching to tmpi66p5p43_algo-1-tl6tz_1
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:25,899 [INFO ] main com.amazonaws.ml.mms.ModelServer - 
[36malgo-1-tl6tz_1  |[0m MMS Home: /usr/local/lib/python3.6/site-packages
[36malgo-1-tl6tz_1  |[0m Current directory: /
[36malgo-1-tl6tz_1  |[0m Temp directory: /home/model-server/tmp
[36malgo-1-tl6tz_1  |[0m Number of GPUs: 0
[36malgo-1-tl6tz_1  |[0m Number of CPUs: 4
[36malgo-1-tl6tz_1  |[0m Max heap size: 3566 M
[36malgo-1-tl6tz_1  |[0m Python executable: /usr/local/bin/python3.6
[36malgo-1-tl6tz_1  |[0m Config file: /etc/sagemaker-mms.properties
[36malgo-1-tl6tz_1  |[0m Inference address: http://0.0.0.0:8080
[36malgo-1-tl6tz_1  |[0m Management address: http://0.0.0.0:8080
[36malgo-1-tl6tz_1  |[0m Model Store: /.sagemaker/mms/models
[36malgo-1-tl6tz_1  |[0m Initial Models: ALL
[36malgo-1-tl6tz_1  |[0m Log dir: /logs
[36malgo-1-tl6tz_1  |[0m Metrics dir: /logs
[36malgo-1-tl6tz_1  |[0m Netty threads: 0
[36malgo-1-tl6

[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:33,978 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction counts: Counter({'no': 11168, 'yes': 1188})
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:33,993 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Label column (y) found in input data. Therefore, evaluating prediction performance...
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:34,556 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - {
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:34,556 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle -     "accuracy": 0.9187439300744578,
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:34,556 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle -     "accuracy_score": 0.9187439300744578,
[36malgo-1-tl6tz_1  |[0m 2020-04-10 10:15:34,556 [INFO ] W-9001-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle -     "balanced_accuracy_score": 0.76479435509171

In [19]:
# Check s3 for <filename>.csv.out file
if instance_type != 'local':
    !aws s3 ls {transformer.output_path} --recursive
    
elif 's3' in output_path:
    !aws s3 ls {output_path+transformer.latest_transform_job.job_name} --recursive

print(s3.list(transformer.output_path))

2020-04-10 10:15:35      38255 sagemaker/autogluon-tabular/output/autogluon-sagemaker-inference-2020-04-1-2020-04-10-10-15-01-985/test.csv.out


AttributeError: module 'sagemaker.s3' has no attribute 'list'

### Endpoint

##### Deploy remote or local endpoint

In [None]:
# Choose where to host the endpoint: a managed instance, or 'local' for a local container.
# NOTE(review): `model` was created earlier based on the instance_type in effect
# at that time -- presumably the model-creation cell should be re-run after
# switching between local and non-local here; confirm.
instance_type = 'ml.m5.2xlarge'
#instance_type = 'local'

predictor = model.deploy(initial_instance_count=1, 
                         instance_type=instance_type)

##### Attach to endpoint (or reattach if kernel was restarted)

In [None]:
# Select standard or local session based on instance_type
if instance_type == 'local': sess = local_session
else: sess = session

# Attach to endpoint
predictor = AutoGluonTabularPredictor(predictor.endpoint, sagemaker_session=sess)

##### Predict on unlabeled test data

In [None]:
# Send the features as CSV. index=False keeps the DataFrame index out of the
# payload so its columns match the CSV files written for training and batch
# transform, which were also saved with index=False.
results = predictor.predict(X_test.to_csv(index=False))

# Check output
print(Counter(results.splitlines()))

##### Predict on data that includes label column  
Prediction performance metrics will be printed to endpoint logs.

In [None]:
# Send the labelled rows as CSV. index=False keeps the DataFrame index out of
# the payload so its columns match the CSV files written for training and
# batch transform, which were also saved with index=False.
results = predictor.predict(test.to_csv(index=False))

# Check output
# NOTE(review): the short pause presumably keeps the printed counts from
# interleaving with log output -- confirm it is still needed.
sleep(0.1)
print(Counter(results.splitlines()))

##### Check that performance metrics match evaluation printed to endpoint logs as expected

In [None]:
import numpy as np

# Split the endpoint response into lines and treat each line as one predicted label
y_results = np.array(results.splitlines())

# Accuracy first, then the per-class report, both against the held-out labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_results)
print("accuracy: {}".format(accuracy))

report = classification_report(y_true=y_test, y_pred=y_results, digits=6)
print(report)

##### Clean up endpoint

In [None]:
# Remove the endpoint once it is no longer needed
predictor.delete_endpoint()