# AutoGluon Tabular with SageMaker
- AutoGluon Bring Your Own Container (BYOC) 예제
    - https://github.com/aws/amazon-sagemaker-examples/tree/master/advanced_functionality/autogluon-tabular
- AutoGluon 공식 Lab Git 
    - [AutoGluon](https://github.com/awslabs/autogluon) 



## Prerequisites

If using a SageMaker hosted notebook, select kernel `conda_mxnet_p36`.

In [None]:
# Run the repository's setup script so that docker compose is set up properly
# for SageMaker local mode (needed before any `local` training or deployment below).
!./setup.sh

In [None]:
# Imports
import os
import boto3
import sagemaker
from time import sleep
from collections import Counter
import numpy as np
import pandas as pd
from sagemaker import get_execution_role, local, Model, utils, fw_utils, s3
from sagemaker.estimator import Estimator
from sagemaker.predictor import RealTimePredictor, csv_serializer, StringDeserializer
from sklearn.metrics import accuracy_score, classification_report
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Notebook display settings: echo the value of every top-level expression in a
# cell (not only the last one) and keep DataFrame previews compact.
InteractiveShell.ast_node_interactivity = "all"
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 10)

# SageMaker sessions (regular and local mode), default bucket, region and IAM role
session = sagemaker.Session()
local_session = local.LocalSession()
bucket = session.default_bucket()
prefix = "sagemaker/autogluon-tabular"
region = session.boto_region_name
role = get_execution_role()

# Look up the AWS account ID of the caller through the regional STS endpoint
sts_endpoint_url = utils.sts_regional_endpoint(region)
client = session.boto_session.client(
    "sts",
    region_name=region,
    endpoint_url=sts_endpoint_url,
)
caller_identity = client.get_caller_identity()
account = caller_identity["Account"]

# ECR URI prefix of this account; the custom images built below are pushed under it
ecr_uri_prefix = utils.get_ecr_image_uri_prefix(account, region)

# Registry account ID and URI of the MXNet 1.6.0 (py3) base image that the
# Dockerfiles build FROM.
# NOTE(review): `fw_utils._registry_id` is a private SDK helper and may change between SDK releases.
registry_id = fw_utils._registry_id(region, "mxnet", "py3", account, "1.6.0")
registry_uri = utils.get_ecr_image_uri_prefix(registry_id, region)

Docker 이미지를 빌드할 때 사용할 베이스 이미지의 계정(registry ID)과 URI를 확인합니다.

In [3]:
# Display the registry account ID and registry URI resolved for the base image
registry_id, registry_uri

('763104351884', '763104351884.dkr.ecr.ap-northeast-2.amazonaws.com')

### Build docker images
로컬 노트북에서 훈련 이미지를 빌딩하고 ECR에 등록

In [4]:
# Name of the training image: passed to the build script and later used as the
# repository part of the training image URI.
training_algorithm_name = 'autogluon-sagemaker-training'

In [5]:
# Build the training image and push it to ECR; the {name} placeholders are
# filled in by IPython from the Python variables defined in earlier cells.
!/bin/bash ./container-training/build_push_training.sh {account} {region} {training_algorithm_name} {ecr_uri_prefix} {registry_id} {registry_uri}

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  14.62GB
Step 1/9 : ARG REGISTRY_URI
Step 2/9 : FROM ${REGISTRY_URI}/mxnet-training:1.6.0-cpu-py3
 ---> 148136370d72
Step 3/9 : RUN pip install autogluon
 ---> Using cache
 ---> 736b04ccce0b
Step 4/9 : RUN pip install PrettyTable
 ---> Using cache
 ---> a79327219225
Step 5/9 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> 0e465987d171
Step 6/9 : COPY container-training/train.py /opt/ml/code/train.py
 ---> 457001e9dc27
Step 7/9 : COPY container-training/inference.py /opt/ml/code/inference.py
 ---> 6662e75d6e23
Step 8/9 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Running in f566434cb7b0
Removing intermediate container f566434cb7b0
 ---> 17e992191e84
Step 9/9 : ENV SAGEMAKER_PROGRAM train.py
 ---> Running in 8d28048dff35
Removing intermediate con

### 데이터 로딩

In [6]:
%store -r no_auto_train_file
%store -r no_auto_test_file

train_pd = pd.read_csv(no_auto_train_file)
test_pd = pd.read_csv(no_auto_test_file)
print("auto_train_file: ", train_pd.shape)
print("auto_train_file: ", test_pd.shape)

from IPython.display import display as dp
dp(train_pd.head(2))
dp(test_pd.head(2))



auto_train_file:  (53740, 18)
auto_train_file:  (13436, 18)


Unnamed: 0,classes,customer_zip_code_prefix,customer_city,customer_state,price,freight_value,product_weight_g,product_category_name_english,seller_zip_code_prefix,seller_city,seller_state,order_weekday,order_day,order_month,customer_seller_state,customer_seller_city,customer_seller_zip_code_prefix,product_volume
0,3,12030,taubate,SP,29.99,10.96,9000.0,fashion_shoes,13481,limeira,SP,1,4,10,SP_SP,taubate_limeira,12030_13481,2640.0
1,2,78075,cuiaba,MT,23.9,26.82,1500.0,furniture_decor,17051,bauru,SP,1,4,10,MT_SP,cuiaba_bauru,78075_17051,14625.0


Unnamed: 0,classes,customer_zip_code_prefix,customer_city,customer_state,price,freight_value,product_weight_g,product_category_name_english,seller_zip_code_prefix,seller_city,seller_state,order_weekday,order_day,order_month,customer_seller_state,customer_seller_city,customer_seller_zip_code_prefix,product_volume
0,3,89219,joinville,SC,105.0,23.89,1000.0,watches_gifts,21840,rio de janeiro,RJ,1,19,6,SC_RJ,joinville_rio de janeiro,89219_21840,7632.0
1,0,9950,diadema,SP,99.97,15.8,650.0,housewares,32677,betim,MG,1,19,6,SP_MG,diadema_betim,9950_32677,9600.0


In [7]:
# `upload_s3` comes from the project-local `utils` module (not `sagemaker.utils`,
# which the imports cell also binds to the name `utils`).
from utils import upload_s3

# Upload the train and test files
# S3 key prefix passed to `upload_s3` for both files
data_prefix = 'autogluon/predict-deliver-time/data'

# Upload each file and keep the returned S3 path for the training job channels
s3_train_file_path = upload_s3(bucket, no_auto_train_file, data_prefix)
s3_test_file_path = upload_s3(bucket, no_auto_test_file, data_prefix)
print("s3_train_file_path: ", s3_train_file_path)
print("s3_test_file_path: ", s3_test_file_path)

s3_train_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/train/train.csv
s3_test_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/test/test.csv


In [8]:
## Train Rows
# Number of training rows, forwarded to the training container through the
# `train_rows` hyperparameter.
# NOTE(review): how the training script consumes this value is not visible here -- confirm.
full_train_rows = train_pd.shape[0]

# Use a small sample for a quick run, never asking for more rows than exist.
# Set `train_rows = full_train_rows` to train on the complete data set.
train_rows = min(500, full_train_rows)


## Hyperparameter Selection

The only required setting for training is the target label, `fit_args['label']`.

Additional optional hyperparameters can be passed to the `autogluon.task.TabularPrediction.fit` function via `fit_args`.

Below is a more in-depth example of AutoGluon-Tabular hyperparameters, taken from the tutorial [Predicting Columns in a Table - In Depth](https://autogluon.mxnet.io/tutorials/tabular_prediction/tabular-indepth.html#model-ensembling-with-stacking-bagging). Please see [fit parameters](https://autogluon.mxnet.io/api/autogluon.task.html?highlight=eval_metric#autogluon.task.TabularPrediction.fit) for further information. Note that in order for hyperparameter ranges to work in SageMaker, values passed to `fit_args['hyperparameters']` must be represented as strings.

```python
nn_options = {
    'num_epochs': "10",
    'learning_rate': "ag.space.Real(1e-4, 1e-2, default=5e-4, log=True)",
    'activation': "ag.space.Categorical('relu', 'softrelu', 'tanh')",
    'layers': "ag.space.Categorical([100],[1000],[200,100],[300,200,100])",
    'dropout_prob': "ag.space.Real(0.0, 0.5, default=0.1)"
}

gbm_options = {
    'num_boost_round': "100",
    'num_leaves': "ag.space.Int(lower=26, upper=66, default=36)"
}

model_hps = {'NN': nn_options, 'GBM': gbm_options} 

fit_args = {
  'label': 'y',
  'presets': ['best_quality', 'optimize_for_deployment'],
  'time_limits': 60*10,
  'hyperparameters': model_hps,
  'hyperparameter_tune': True,
  'search_strategy': 'skopt'
}

hyperparameters = {
  'fit_args': fit_args,
  'feature_importance': True
}
```
**Note:** Your hyperparameter choices may affect the size of the model package, which could result in additional time taken to upload your model and complete training. Including `'optimize_for_deployment'` in the list of `fit_args['presets']` is recommended to greatly reduce upload times.

<br>

In [9]:
# # Define required label and optional additional parameters
# fit_args = {
#   'label': 'y',
#   # Adding 'best_quality' to presets list will result in better performance (but longer runtime)
#   'presets': ['optimize_for_deployment'],
# }

# # Pass fit_args to SageMaker estimator hyperparameters
# hyperparameters = {
#   'fit_args': fit_args,
#   'feature_importance': True
# }

In [10]:
## S3 output path
# output_prefix = 'autogluon/predict-deliver-time/output'
# s3_output_path = 's3://{}/{}/'.format(bucket, output_prefix)
# print("s3_output_path: ", s3_output_path)

# Arguments for AutoGluon's fit call: the label column is required, the presets
# are optional. Adding 'best_quality' to the presets list will result in better
# performance (but longer runtime).
fit_args = dict(
    label='classes',
    presets=['optimize_for_deployment'],
)

# Hyperparameters handed to the SageMaker estimator: the fit arguments, the
# feature-importance switch and the number of training rows chosen above.
hyperparameters = dict(
    fit_args=fit_args,
    feature_importance=True,
    train_rows=train_rows,
)



## Train

For local training, set `train_instance_type` to `local`.   
For non-local training the recommended instance type is `ml.m5.2xlarge`.   

**Note:** Depending on how many underlying models are trained, `train_volume_size` may need to be increased so that they all fit on disk.

In [11]:
# Resource Type
# instance_type = 'ml.c5.9xlarge'
instance_type = 'local'

# Session handed to the estimator. In local mode it stays None so that the SDK
# builds its own local session; otherwise a regular SageMaker session is used.
sess = None if instance_type == 'local' else sagemaker.Session()
print("sess: ", sess)

# NOTE(review): MXNet is not used in this cell; the import is kept in case other cells rely on it.
from sagemaker.mxnet import MXNet

# URI of the custom training image pushed to ECR by the build step
ecr_image = f'{ecr_uri_prefix}/{training_algorithm_name}:latest'

# Generic estimator running the custom training image. `sess` is now actually
# passed through `sagemaker_session`; before, it was computed but never used.
estimator = Estimator(
                    image_name =ecr_image,
                    role = role,
                    train_instance_type = instance_type,
                    train_instance_count = 1,
                    hyperparameters = hyperparameters,
                    train_volume_size=100,
                    sagemaker_session = sess,
                    )



Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


sess:  None


In [12]:
%%time 
# Start training, passing the uploaded train and test files as the
# 'train' and 'test' input channels; the cell magic above reports the elapsed time.
estimator.fit( {'train': s3_train_file_path, 'test': s3_test_file_path})

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


Creating tmpf9oh9rl1_algo-1-0muwl_1 ... 
[1BAttaching to tmpf9oh9rl1_algo-1-0muwl_12mdone[0m
[36malgo-1-0muwl_1  |[0m 2020-11-17 05:59:07,214 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36malgo-1-0muwl_1  |[0m 2020-11-17 05:59:07,216 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-0muwl_1  |[0m 2020-11-17 05:59:07,216 sagemaker-training-toolkit INFO     Failed to parse hyperparameter fit_args value {'label': 'classes', 'presets': ['optimize_for_deployment']} to Json.
[36malgo-1-0muwl_1  |[0m Returning the value itself
[36malgo-1-0muwl_1  |[0m 2020-11-17 05:59:07,216 sagemaker-training-toolkit INFO     Failed to parse hyperparameter feature_importance value True to Json.
[36malgo-1-0muwl_1  |[0m Returning the value itself
[36malgo-1-0muwl_1  |[0m 2020-11-17 05:59:07,226 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-0muwl"]', 'SM_NE

## Create Inference Image

In [13]:
# Name of the inference image; the values printed below are the first
# arguments passed to the inference build script.
inference_algorithm_name = 'autogluon-sagemaker-inference'
print(account, region, inference_algorithm_name, ecr_uri_prefix)

057716757052 ap-northeast-2 autogluon-sagemaker-inference 057716757052.dkr.ecr.ap-northeast-2.amazonaws.com


In [14]:

# Build the inference image and push it to ECR; the {name} placeholders are
# filled in by IPython from the Python variables defined in earlier cells.
!/bin/bash ./container-inference/build_push_inference.sh {account} {region} {inference_algorithm_name} {ecr_uri_prefix} {registry_id} {registry_uri}

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  14.61GB
Step 1/5 : ARG REGISTRY_URI
Step 2/5 : FROM ${REGISTRY_URI}/mxnet-inference:1.6.0-cpu-py3
 ---> ac9d36e4dca4
Step 3/5 : RUN pip install autogluon
 ---> Using cache
 ---> 826d2318649f
Step 4/5 : RUN pip install PrettyTable
 ---> Using cache
 ---> 403562626280
Step 5/5 : ENV SAGEMAKER_PROGRAM inference.py
 ---> Using cache
 ---> 58e8b04fe4df
Successfully built 58e8b04fe4df
Successfully tagged autogluon-sagemaker-inference:latest
The push refers to repository [057716757052.dkr.ecr.ap-northeast-2.amazonaws.com/autogluon-sagemaker-inference]

[1Bf18e7531: Preparing 
[1B57beef4c: Preparing 
[1B3ff8312b: Preparing 
[1B59ec0d8a: Preparing 
[1B12639dc5: Preparing 
[1B0e9af77f: Preparing 
[1Bbdb96df8: Preparing 
[1B34244b0d: Preparing 
[1B229eddc1: Prep

### Create Model

In [15]:
# Predictor class used to talk to the AutoGluon endpoint
class AutoGluonTabularPredictor(RealTimePredictor):
    """Real-time predictor that exchanges CSV text with the endpoint.

    Requests are serialized with ``csv_serializer`` and sent as ``text/csv``;
    responses are decoded with ``StringDeserializer``. All positional and
    remaining keyword arguments are forwarded to ``RealTimePredictor``.
    """

    def __init__(self, *args, **kwargs):
        csv_io_options = dict(
            content_type='text/csv',
            serializer=csv_serializer,
            deserializer=StringDeserializer(),
        )
        super().__init__(*args, **csv_io_options, **kwargs)

In [16]:
# URI of the custom inference image pushed to ECR by the build step
ecr_image = f'{ecr_uri_prefix}/{inference_algorithm_name}:latest'

is_local_run = instance_type == 'local'
if is_local_run:
    # Local mode: let the estimator build the model from its own training output
    model = estimator.create_model(image=ecr_image, role=role)
else:
    # Hosted training: point the model at the model.tar.gz artifact of the training job
    model_uri = os.path.join(
        estimator.output_path,
        estimator._current_job_name,
        "output",
        "model.tar.gz",
    )
    model = Model(
        model_uri,
        ecr_image,
        role=role,
        sagemaker_session=session,
        predictor_cls=AutoGluonTabularPredictor,
    )
print("local" if is_local_run else "non-local")

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


local


### Endpoint

In [17]:
# Display the image URI currently held in `ecr_image`
ecr_image

'057716757052.dkr.ecr.ap-northeast-2.amazonaws.com/autogluon-sagemaker-inference:latest'

##### Deploy remote or local endpoint

In [18]:
# Instance type for the endpoint: 'local' serves the model in a container on this machine
# instance_type = 'ml.m5.2xlarge'
instance_type = 'local'

# Deploy the model on a single instance and keep the returned predictor
predictor = model.deploy(instance_type=instance_type, initial_instance_count=1)

!

##### Attach to endpoint (or reattach if kernel was restarted)

In [19]:
# Use the local session for a local endpoint, the standard session otherwise
sess = local_session if instance_type == 'local' else session

# Attach to the endpoint by name through the CSV predictor class
endpoint_name = predictor.endpoint
predictor = AutoGluonTabularPredictor(endpoint_name, sagemaker_session=sess)

##### Predict on unlabeled test data

In [20]:
# Split the test set into the target column and the remaining feature columns
label = 'classes'
y_test = test_pd[label]  # values to predict
test_data_nolab = test_pd.drop(labels=[label], axis=1)  # features only, label column removed



In [25]:
# Send only the first test row to the endpoint as CSV text (no index column)
first_row_csv = test_data_nolab[0:1].to_csv(index=False)
response_text = predictor.predict(first_row_csv)

# One prediction per line of the response
results = response_text.splitlines()
print(results)
# Check output
# print(Counter(results))

['2']


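##### Predict on data that includes label column  
Prediction performance metrics will be printed to endpoint logs.  
**Note:** the cell below currently sends `test_data_nolab`, from which the label column has been dropped; send rows of `test_pd` instead to include the label column.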

In [22]:
# Send the first ten test rows to the endpoint as CSV text (no index column)
# To score the whole test set, build the payload with test_data_nolab.to_csv(index=False) instead.
ten_rows_csv = test_data_nolab[0:10].to_csv(index=False)
response_text = predictor.predict(ten_rows_csv)
results = response_text.splitlines()

# Check output: how many times each class was predicted
print(Counter(results))

Counter({'2': 8, '0': 1, '3': 1})


##### Check that classification performance metrics match evaluation printed to endpoint logs as expected

##### Clean up endpoint

In [23]:
# Uncomment the next line to delete the endpoint once you are done with it.
# predictor.delete_endpoint()