# Train Model using Local BYOS
- AWS Boto3: create_training_job
    - https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateTrainingJob.html
- Step 5: Train a Model    
    - Amazon SageMaker Python SDK, AWS SDK for Python (Boto3) 의 두가지 방법을 통한 모델 훈련 방법
    - https://docs.aws.amazon.com/sagemaker/latest/dg/ex1-train-model.html
- How to install external package     
    - https://sagemaker.readthedocs.io/en/stable/frameworks/mxnet/using_mxnet.html#use-third-party-libraries    

In [1]:
%store -r no_auto_train_file
%store -r no_auto_test_file

In [2]:
import sagemaker
from sagemaker import get_execution_role
import json
import os
import boto3
import datetime
import logging
import pandas as pd



In [3]:
# Project name
project_name = 'predict-deliver-time'
now = datetime.datetime.now
str_time = now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
# Region
region = boto3.Session().region_name
# Bucket
bucket = sagemaker.Session().default_bucket() 
# role
role = get_execution_role()

train_job_name = project_name + '-'+ str_time

In [4]:
from utils import upload_s3

# Upload train. test file
data_prefix = 'autogluon/predict-deliver-time/data'
train_file = no_auto_train_file.split('/')[-1] # 파일 이름만 추출
test_file = no_auto_test_file.split('/')[-1]

s3_train_file_path = upload_s3(bucket, no_auto_train_file, data_prefix)
s3_test_file_path = upload_s3(bucket, no_auto_test_file, data_prefix)
print("s3_train_file_path: ", s3_train_file_path)
print("s3_test_file_path: ", s3_test_file_path)

s3_train_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/train/train.csv
s3_test_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/test/test.csv


In [5]:
## Train Rows
df = pd.read_csv(no_auto_train_file)
train_rows = df.shape[0]
train_rows = 500

In [6]:
## S3 output path
output_prefix = 'autogluon/predict-deliver-time/output'
s3_output_path = 's3://{}/{}/'.format(bucket, output_prefix)
print("s3_output_path: ", s3_output_path)

# Define required label and optional additional parameters
# fit_args = {
#   'label': 'classes',
#   # Adding 'best_quality' to presets list will result in better performance (but longer runtime)
#   'presets': ['optimize_for_deployment'],
# }

hyperparameters = {
    'presets': 'optimize_for_deployment',
    'feature_importance': True,    
#     'train_filename':train_file,
#     'test_filename':test_file, 
#     's3_output' :  s3_output_path,
#    'train_job_name' : train_job_name,
    'label' : 'classes', # label column name
    'train_rows': train_rows  
}


s3_output_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/output/


In [7]:
# Resource Type
# instance_type = 'ml.c5.9xlarge'
instance_type = 'local'


In [8]:
if instance_type == 'local':
    sess = None
    print("sess: ", sess)
else:
    sess = sagemaker.Session()
    print("sess: ", sess)    

sess:  None


In [9]:
%%time 

from sagemaker.mxnet import MXNet

training_image = '763104351884.dkr.ecr.{}.amazonaws.com/mxnet-training:1.6.0-cpu-py3'.format(region)

mx_estimator = MXNet(
                     entry_point = 'train.py',
                     source_dir = 'src',
                     framework_version='1.6.0',
                     py_version='py3',
                     role = role,
                     instance_type = instance_type,
                     instance_count = 1,
                     hyperparameters = hyperparameters,
                     session = sess
                    )

from sagemaker.inputs import TrainingInput

s3_train = TrainingInput(s3_train_file_path, distribution='FullyReplicated') 
s3_test = TrainingInput(s3_test_file_path, distribution='FullyReplicated')
mx_estimator.fit( {'train': s3_train_file_path, 'test': s3_test_file_path})

                     
                     

Creating tmpi6356bnr_algo-1-oadji_1 ... 
[1BAttaching to tmpi6356bnr_algo-1-oadji_12mdone[0m
[36malgo-1-oadji_1  |[0m 2020-11-17 01:00:26,052 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36malgo-1-oadji_1  |[0m 2020-11-17 01:00:26,054 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-oadji_1  |[0m 2020-11-17 01:00:26,064 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-oadji"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"feature_importance":true,"label":"classes","presets":"optimize_for_deployment","train_rows":500}', 'SM_USER_ENTRY_POINT': 'train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-oadji","hosts":["algo-1-oadji"]}', 'SM_INPUT_DATA_CONFIG': '{"test":{"TrainingInputMode":"File"},"train":{"TrainingInputMode":"File"}}', 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data', 'SM_CHANNELS': '["test","train"]

## Create Model

In [19]:
# Create predictor object
from sagemaker.predictor import RealTimePredictor, csv_serializer, StringDeserializer

class AutoGluonTabularPredictor(RealTimePredictor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, content_type='text/csv', 
                         serializer=csv_serializer, 
                         deserializer=StringDeserializer(), **kwargs)

In [20]:
from sagemaker.mxnet import MXNetModel

## 모델 결과 파일 저장
# model_artifact='s3://{}/{}/output/model.tar.gz'.format(bucket, train_byos_job_name)
# print('model_artifact: ', model_artifact)                                                                                                              

# training_image, inferene_image 가 동일
inference_image = '057716757052.dkr.ecr.ap-northeast-2.amazonaws.com/autogluon-sagemaker-inference:latest'

if instance_type == 'local':
    model = estimator.create_model(image=inference_image, role=role)
    print("local")

else:
    print("not local")
    pass
#     model_uri = os.path.join(estimator.output_path, estimator._current_job_name, "output", "model.tar.gz")
#     model = Model(model_uri, ecr_image, role=role, sagemaker_session=session, 
#                   predictor_cls=AutoGluonTabularPredictor)



NameError: name 'estimator' is not defined

In [12]:
# predictor = model.deploy(initial_instance_count=1, 
#                          instance_type=instance_type)

In [13]:
# from sagemaker import local
# local_session = local.LocalSession()


In [14]:
# # Select standard or local session based on instance_type
# if instance_type == 'local': 
#     sess = local_session
# else: 
#     sess = session

# # Attach to endpoint
# predictor = AutoGluonTabularPredictor(predictor.endpoint, sagemaker_session=sess)

In [15]:
# %store -r no_auto_test_file
# test_pd = pd.read_csv(no_auto_test_file)
# label_column = 'classes'
# y_test = test_pd[label_column]
# X_test = test_pd.drop(columns=[label_column])
# X_test = X_test[0:1].to_csv(index=False)
# X_test

In [16]:
# from sagemaker.serializers import CSVSerializer
# from sagemaker.deserializers import JSONDeserializer
# CSVSerializer.ACCEPT = 'text/csv'
# predictor.serializer = CSVSerializer()
# JSONDeserializer.ACCEPT = 'application/json'
# predictor.deserializer = JSONDeserializer()

In [17]:
# results = predictor.predict(X_test)

In [18]:
# results.splitlines()

## Store 