# Train Model using Boto3
- AWS Boto3: create_training_job
    - https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateTrainingJob.html
- Step 5: Train a Model    
    - Amazon SageMaker Python SDK, AWS SDK for Python (Boto3) 의 두가지 방법을 통한 모델 훈련 방법
    - https://docs.aws.amazon.com/sagemaker/latest/dg/ex1-train-model.html
- How to install external packages
    - https://sagemaker.readthedocs.io/en/stable/frameworks/mxnet/using_mxnet.html#use-third-party-libraries    

In [50]:
# Restore variables previously persisted with IPython's %store magic.
# Both are used further down as local file paths (train / test CSV files).
%store -r no_auto_train_file
%store -r no_auto_test_file

In [51]:
import sagemaker
from sagemaker import get_execution_role
import json
import os
import boto3
import datetime
import logging
import pandas as pd



In [52]:
# --- Naming -----------------------------------------------------------------
project_name = 'predict-deliver-time'
# Kept as a callable so that every call produces a fresh timestamp.
now = datetime.datetime.now
# %f yields 6-digit microseconds; trimming the last 3 characters leaves milliseconds.
str_time = now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
# Training job name: "<project_name>-<timestamp>"
train_job_name = '-'.join((project_name, str_time))

# --- AWS context ------------------------------------------------------------
# Region of the default boto3 session
region = boto3.Session().region_name
# Default bucket of the SageMaker session
bucket = sagemaker.Session().default_bucket()
# Execution role of the current notebook environment
role = get_execution_role()

In [53]:
from utils import upload_s3

# Upload the train and test files to S3 under a common key prefix.
data_prefix = 'autogluon/predict-deliver-time/data'

# Keep only the file name (last path component) of each local path.
train_file = no_auto_train_file.rsplit('/', 1)[-1]
test_file = no_auto_test_file.rsplit('/', 1)[-1]

# Upload train first, then test; upload_s3's return value is kept as the S3 path
# (presumably an s3:// URI, judging by the printed result).
s3_train_file_path, s3_test_file_path = (
    upload_s3(bucket, local_path, data_prefix)
    for local_path in (no_auto_train_file, no_auto_test_file)
)

print("s3_train_file_path: ", s3_train_file_path)
print("s3_test_file_path: ", s3_test_file_path)

s3_train_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/train.csv
s3_test_file_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/data/preproc_data/auto_no_fe/test.csv


In [54]:
## Train Rows
# Upper bound on the `train_rows` value sent to the training script as a
# hyperparameter. Set to None to use every row of the training file.
max_train_rows = 500

df = pd.read_csv(no_auto_train_file)
# Previously the real row count was computed and then unconditionally
# overwritten with 500. Make the cap explicit and never request more rows
# than the file actually contains.
train_rows = len(df) if max_train_rows is None else min(len(df), max_train_rows)

In [55]:
## S3 output path
# Destination prefix (inside the bucket) for training outputs.
output_prefix = 'autogluon/predict-deliver-time/output'
s3_output_path = 's3://{}/{}/'.format(bucket, output_prefix)
print("s3_output_path: ", s3_output_path)

# Hyperparameters for the training job; keys are kept in the same order as before.
hyperparameters = dict(
    train_filename=train_file,      # file name of the training CSV
    test_filename=test_file,        # file name of the test CSV
    s3_output=s3_output_path,
    train_job_name=train_job_name,
    label_column='classes',         # label column name
    train_rows=train_rows,
)


s3_output_path:  s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/output/


In [None]:
# Create a SageMaker SDK session object with default settings (no arguments supplied).
sess = sagemaker.Session()

In [58]:
from sagemaker.mxnet import MXNet

# Resource type.
# Previously `instance_type` was assigned 'ml.c5.9xlarge' and then immediately
# overwritten with 'local'. An explicit toggle makes the choice visible:
#   True  -> run the training container locally (fast iteration)
#   False -> run on a managed SageMaker training instance
use_local_mode = True
instance_type = 'local' if use_local_mode else 'ml.c5.9xlarge'

# MXNet 1.6.0 CPU training image for the current region.
training_image = '763104351884.dkr.ecr.{}.amazonaws.com/mxnet-training:1.6.0-cpu-py3'.format(region)

# NOTE(review): the original call passed `session=sess`. The documented Estimator
# keyword is `sagemaker_session`; `session` is not a recognised parameter, and the
# recorded local-mode run suggests it was silently ignored. The argument is
# therefore omitted so the SDK creates a session matching `instance_type`
# (local mode expects a LocalSession rather than a plain sagemaker.Session).
mx_estimator = MXNet(
    entry_point='autogluon_train.py',   # training script inside `source_dir`
    source_dir='src',
    image_uri=training_image,
    role=role,
    instance_type=instance_type,
    instance_count=1,
    hyperparameters=hyperparameters,
)

                     
                     

In [59]:
%%time
from sagemaker.inputs import TrainingInput

s3_train = TrainingInput(s3_train_file_path, distribution='FullyReplicated') 
s3_test = TrainingInput(s3_test_file_path, distribution='FullyReplicated')
mx_estimator.fit( {'train': s3_train_file_path, 'test': s3_test_file_path})

Creating tmp17nq_slw_algo-1-79rwe_1 ... 
[1BAttaching to tmp17nq_slw_algo-1-79rwe_12mdone[0m
[36malgo-1-79rwe_1  |[0m 2020-11-14 11:48:14,194 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36malgo-1-79rwe_1  |[0m 2020-11-14 11:48:14,196 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-79rwe_1  |[0m 2020-11-14 11:48:14,206 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-79rwe"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"label_column":"classes","s3_output":"s3://sagemaker-ap-northeast-2-057716757052/autogluon/predict-deliver-time/output/","test_filename":"test.csv","train_filename":"train.csv","train_job_name":"predict-deliver-time-2020-11-14-11-47-41-216","train_rows":500}', 'SM_USER_ENTRY_POINT': 'autogluon_train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-79rwe","hosts":["algo-1-79rwe"]}', 'SM_INPU

## Store 