# BYOC(Bring Your Own Container) - LightGBM
----

이 노트북은 SageMaker에서 지원하지 않는 LightGBM 알고리즘을 Docker 이미지로 빌드 후, Amazon ECR로 푸시하여 iris dataset에서
간단한 학습과 추론을 수행하는 예시입니다.

In [1]:
# Install the lightgbm and joblib packages with pip.
!pip install lightgbm joblib

[33mYou are using pip version 10.0.1, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [12]:
import boto3
import re
import os
from os import path
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
import sagemaker as sage
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
import json

# S3 key prefix under which this notebook places its inputs and outputs.
prefix = 'sagemaker/byom-lightgbm/'
# Execution role that is later passed to the SageMaker Estimator.
role = get_execution_role()
# SageMaker session used for S3 uploads and by the Estimator.
sess = sage.Session()
# The session's default bucket is used for every S3 path in this notebook.
bucket_name = sess.default_bucket()


먼저, 로컬에서 도커 이미지 빌드를 `docker build` 명령으로 수행합니다.
이미지 빌드가 완료되면, 도커의 로컬 인스턴스에게 이미지를 저장할 위치를 태깅을 통해 알려주어야 합니다. 

```
$ docker build -t <image name> .
$ docker tag <image name> <account number>.dkr.ecr.<region>.amazonaws.com/<image name>:<tag>
```
이미지에 저장소 이름을 태깅하지 않을 경우, 도커는 기본 설정에 맞춰서 Amazon ECR이 아니라 Docker Hub에 업로드하게 됩니다. Amazon SageMaker는 현재 도커 이미지를 Amazon ECR에 올리도록 되어 있습니다. 이미지를 Docker Hub가 아니라 ECR에 푸시하려면, 저장소의 호스트 이름을 가지고 태깅 작업을 해야 합니다. Amazon ECR에 업로드하는 방법은, 아래 코드 셀을 참조해 주세요.

In [3]:
%%sh
# Build the LightGBM container image and push it to Amazon ECR.
# Abort on the first failing command so that a failed login or build never
# falls through to `docker push`.
set -e

algorithm_name=sagemaker-lightgbm

# Make the train and serve scripts executable before they are copied into the image.
chmod +x lightgbm/train
chmod +x lightgbm/serve

account=$(aws sts get-caller-identity --query Account --output text)

# `aws configure get` exits non-zero when no region is configured; tolerate
# that here and fall back to us-west-2.
region=$(aws configure get region || true)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# Create the ECR repository only when it does not exist yet.
if ! aws --region "${region}" ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws --region "${region}" ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to ECR. `get-login-password` is the supported command (and the
# only one available in AWS CLI v2); older v1 CLIs that lack it fall back to
# the legacy `get-login`, which prints a ready-made `docker login` command.
if password=$(aws ecr get-login-password --region "${region}" 2> /dev/null)
then
    printf '%s' "${password}" | docker login --username AWS --password-stdin "${registry}"
else
    $(aws ecr get-login --region "${region}" --no-include-email)
fi

docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

Login Succeeded
Sending build context to Docker daemon  111.6kB
Step 1/11 : FROM ubuntu:18.04
 ---> c3c304cb4f22
Step 2/11 : RUN apt -y update && apt install -y --no-install-recommends     wget     python3-distutils     nginx     ca-certificates     libgomp1     && apt clean
 ---> Using cache
 ---> 00b6b901760e
Step 3/11 : RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py &&     pip install wheel numpy scipy scikit-learn pandas lightgbm flask gevent gunicorn &&     rm -rf /root/.cache
 ---> Using cache
 ---> 08b8a7b618d8
Step 4/11 : RUN ln -s /usr/bin/python3 /usr/bin/python
 ---> Using cache
 ---> d91c6f662327
Step 5/11 : RUN ln -s /usr/bin/pip3 /usr/bin/pip
 ---> Using cache
 ---> a41526b455cc
Step 6/11 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 63ac8e60802a
Step 7/11 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> 2fdd466d522b
Step 8/11 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> abef54f90fd3
Step 9/11 : COPY lightgbm /opt/progra

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



## 1. Data Preparation
----

In [13]:
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Hold out 20% of the samples for validation, stratified on the class label.
train_x, valid_x, train_y, valid_y = train_test_split(
    iris.data,
    iris.target,
    stratify=iris.target,
    test_size=0.2,
)

LightGBM에 최적화된 binary 데이터셋을 생성합니다.

In [17]:
# Build the LightGBM training dataset from the in-memory arrays.
train = lgb.Dataset(train_x, label=train_y)

# Derive the validation set from `train` via create_valid instead of building
# an independent Dataset.
valid = train.create_valid(valid_x, label=valid_y)

train_data_local = './data/train.bin'
valid_data_local = './data/valid.bin'

# Make sure the output directory exists before writing the binary files.
os.makedirs(os.path.dirname(train_data_local), exist_ok=True)
os.makedirs(os.path.dirname(valid_data_local), exist_ok=True)

# NOTE(review): some LightGBM releases refuse to overwrite an existing binary
# file and only log a warning -- confirm for the installed version, and delete
# stale ./data/*.bin files before re-running if so.
train.save_binary(train_data_local)
valid.save_binary(valid_data_local)

<lightgbm.basic.Dataset at 0x7f4d1b6a1208>

In [None]:
# # Create the datasets for lgb
# train = lgb.Dataset(train_x, label=train_y)

# # Associate the validation data with the training data
# validation = train.create_valid(validation_x, label=validation_y)

# # Local save locations
# train_data_local = './data/train.bin'
# val_data_local = './data/validation.bin'

# # Save in binary format
# train.save_binary(train_data_local)
# validation.save_binary(val_data_local)


In [18]:
# Re-create both datasets from the local binary files
# (train_data_local / valid_data_local).
train = lgb.Dataset(train_data_local)
valid = lgb.Dataset(valid_data_local) 

In [9]:
# Build both datasets directly from the in-memory arrays rather than from the
# saved binary files.
train = lgb.Dataset(train_x, label=train_y)
valid = lgb.Dataset(valid_x, label=valid_y) 

In [19]:
# LightGBM training parameters for the 3-class problem.
hyperparams = dict(
    objective='multiclass',
    num_class=3,
    verbose=1,
)


In [None]:
# Training
model = lgb.train(
    params=hyperparams, 
    train_set=train,
    valid_sets=[train, valid]
)

In [34]:
# Both binary datasets live in the same local directory.
train_dir = valid_dir = './data/'

# Resolve the file paths and echo them for a quick sanity check.
train_filepath = path.join(train_dir, 'train.bin')
valid_filepath = path.join(valid_dir, 'valid.bin')
print(train_filepath, valid_filepath)

# Create the datasets from the binary files and collect them for valid_sets.
dtrain, dvalid = lgb.Dataset(train_filepath), lgb.Dataset(valid_filepath)
valid_list = [dtrain, dvalid]

./data/train.bin ./data/valid.bin


In [35]:
# LightGBM training parameters for the 3-class problem.
hyperparams = dict(
    objective='multiclass',
    num_class=3,
    verbose=1,
)


In [None]:
# Training
model = lgb.train(
    params=hyperparams, 
    train_set=dtrain,
    valid_sets=valid_list
)

In [5]:
# Parameters for a 10-round multiclass run.
hyperparams = dict(
    num_round=10,
    objective='multiclass',
    num_class=3,
)

# Resolve the binary dataset paths and echo them.
train_filepath = path.join(train_dir, 'train.bin')
valid_filepath = path.join(valid_dir, 'valid.bin')
print(train_filepath, valid_filepath)

# Create the datasets from the saved binary files.
dtrain, dvalid = lgb.Dataset(train_filepath), lgb.Dataset(valid_filepath)

./data/train.bin ./data/valid.bin


In [None]:
# Training
model = lgb.train(
    params=hyperparams, 
    train_set=dtrain
)




In [6]:
# Upload the local binary datasets to S3 under <prefix>input/train and
# <prefix>input/valid; the returned values are kept for the training channels.
train_data_s3 = sess.upload_data(
    train_data_local,
    bucket=bucket_name,
    key_prefix=path.join(prefix, 'input/train'),
)
valid_data_s3 = sess.upload_data(
    valid_data_local,
    bucket=bucket_name,
    key_prefix=path.join(prefix, 'input/valid'),
)

## 2. Training
----

In [7]:
# Hyperparameters set on the SageMaker estimator; the class count is taken
# from the iris label names.
params = {
    'num_round': 10,
    'objective': 'multiclass',
    'num_class': len(iris.target_names),
}

# Regex that captures the multi_logloss value reported for a valid_<n> set in
# the training log.
metric_definitions = [
    {
        'Name': 'multilogloss',
        'Regex': '.*\\[[0-9]+\\].*valid_[0-9]+\'s\\smulti_logloss: (\\S+)',
    },
]

In [8]:
# AWS account ID of the caller, looked up through STS.
account = sess.boto_session.client('sts').get_caller_identity()['Account']
# Region of the session's underlying boto session.
region = sess.boto_session.region_name

In [None]:
# S3 location passed to the estimator as output_path: s3://<bucket>/<prefix>output
modelartifact_path = "s3://"+path.join(bucket_name, prefix, 'output')
print(modelartifact_path)
# Estimator backed by the sagemaker-lightgbm:latest image in this account's
# ECR registry.
model = sage.estimator.Estimator(
    '{}.dkr.ecr.{}.amazonaws.com/sagemaker-lightgbm:latest'.format(account, region),
    role,
    1, # number of instances
    'ml.c4.2xlarge', 
    output_path=modelartifact_path,
    sagemaker_session=sess,
    metric_definitions=metric_definitions
)
 
# Set the hyperparameters defined in `params` on the estimator.
model.set_hyperparameters(**params)
 
# Start training with only the `train` channel (no validation channel).
model.fit(dict(
    train = train_data_s3
))

s3://sagemaker-us-west-2-143656149352/sagemaker/byom-lightgbm/output
2020-06-13 14:55:11 Starting - Starting the training job...
2020-06-13 14:55:13 Starting - Launching requested ML instances.

In [13]:
# S3 location passed to the estimator as output_path: s3://<bucket>/<prefix>output
modelartifact_path = "s3://"+path.join(bucket_name, prefix, 'output')
print(modelartifact_path)
# Estimator backed by the sagemaker-lightgbm:latest image in this account's
# ECR registry.
model = sage.estimator.Estimator(
    '{}.dkr.ecr.{}.amazonaws.com/sagemaker-lightgbm:latest'.format(account, region),
    role,
    1, # number of instances
    'ml.c4.2xlarge', 
    output_path=modelartifact_path,
    sagemaker_session=sess,
    metric_definitions=metric_definitions
)
 
# Set the hyperparameters defined in `params` on the estimator.
model.set_hyperparameters(**params)
 
# Start training with both the `train` and `valid` channels.
model.fit(dict(
    train = train_data_s3,
    valid = valid_data_s3
))

s3://practice-daekeun/sagemaker/byom-lightgbm/output
2019-08-15 08:38:09 Starting - Starting the training job...
2019-08-15 08:38:11 Starting - Launching requested ML instances......
2019-08-15 08:39:16 Starting - Preparing the instances for training...
2019-08-15 08:40:03 Downloading - Downloading input data...
2019-08-15 08:40:38 Training - Training image download completed. Training in progress..
[31mStarting the training.[0m
[31m[LightGBM] [Info] Total Bins 85[0m
[31m[LightGBM] [Info] Number of data: 120, number of used features: 4[0m
[31m[LightGBM] [Info] Start training from score -1.098612[0m
[31m[LightGBM] [Info] Start training from score -1.098612[0m
[31m[LightGBM] [Info] Start training from score -1.098612[0m
[31m[1]#011training's multi_logloss: 0.976374#011valid_1's multi_logloss: 0.979219[0m
[31m[2]#011training's multi_logloss: 0.873532#011valid_1's multi_logloss: 0.878614[0m
[31m[3]#011training's multi_logloss: 0.786093#011valid_1's multi_logloss: 0.79283[

## 3. Prediction
----

In [10]:
from sagemaker.predictor import csv_serializer
# Deploy the trained model to one ml.m4.xlarge instance; csv_serializer is
# passed as the request serializer.
predictor = model.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

---------------------------------------------------------------------------------------!

In [11]:
# Send the held-out validation features to the endpoint. The train/test split
# names them `valid_x`; `validation_x` is never defined in this notebook and
# raised a NameError.
result = predictor.predict(valid_x)
# The response is JSON; decode it into a Python dict with a 'results' key.
result = json.loads(result)
result

{'results': [[0.7257590295228772, 0.13655327430896527, 0.13768769616815751],
  [0.15715624612525764, 0.6611616462968629, 0.1816821075778795],
  [0.1416324830383643, 0.14817439774286004, 0.7101931192187756],
  [0.7257590295228772, 0.13655327430896527, 0.13768769616815751],
  [0.15157895240496283, 0.6877240205579943, 0.16069702703704303],
  [0.7012227664352552, 0.14751818565326474, 0.15125904791148015],
  [0.15157895240496283, 0.6877240205579943, 0.16069702703704303],
  [0.1418854741073229, 0.2585667619465168, 0.5995477639461603],
  [0.15157895240496283, 0.6877240205579943, 0.16069702703704303],
  [0.1486892054284723, 0.39355273240652433, 0.4577580621650033],
  [0.15805570862923718, 0.17406798279313812, 0.6678763085776247],
  [0.7012227664352552, 0.14751818565326474, 0.15125904791148015],
  [0.7006628780463077, 0.14740040037409344, 0.15193672157959892],
  [0.13417928359075812, 0.35296708925318, 0.5128536271560619],
  [0.1369826720223735, 0.1489770643551188, 0.7140402636225076],
  [0.1462

### Delete endpoint

In [12]:
# Delete the endpoint that model.deploy created.
sess.delete_endpoint(predictor.endpoint)