# 引入依赖

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import sys
import time
import json
from IPython.display import display
from time import strftime, gmtime
import boto3
import re

# Upgrade the SageMaker Python SDK (-U) and install the sagemaker-experiments
# package using the interpreter that runs this kernel (sys.executable), before
# the sagemaker / smexperiments imports that follow.
!{sys.executable} -m pip install sagemaker -U
!{sys.executable} -m pip install sagemaker-experiments

import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker import get_execution_role
from sagemaker.local import LocalSession
from sagemaker.predictor import csv_serializer
from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig
from sagemaker.model_monitor import DataCaptureConfig, DatasetFormat, DefaultModelMonitor
from sagemaker.s3 import S3Uploader, S3Downloader

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

Collecting sagemaker
  Downloading sagemaker-2.66.1.tar.gz (450 kB)
[K     |████████████████████████████████| 450 kB 9.1 MB/s eta 0:00:01
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.66.1-py2.py3-none-any.whl size=624255 sha256=a97ce4992f953303348717e6cc1cede9efd621a5200e12b40b576cc239d68339
  Stored in directory: /root/.cache/pip/wheels/4d/64/29/3e1c68861c8dea948537d8ea3e473414e06bc814bdb22023af
Successfully built sagemaker
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.65.0
    Uninstalling sagemaker-2.65.0:
      Successfully uninstalled sagemaker-2.65.0
Successfully installed sagemaker-2.66.1


### 打包和部署镜像(该步骤在Cloud9或者本地执行，sagemaker studio不支持docker)
如果系统中没有 docker 命令，请先执行 `sudo yum install docker` 安装，再执行 `sudo service docker start` 启动服务。

* 登录 scikit-learn 基础镜像所在的 ECR 账号（下面的第 1 步）。这一步需要提前执行，否则无法拉取基础镜像。
* 1. aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 246618743249.dkr.ecr.us-west-2.amazonaws.com
* 2. docker build -t training-lightgbm .
* 3. docker tag training-lightgbm:latest 517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest
* 4. docker push 517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest  （注：第 2、3、4 步的命令以自己 ECR 仓库给出的命令为准）


# 模型训练
注意：
1. 替换image_uri 为自己ecr的镜像地址
2. output_path 替换为自己桶的模型输出位置，可以使用 bucket = sess.default_bucket() 输出的桶

In [4]:
# Hyperparameters handed to the estimator for the training job.
hyperparameters = dict(tree_num_leaves=31, num_round=5)

# Estimator settings, collected in one mapping and unpacked into SKLearn below.
# NOTE: image_uri and output_path are account-specific; replace them with your
# own ECR image address and S3 output location.
_estimator_settings = {
    'image_uri': '517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest',
    'entry_point': 'entry_point.py',
    'source_dir': 'training_code',
    'hyperparameters': hyperparameters,
    'role': sagemaker.get_execution_role(),
    'instance_count': 1,
    'instance_type': 'ml.c5.xlarge',
    'output_path': 's3://sagemaker-us-west-2-517141035927/output',
    'base_job_name': 'lightgbm-model-training',
    'disable_profiler': True,
}

_estimator = SKLearn(**_estimator_settings)

注意：将 data_channels 中的路径替换为自己 S3 桶中存放数据的位置。

In [5]:
# Channel name -> S3 URI of the CSV file supplied on that channel.
# Replace the URIs with the location of your own data.
data_channels = dict(
    x_train='s3://sagemaker-us-west-2-517141035927/dataset/CR_train_x.csv',
    y_train='s3://sagemaker-us-west-2-517141035927/dataset/CR_train_y.csv',
    x_test='s3://sagemaker-us-west-2-517141035927/dataset/CR_test_x.csv',
    y_test='s3://sagemaker-us-west-2-517141035927/dataset/CR_test_y.csv',
)

# Launch the training job with the four channels.
_estimator.fit(data_channels)

INFO:sagemaker:Creating training-job with name: lightgbm-model-training-2021-10-27-12-17-13-171


2021-10-27 12:17:13 Starting - Starting the training job...
2021-10-27 12:17:15 Starting - Launching requested ML instances...
2021-10-27 12:18:09 Starting - Preparing the instances for training......
2021-10-27 12:19:12 Downloading - Downloading input data...
2021-10-27 12:19:19 Training - Downloading the training image......
2021-10-27 12:20:42 Training - Training image download completed. Training in progress..[34m2021-10-27 12:20:42,909 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-10-27 12:20:42,912 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-27 12:20:42,920 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-10-27 12:20:43,186 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-27 12:20:49,413 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2

# 批量推理

In [22]:
import sagemaker
from sagemaker.sklearn import SKLearnModel

# S3 location of the model archive under the training job's output prefix.
model_data = "s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-27-12-17-13-171/output/model.tar.gz"

# Constructor arguments for the inference model, unpacked into SKLearnModel below.
_model_settings = dict(
    model_data=model_data,
    role=sagemaker.get_execution_role(),
    entry_point='entry_point.py',
    source_dir='batch_code',
    framework_version='0.20.0',
    py_version='py3',
)

_model = SKLearnModel(**_model_settings)

In [23]:
from sagemaker import get_execution_role
from time import strftime, gmtime

# Session-derived context.
sagemaker_session = sagemaker.Session()
role = get_execution_role()
region = sagemaker_session.boto_session.region_name

# S3 input for the batch job and the S3 prefix its results are written under.
prediction_data_path = 's3://sagemaker-us-west-2-517141035927/dataset/validate_data.csv'
out_predict_data_path = 's3://sagemaker-us-west-2-517141035927/output/'

# Instance sizing and request limits for the transform job.
_instance_type, _instance_count = 'ml.c5.4xlarge', 1
_max_concurrent_transforms, _max_payload = 1, 10

# Job name suffixed with the current UTC timestamp.
_job_name = 'lightgbm-batch-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(_job_name)

# Build the transformer from the model defined earlier.
_transformer_settings = dict(
    instance_count=_instance_count,
    instance_type=_instance_type,
    strategy='MultiRecord',
    max_concurrent_transforms=_max_concurrent_transforms,
    max_payload=_max_payload,
    output_path=out_predict_data_path,
    assemble_with='Line',
    accept='text/csv',
)
lightgbm_transformer = _model.transformer(**_transformer_settings)

_transform_request = dict(
    data=prediction_data_path,
    content_type='text/csv',
    split_type='Line',
    # Select the whole input record ("$"), or drop this argument altogether;
    # otherwise the inference features differ from the training features and
    # prediction fails.
    input_filter="$",
    join_source="Input",
    output_filter="$[0 ,-1]",
    job_name=_job_name,
)
lightgbm_transformer.transform(**_transform_request)
lightgbm_transformer.wait()

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.


lightgbm-batch-2021-10-28-03-20-42


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2021-10-28-03-20-43-418
INFO:sagemaker:Creating transform job with name: lightgbm-batch-2021-10-28-03-20-42


...........................
[34mProcessing /opt/ml/code[0m
[34mBuilding wheels for collected packages: entry-point
  Building wheel for entry-point (setup.py): started
  Building wheel for entry-point (setup.py): finished with status 'done'
  Created wheel for entry-point: filename=entry_point-1.0.0-py2.py3-none-any.whl size=68773 sha256=a1b98c2c6b2dfa516d80dcc9d73987a4c2d1b65db487d003637557f18993e0ab
  Stored in directory: /tmp/pip-ephem-wheel-cache-7kjnu6t1/wheels/3e/0f/51/2f1df833dd0412c1bc2f5ee56baac195b5be563353d111dca6[0m
[34mSuccessfully built entry-point[0m
[34mInstalling collected packages: entry-point[0m
[34mSuccessfully installed entry-point-1.0.0[0m
[34mCollecting joblib
  Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)[0m
[34mCollecting pathlib
  Downloading pathlib-1.0.1.tar.gz (49 kB)[0m
[34mCollecting lightgbm
  Downloading lightgbm-3.3.1-py3-none-manylinux1_x86_64.whl (2.0 MB)[0m
[34mCollecting pandas==1.3.4
  Downloading pandas-1.3.4-cp37-cp37m

In [14]:
!aws s3 cp --recursive $lightgbm_transformer.output_path ./

download: s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-27-12-17-13-171/debug-output/training_job_end.ts to lightgbm-model-training-2021-10-27-12-17-13-171/debug-output/training_job_end.ts
download: s3://sagemaker-us-west-2-517141035927/output/validate_data.csv.out to ./validate_data.csv.out
download: s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-26-12-17-52-220/output/model.tar.gz to lightgbm-model-training-2021-10-26-12-17-52-220/output/model.tar.gz
download: s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-26-12-17-52-220/debug-output/training_job_end.ts to lightgbm-model-training-2021-10-26-12-17-52-220/debug-output/training_job_end.ts
download: s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-25-11-37-37-645/debug-output/training_job_end.ts to lightgbm-model-training-2021-10-25-11-37-37-645/debug-output/training_job_end.ts
download: s3://sagemaker-us-west-2-5171410

In [16]:
# Print the first 10,000 bytes of validate_data.csv.out from the current directory.
!head -c 10000 validate_data.csv.out

0.0007647735698224176,0.0019096561889231812,0.000764819440755992,0.0007647735698224176,0.0007647735698224176,0.0007648306552935623,0.000764830655293563,0.0007648306552935623,0.000764830655293563,0.000764830655293563,0.0007648364991217725,0.0007586076785576501,0.0007648320479003741,0.0007648306552935623,0.0007648306552935623,0.0007648306552935623,0.0007648306552935623,0.0007648306552935623,0.0007648234687245943,0.0007648320479003741,0.0007648234687245936,0.0007648306552935623,0.0007648306552935623,0.000750498358877397,0.0007648364960800974,0.0007639653732651306,0.0007648364991217719,0.000763364272048218,0.0007648364960800974,0.0007648364960800974,0.0007648364960800974,0.0007648364960800974,0.0007648364960800974,0.0007648234687245957,0.0007648364991217719,0.0007648234687245936,0.0007648364960800994,0.0007647735865600053,0.0007647735698224176,0.0023359793160726483,0.0007647735698224176,0.0007638774595612161,0.0007648366586815664,0.0007648364991217719,0.0007648306552935623,0.00076483065529