# 引入依赖

In [6]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import sys
import time
import json
from IPython.display import display
from time import strftime, gmtime
import boto3
import re

# Install/upgrade the SageMaker SDK and the experiments package with the same
# interpreter the kernel is running (sys.executable), so the packages land in
# the kernel's environment rather than whatever `pip` is first on PATH.
# NOTE(review): versions are not pinned; a future SDK release may break the imports below.
!{sys.executable} -m pip install sagemaker -U
!{sys.executable} -m pip install sagemaker-experiments

import sagemaker
from sagemaker import get_execution_role
from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig
from sagemaker.local import LocalSession
from sagemaker.model_monitor import DataCaptureConfig, DatasetFormat, DefaultModelMonitor
from sagemaker.predictor import csv_serializer
from sagemaker.s3 import S3Uploader, S3Downloader
from sagemaker.sklearn import SKLearn
from sagemaker.transformer import Transformer

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


### 打包和部署镜像(该步骤在Cloud9或者本地执行，sagemaker studio不支持docker)
如果提示 docker 命令不存在，请先执行 `sudo yum install docker` 安装，再执行 `sudo service docker start` 启动服务。

* 先登录 scikit-learn 基础镜像所在的 ECR 账号（即下面第 1 步）。这一步需要提前执行，否则无法拉取基础镜像。
* 1. aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 246618743249.dkr.ecr.us-west-2.amazonaws.com
* 2. docker build -t training-lightgbm .
* 3. docker tag training-lightgbm:latest 517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest
* 4. docker push 517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest （第 2、3、4 步的命令请以自己 ECR 仓库给出的推送命令为准）


# 模型训练
注意：
1. 替换image_uri 为自己ecr的镜像地址
2. output_path 替换为自己桶的模型输出位置，可以使用 bucket = sess.default_bucket() 输出的桶

In [7]:
# Hyperparameters forwarded to the training job.
hyperparameters = {"tree_num_leaves": 31, "num_round": 5}

# SKLearn estimator running the custom ECR image with the script in
# training_code/entry_point.py. Replace image_uri and output_path with the
# values for your own account before running.
_estimator = SKLearn(
    entry_point='entry_point.py',
    source_dir='training_code',
    image_uri='517141035927.dkr.ecr.us-west-2.amazonaws.com/training-lightgbm:latest',
    role=get_execution_role(),
    hyperparameters=hyperparameters,
    instance_type='ml.c5.xlarge',
    instance_count=1,
    base_job_name='lightgbm-model-training',
    output_path='s3://sagemaker-us-west-2-517141035927/output',
    disable_profiler=True,
)

注意：请将 data_channels 中的 S3 路径替换为自己桶中存放数据的位置。

In [8]:
# Channel name -> S3 object for each training input.
# Replace the URIs with the locations of the data in your own bucket.
data_channels = dict([
    ('x_train', 's3://sagemaker-us-west-2-517141035927/dataset/CR_train_x.csv'),
    ('y_train', 's3://sagemaker-us-west-2-517141035927/dataset/CR_train_y.csv'),
    ('x_test', 's3://sagemaker-us-west-2-517141035927/dataset/CR_test_x.csv'),
    ('y_test', 's3://sagemaker-us-west-2-517141035927/dataset/CR_test_y.csv'),
])

# Start the training job with the channels defined above.
_estimator.fit(data_channels)

INFO:sagemaker:Creating training-job with name: lightgbm-model-training-2021-10-26-12-17-52-220


2021-10-26 12:17:52 Starting - Starting the training job...
2021-10-26 12:17:54 Starting - Launching requested ML instances...
2021-10-26 12:18:50 Starting - Preparing the instances for training.........
2021-10-26 12:20:21 Downloading - Downloading input data
2021-10-26 12:20:21 Training - Downloading the training image.........
2021-10-26 12:21:50 Uploading - Uploading generated training model.[34m2021-10-26 12:21:45,401 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-10-26 12:21:45,403 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-26 12:21:45,411 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-10-26 12:21:45,718 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-26 12:21:45,940 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-10-26 12:21:45

# 批量推理

In [9]:
# Key prefix under which batch-transform results are written.
prefix = 'sagemaker/DEMO-batch-transform'

# Session handle and the default bucket associated with it.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# Batch transformer derived from the trained estimator.
# NOTE(review): the 'pca' segment in the output path looks like a leftover from
# another example; it only affects where results are stored.
gbm_transformer = _estimator.transformer(
    instance_type='ml.m4.xlarge',
    instance_count=1,
    output_path='s3://{}/{}/pca/transform/train'.format(bucket, prefix),
    assemble_with='Line',
    strategy='MultiRecord',
)

INFO:sagemaker:Creating model with name: lightgbm-model-training-2021-10-26-12-24-20-465


In [None]:
# CSV object fed to the batch transform job; replace with your own bucket/key.
train_s3 = 's3://sagemaker-us-west-2-517141035927/dataset/X_train.csv'

# Submit the transform job (one record per line), then block until it finishes.
gbm_transformer.transform(
    train_s3,
    split_type='Line',
    content_type='text/csv',
)
gbm_transformer.wait()

INFO:sagemaker:Creating transform job with name: lightgbm-model-training-2021-10-26-12-25-23-787


..............................

In [None]:
!aws s3 cp --recursive $pca_transformer.output_path ./

In [None]:
!head train.csv.out

In [None]:
# Register a SageMaker model from the finished training job so that a
# standalone Transformer can reference it by name.
# `pca` was never defined in this notebook; the trained estimator is `_estimator`.
# The public `latest_training_job.name` is used instead of the private
# `_current_job_name` attribute.
training_job_name = _estimator.latest_training_job.name
pca_model = sess.create_model_from_job(
    training_job_name,
    name='{}-test'.format(training_job_name),
)

In [None]:
# Input object for the test-set batch transform.
# NOTE(review): `test_s3` was never defined in this notebook. The key below is
# inferred from the later `head test.csv.out` cell (output = input name + ".out");
# confirm the object exists and replace with your own bucket/key.
test_s3 = 's3://sagemaker-us-west-2-517141035927/dataset/test.csv'

# Standalone transformer bound to the model registered above (`pca_model` holds
# the model name). `Transformer` is imported from sagemaker.transformer in the
# notebook's import cell.
pca_test_transformer = Transformer(
    model_name=pca_model,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path='s3://{}/{}/pca/transform/test'.format(bucket, prefix),
    sagemaker_session=sess,
    strategy='MultiRecord',
    assemble_with='Line',
)

# Submit the job (one CSV record per line), then block until it completes.
pca_test_transformer.transform(test_s3, content_type='text/csv', split_type='Line')
pca_test_transformer.wait()

In [None]:
# Recursively copy everything under the test transformer's S3 output path into
# the current directory. IPython substitutes `$pca_test_transformer.output_path`
# with the value from the kernel namespace before handing the line to the shell.
!aws s3 cp --recursive $pca_test_transformer.output_path ./

In [None]:
# Print the first 10000 bytes of test.csv.out.
# NOTE(review): presumably the transform result downloaded by the previous cell;
# the file name must match the input object's name plus ".out" — confirm.
!head -c 10000 test.csv.out