In [8]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import sys
import time
import json
from IPython.display import display
from time import strftime, gmtime
import boto3
import re

!{sys.executable} -m pip install sagemaker -U
!{sys.executable} -m pip install sagemaker-experiments

import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker import get_execution_role
from sagemaker.local import LocalSession
from sagemaker.predictor import csv_serializer
from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig
from sagemaker.model_monitor import DataCaptureConfig, DatasetFormat, DefaultModelMonitor
from sagemaker.s3 import S3Uploader, S3Downloader

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

Collecting sagemaker
  Downloading sagemaker-2.66.1.tar.gz (450 kB)
[K     |████████████████████████████████| 450 kB 23 kB/s eta 0:00:013
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.66.1-py2.py3-none-any.whl size=624253 sha256=9ba29e0d1b3bd5aa514650ed6c4457cd6e71e6be4b78c3010d0bc3e118f4377a
  Stored in directory: /home/ec2-user/.cache/pip/wheels/76/e2/c5/05eca6d01d7b18bb49b111b63c6692debfea4c5cc4191542ff
Successfully built sagemaker
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.59.5
    Uninstalling sagemaker-2.59.5:
      Successfully uninstalled sagemaker-2.59.5
Successfully installed sagemaker-2.66.1


### 打包和部署镜像

In [28]:
%%time
!aws ecr get-login-password --region cn-northwest-1 | docker login --username AWS --password-stdin 451049120500.dkr.ecr.cn-northwest-1.amazonaws.com.cn
!bash build_push.sh

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  3.459MB
Step 1/3 : FROM 451049120500.dkr.ecr.cn-northwest-1.amazonaws.com.cn/sagemaker-scikit-learn:0.20.0-cpu-py3
 ---> 6c10beb91746
Step 2/3 : COPY requirements.txt /requirements.txt
 ---> Using cache
 ---> c3059de34c94
Step 3/3 : RUN pip install --no-cache -r /requirements.txt -i https://pypi.douban.com/simple &&     rm /requirements.txt
 ---> Running in fabcd9b9c016
Looking in indexes: https://pypi.douban.com/simple
Collecting lightgbm
  Downloading https://pypi.doubanio.com/packages/18/b2/fff8370f48549ce223f929fe8cab4ee6bf285a41f86037d91312b48ed95b/lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl (2.0 MB)
Collecting joblib
  Downloading https://pypi.doubanio.com/packages/55/85/70c6602b078bd9e6f3da4f467047e906525c355a4dacd4f71b97a35d9897/joblib-1.0.1-py3-none

### 模型训练

In [9]:
hyperparameters = {
    "tree_n_estimators": 20000,
    "tree_max_depth": 2,
    "tree_num_leaves": 31,
    "tree_min_child_samples": 1,
    "tree_boosting_type": "dart",
    "min_child_weight":0.0001
}

_metric_definitions = []
_metric_definitions.append({'Name':'validation_l2','Regex':' l2: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'})

_estimator = SKLearn(
    image_uri='542319707026.dkr.ecr.cn-northwest-1.amazonaws.com.cn/training-lightgbm',
    entry_point='entry_point.py',
    source_dir='training_code',
    hyperparameters=hyperparameters,
    metric_definitions=_metric_definitions,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.c5.xlarge',
    output_path='s3://sagemaker-cn-northwest-1-542319707026/baozun/lightgbm/output',
    base_job_name='lightgbm-model-training',
    disable_profiler=True
)

In [10]:
data_channels = {
    'x_train': 's3://sagemaker-cn-northwest-1-542319707026/baozun/datasets/X_train.csv',
    'y_train': 's3://sagemaker-cn-northwest-1-542319707026/baozun/datasets/y_train.csv',
    'x_test': 's3://sagemaker-cn-northwest-1-542319707026/baozun/datasets/X_test.csv',
    'y_test': 's3://sagemaker-cn-northwest-1-542319707026/baozun/datasets/y_test.csv'
                }

_estimator.fit(data_channels)

2021-08-13 08:34:02 Starting - Starting the training job...
2021-08-13 08:34:04 Starting - Launching requested ML instances...
2021-08-13 08:34:52 Starting - Preparing the instances for training......
2021-08-13 08:35:53 Downloading - Downloading input data...
2021-08-13 08:36:08 Training - Downloading the training image........[34m2021-08-13 08:37:37,741 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-08-13 08:37:37,743 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-13 08:37:37,751 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-08-13 08:37:38,055 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-13 08:37:38,670 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-13 08:37:38,681 sagemaker-training-toolkit INFO     No GPUs detected (normal if 

### 超参调优

In [16]:
### Hyperparameter Tuning

from time import gmtime, strftime
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

_tuning_job_name = "bz-lightgbm-job-{}".format(strftime("%d-%H-%M-%S", gmtime()))

_hyperparameter_ranges = {
                        'tree_max_depth': IntegerParameter(2, 9),
                        'tree_num_leaves':IntegerParameter(25,35),
                        'tree_min_child_samples':IntegerParameter(20,25),
                        'min_child_weight':ContinuousParameter(0.001,0.01)
                        }


_tuner = HyperparameterTuner(estimator = _estimator,
                            objective_metric_name = 'validation_l2',
                            metric_definitions = [{'Name':'validation_l2','Regex':' l2: ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'}],
                            hyperparameter_ranges = _hyperparameter_ranges,
                            objective_type='Minimize',
                            max_jobs=10,
                            strategy = 'Bayesian',
                            max_parallel_jobs=5)

_tuner.fit(data_channels, 
          job_name=_tuning_job_name,
          include_cls_metadata=False)
_tuner.wait()

tuner_metrics = sagemaker.HyperparameterTuningJobAnalytics(_tuning_job_name)
tuner_metrics.dataframe().sort_values(['FinalObjectiveValue'], ascending=False).head(5)

................................................................................................................................................................................................................................................................!
!


Unnamed: 0,min_child_weight,tree_max_depth,tree_min_child_samples,tree_num_leaves,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
19,0.008552,4.0,24.0,34.0,bz-lightgbm-job-13-09-11-40-001-2048998c,Completed,0.00101,2021-08-13 09:13:45+00:00,2021-08-13 09:16:27+00:00,162.0
17,0.009498,8.0,25.0,25.0,bz-lightgbm-job-13-09-11-40-003-0f4379bd,Completed,0.000973,2021-08-13 09:13:55+00:00,2021-08-13 09:17:02+00:00,187.0
6,0.002503,2.0,23.0,29.0,bz-lightgbm-job-13-09-11-40-014-21ca8141,Completed,0.000948,2021-08-13 09:24:14+00:00,2021-08-13 09:26:33+00:00,139.0
18,0.004052,4.0,25.0,32.0,bz-lightgbm-job-13-09-11-40-002-a54b4433,Completed,0.000919,2021-08-13 09:14:00+00:00,2021-08-13 09:16:41+00:00,161.0
1,0.004967,5.0,21.0,25.0,bz-lightgbm-job-13-09-11-40-019-85a8dd3c,Completed,0.000854,2021-08-13 09:29:15+00:00,2021-08-13 09:32:20+00:00,185.0


In [18]:
tuner_metrics = sagemaker.HyperparameterTuningJobAnalytics(_tuning_job_name)

In [20]:
tuner_metrics.dataframe().sort_values(['FinalObjectiveValue'], ascending=True).head(5)

Unnamed: 0,min_child_weight,tree_max_depth,tree_min_child_samples,tree_num_leaves,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
15,0.003259,3.0,22.0,28.0,bz-lightgbm-job-13-09-11-40-005-6c6207ad,Completed,0.000727,2021-08-13 09:14:17+00:00,2021-08-13 09:16:49+00:00,152.0
13,0.001,3.0,22.0,26.0,bz-lightgbm-job-13-09-11-40-007-4275574b,Completed,0.000727,2021-08-13 09:19:31+00:00,2021-08-13 09:21:59+00:00,148.0
9,0.001218,2.0,21.0,30.0,bz-lightgbm-job-13-09-11-40-011-e1ffe1c5,Completed,0.000733,2021-08-13 09:23:49+00:00,2021-08-13 09:26:31+00:00,162.0
2,0.009292,2.0,21.0,35.0,bz-lightgbm-job-13-09-11-40-018-79f394d8,Completed,0.000733,2021-08-13 09:29:24+00:00,2021-08-13 09:32:07+00:00,163.0
3,0.004865,2.0,21.0,29.0,bz-lightgbm-job-13-09-11-40-017-78387cda,Completed,0.000733,2021-08-13 09:28:57+00:00,2021-08-13 09:31:21+00:00,144.0


### 模型在线服务

In [9]:
_estimartor = sagemaker.estimator.Estimator.attach('bz-lightgbm-job-13-09-11-40-005-6c6207ad')


2021-08-13 09:16:49 Starting - Preparing the instances for training
2021-08-13 09:16:49 Downloading - Downloading input data
2021-08-13 09:16:49 Training - Training image download completed. Training in progress.
2021-08-13 09:16:49 Uploading - Uploading generated training model
2021-08-13 09:16:49 Completed - Training job completed


In [56]:
from sagemaker.sklearn import SKLearnModel

_endpoint_name = "endpoint-lightgbm-{}".format(strftime("%d-%H-%M-%S", gmtime()))

_model = SKLearnModel(
    model_data=_estimartor.model_data,
    role=sagemaker.get_execution_role(),
    entry_point='entry_point.py',
    source_dir='inference_code',
    framework_version = '0.20.0',
    py_version='py3'
)

In [57]:
_model.deploy(
    endpoint_name=_endpoint_name,
    instance_type='ml.c5.xlarge',
    initial_instance_count=1
)

--------------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f8796c112e8>

### 模型在线服务调用

In [61]:
import boto3
import json

runtime_client = boto3.client('runtime.sagemaker', region_name='cn-northwest-1')

data = {'inputs':[[0.06349206349206349,533.0,1235.0,10.0,0.0,36.0,10.0,10.0,8.0,10.0,1290.0,8.0,8.0,8.0,468.63]]}

response = runtime_client.invoke_endpoint(EndpointName=_endpoint_name, 
                                   ContentType='application/json', 
                                   Body=json.dumps(data).encode('utf-8'))

result = response['Body'].read().decode('ascii')

print('Predicted label is {}'.format(result))

Predicted label is {"predictions":[0.01094387974143577]}



### 模型批量转换

In [40]:
_estimartor = sagemaker.estimator.Estimator.attach('bz-lightgbm-job-13-09-11-40-005-6c6207ad')


2021-08-13 09:16:49 Starting - Preparing the instances for training
2021-08-13 09:16:49 Downloading - Downloading input data
2021-08-13 09:16:49 Training - Training image download completed. Training in progress.
2021-08-13 09:16:49 Uploading - Uploading generated training model
2021-08-13 09:16:49 Completed - Training job completed


In [37]:
import sagemaker
from sagemaker.sklearn import SKLearnModel
model_data = "s3://sagemaker-us-west-2-517141035927/output/lightgbm-model-training-2021-10-26-12-17-52-220/output/model.tar.gz"
_model = SKLearnModel(
    model_data=model_data,
    role=sagemaker.get_execution_role(),
    entry_point='entry_point.py',
    source_dir='batch_code',
    framework_version = '0.20.0',
    py_version='py3'
)

In [38]:
# !pip install numpy pandas --upgrade

In [39]:
from sagemaker import get_execution_role
from time import strftime, gmtime
sagemaker_session = sagemaker.Session()

role = get_execution_role()
region = sagemaker_session.boto_session.region_name

prediction_data_path = 's3://sagemaker-us-west-2-517141035927/dataset/v_data.csv'
out_predict_data_path = 's3://sagemaker-us-west-2-517141035927/output/'

_instance_type = 'ml.c5.4xlarge'
_instance_count = 1

_max_concurrent_transforms = 1
_max_payload = 10

_job_name = 'lightgbm-batch-{}'.format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
print(_job_name)

lightgbm_transformer = _model.transformer(
                            instance_count=_instance_count,
                            instance_type=_instance_type,
                            strategy = 'MultiRecord',
                            max_concurrent_transforms=_max_concurrent_transforms,
                            max_payload=_max_payload,
                            output_path=out_predict_data_path,
                            assemble_with='Line',
                            accept='text/csv')

lightgbm_transformer.transform(
    data=prediction_data_path,
    content_type='text/csv',
    split_type='Line',
#     input_filter="$[1:]",
#     join_source="Input",
#     output_filter="$[0 ,-1]",
    job_name=_job_name
)
lightgbm_transformer.wait()

lightgbm-batch-2021-10-27-11-59-34
............................[34mProcessing /opt/ml/code[0m
[34mBuilding wheels for collected packages: entry-point
  Building wheel for entry-point (setup.py): started
  Building wheel for entry-point (setup.py): finished with status 'done'
  Created wheel for entry-point: filename=entry_point-1.0.0-py2.py3-none-any.whl size=4835 sha256=fe76399590601d1ca9f552ff98f402a4c4737423d07e8e608b05bd5c60b916c6
  Stored in directory: /tmp/pip-ephem-wheel-cache-ursdz_33/wheels/3e/0f/51/2f1df833dd0412c1bc2f5ee56baac195b5be563353d111dca6[0m
[34mSuccessfully built entry-point[0m
[34mInstalling collected packages: entry-point[0m
[34mSuccessfully installed entry-point-1.0.0[0m
[34mLooking in indexes: https://pypi.douban.com/simple[0m
[34m2021/10/27 12:04:05 [crit] 25#25: *1 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 169.254.255.130, server: , request: "GET /ping HTTP/1.1", upstream: "ht

UnexpectedStatusException: Error for Transform job lightgbm-batch-2021-10-27-11-59-34: Failed. Reason: AlgorithmError: See job logs for more information