# catboost_bring_your_own_container_local_training_and_serving.py
# This is a sample Python program that trains a simple CatBoost Regressor tree model, and then performs inference.
# This implementation will work on your *local computer*.
#
# Prerequisites:
# 1. Install the required Python packages:
#    pip install boto3 sagemaker pandas scikit-learn
#    pip install 'sagemaker[local]'
# 2. Docker Desktop must be installed on your computer and running.
# 3. Open a terminal and run the following command to build the container image:
#    docker build -t sagemaker-catboost-regressor-local container/.
########################################################################################################################
import pandas as pd
import os
from sagemaker.estimator import Estimator
from sagemaker.local import LocalSession
from sagemaker.predictor import csv_serializer
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
# Create a SageMaker LocalSession and switch on its 'local_code' option.
sagemaker_session = LocalSession()
sagemaker_session.config = dict(local=dict(local_code=True))

# A placeholder role ARN is sufficient when training locally.
role = (
    'arn:aws:iam::111111111111:role/service-role/'
    'AmazonSageMaker-ExecutionRole-20200101T000001'
)
# Load the California housing dataset and split it into three partitions:
# 75% train, then the remaining 25% is halved into validation and test.
# A fixed random_state keeps the partitions reproducible between runs.
data = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.25, random_state=45
)
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, random_state=45
)

# Train and validation frames hold the feature columns plus a trailing
# 'target' column.
trainX = pd.DataFrame(X_train, columns=data.feature_names)
trainX['target'] = y_train

# Build the validation frame from the validation split (X_val / y_val), so
# the validation file is distinct from the held-out test rows.
valX = pd.DataFrame(X_val, columns=data.feature_names)
valX['target'] = y_val

# The test frame has features only; it is the payload sent for inference.
testX = pd.DataFrame(X_test, columns=data.feature_names)

# One directory per channel; exist_ok makes re-running the script safe.
os.makedirs('data/train', exist_ok=True)
local_train = './data/train/california_train.csv'

os.makedirs('data/validation', exist_ok=True)
local_validation = './data/validation/california_validation.csv'

os.makedirs('data/test', exist_ok=True)
local_test = './data/test/california_test.csv'

# Write plain CSV without a header row or index column.
trainX.to_csv(local_train, header=False, index=False)
valX.to_csv(local_validation, header=False, index=False)
testX.to_csv(local_test, header=False, index=False)
# Image tag produced by the `docker build` step listed in the prerequisites.
image = 'sagemaker-catboost-regressor-local'

# Environment variables handed to the serving container at deploy time.
env = {"MODEL_SERVER_WORKERS": "2"}

# Estimator backed by the custom image, running on a single "local" instance.
local_regressor = Estimator(
    image,
    role,
    instance_count=1,
    instance_type="local",
)

# Input channels are passed as file:// URIs pointing at the CSVs on disk.
train_location = f'file://{local_train}'
validation_location = f'file://{local_validation}'
channels = {
    'train': train_location,
    'validation': validation_location,
}

local_regressor.fit(channels, logs=True)
# Deploy the trained model to a single local endpoint; request payloads are
# serialized as CSV and `env` is forwarded to the serving container.
predictor = local_regressor.deploy(
    initial_instance_count=1,
    instance_type='local',
    serializer=csv_serializer,
    env=env,
)

try:
    # The test CSV (features only, no header) is sent as-is as the payload.
    with open(local_test, 'r') as f:
        payload = f.read().strip()

    predicted = predictor.predict(payload).decode('utf-8')
    print(predicted)
finally:
    # Always tear the endpoint down, even if reading the payload or the
    # prediction call fails, so it is not left running.
    predictor.delete_endpoint()