# Local BYOC Churn

In [19]:
import boto3
import sagemaker
from sagemaker import get_execution_role

ecr_namespace = 'sagemaker-training-containers/'
prefix = 'pca'

ecr_repository_name = ecr_namespace + prefix
role = get_execution_role()
account_id = role.split(':')[4]
region = boto3.Session().region_name
sagemaker_session = sagemaker.session.Session()
bucket = sagemaker_session.default_bucket()

print(account_id)
print(region)
print(role)
print(bucket)

057716757052
us-east-2
arn:aws:iam::057716757052:role/service-role/AmazonSageMaker-ExecutionRole-20191128T110038
sagemaker-us-east-2-057716757052


Let's take a look at the Dockerfile which defines the statements for building our custom SageMaker training container:

## Docker 생성 및 ECR 푸시

In [20]:
! cp pca_byoc_train.py docker/code/

In [21]:
%%writefile docker/Dockerfile

FROM 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3
    
# install python package
RUN pip install joblib


ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE

ENV PATH="/opt/ml/code:${PATH}"

# Copy training code
COPY code/* /opt/ml/code/
 
WORKDIR /opt/ml/code

# ENTRYPOINT ["python", "pca_train.py"]
# In order to use SageMaker Env varaibles, use the statement below
ENV SAGEMAKER_PROGRAM pca_byoc_train.py

Overwriting docker/Dockerfile


In [22]:
import os
os.environ['account_id'] = account_id
os.environ['region'] = region
os.environ['ecr_repository_name'] = ecr_repository_name

In [23]:
%%sh

ACCOUNT_ID=${account_id}
REGION=${region}
REPO_NAME=${ecr_repository_name}

echo $REGION
echo $ACCOUNT_ID
echo $REPO_NAME


# Get the login command from ECR in order to pull down the Tensorflow-gpu:1.5 image
$(aws ecr get-login --registry-ids 257758044811 --region ${region} --no-include-email)



docker build -f docker/Dockerfile -t $REPO_NAME docker

docker tag $REPO_NAME $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$REPO_NAME:latest

$(aws ecr get-login --no-include-email --registry-ids $ACCOUNT_ID)

aws ecr describe-repositories --repository-names $REPO_NAME || aws ecr create-repository --repository-name $REPO_NAME

docker push $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$REPO_NAME:latest



us-east-2
057716757052
sagemaker-training-containers/pca
Login Succeeded
Sending build context to Docker daemon  8.704kB
Step 1/8 : FROM 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3
 ---> 30adb1aa9af5
Step 2/8 : RUN pip install joblib
 ---> Using cache
 ---> 0574906c196e
Step 3/8 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 72f929011350
Step 4/8 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> 7b2b4471af62
Step 5/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> 2fc403c35061
Step 6/8 : COPY code/* /opt/ml/code/
 ---> Using cache
 ---> 05b93f78946b
Step 7/8 : WORKDIR /opt/ml/code
 ---> Using cache
 ---> 6cdb924e1b9f
Step 8/8 : ENV SAGEMAKER_PROGRAM pca_byoc_train.py
 ---> Using cache
 ---> 8f33e6b9fb2a
Successfully built 8f33e6b9fb2a
Successfully tagged sagemaker-training-containers/pca:latest
Login Succeeded
{
    "repositories": [
        {
            "repositoryArn": "arn:aws:ecr:us-east-2:057716757052:repository/sagem

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [24]:
container_image_uri = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'.format(account_id, region, ecr_repository_name)
print(container_image_uri)

057716757052.dkr.ecr.us-east-2.amazonaws.com/sagemaker-training-containers/pca:latest


##  Churn 데이타 준비

In [25]:
import pandas as pd
preprocessed_train_path_file = 's3://sagemaker-us-east-2-057716757052/sagemaker/customer-churn/transformtrain-train-output/sagemaker-scikit-learn-2020-08-12-07-07-2020-08-12-07-07-08-229/train.csv.out'
local_preprocessed_train_path_file = 'train.csv.out'
churn_df = pd.read_csv(local_preprocessed_train_path_file, header=None)
churn_df.head()
train_y = churn_df.iloc[:,0]
train_X = churn_df.iloc[:,1:]

print("Shape of train_X: ", train_X.shape)
print("Shape of train_y: ", train_y.shape)

Shape of train_X:  (2333, 69)
Shape of train_y:  (2333,)


In [26]:
! aws s3 cp  {preprocessed_train_path_file} .

download: s3://sagemaker-us-east-2-057716757052/sagemaker/customer-churn/transformtrain-train-output/sagemaker-scikit-learn-2020-08-12-07-07-2020-08-12-07-07-08-229/train.csv.out to ./train.csv.out


In [27]:
from sklearn import datasets
import os
import numpy as np


os.makedirs('./data', exist_ok =True)
np.savetxt('./data/churn.csv', train_X, delimiter=',',
           # fmt='%1.3f, %1.3f, %1.3f, %1.3f'
           fmt='%1.3f'
          )

WORK_DIRECTORY = 'data'
prefix = 'Scikit-pca-custom'
train_input = sagemaker_session.upload_data(WORK_DIRECTORY,
                                            key_prefix="{}/{}".format(prefix, WORK_DIRECTORY)
                                           )
print("train_input: ", train_input)


train_input:  s3://sagemaker-us-east-2-057716757052/Scikit-pca-custom/data


# PCA 학습

In [28]:
%%time

import sagemaker

instance_type = 'local'
# instance_type = 'ml.m4.xlarge'

pca_estimator = sagemaker.estimator.Estimator(container_image_uri,
                                    role, 
                                    train_instance_count=1, 
                                    train_instance_type= instance_type,
                                    base_job_name=prefix)

pca_estimator.set_hyperparameters(n_components= 2)

train_config = sagemaker.session.s3_input(train_input, content_type='text/csv')

pca_estimator.fit({'train': train_config})

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


Creating tmpafvbmov1_algo-1-3fxgi_1 ... 
[1BAttaching to tmpafvbmov1_algo-1-3fxgi_12mdone[0m
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,421 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,423 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,432 sagemaker_sklearn_container.training INFO     Invoking user training script.
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,432 sagemaker-containers INFO     Module pca_byoc_train does not provide a setup.py. 
[36malgo-1-3fxgi_1  |[0m Generating setup.py
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,433 sagemaker-containers INFO     Generating setup.cfg
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,433 sagemaker-containers INFO     Generating MANIFEST.in
[36malgo-1-3fxgi_1  |[0m 2020-08-12 12:02:17,433 sagemaker-containers INFO     Installing module with the following command:

# PCA 트랜스포머

In [29]:
preprocessed_train_path_file
preproc_df = pd.read_csv(preprocessed_train_path_file, header=None)

preproc_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,0.0,0.119414,-0.596238,1.744368,0.978957,-0.028993,-0.893185,-0.801703,-1.982529,-1.530559,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,0.0,-1.852652,-0.596238,0.140284,-0.310405,0.970689,-0.689888,0.146389,1.232901,0.124852,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,1.0,1.181295,-0.596238,1.83513,0.185503,0.030988,-0.639063,1.568529,-0.063643,-0.846802,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,0.0,0.776769,-0.596238,0.216227,0.334276,0.136954,1.393914,1.394712,-0.634123,0.844596,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,0.0,-0.234547,1.508734,-0.459859,0.483049,-0.230929,0.224952,1.056954,0.92173,-0.810815,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [30]:
num_cols = preproc_df.shape[1]
num_cols

70

In [31]:


instance_type = 'local'
# instance_type = 'ml.m4.2xlarge'
transform_train_output_path = 's3://{}/{}/{}/'.format(bucket, prefix, 'transformtrain-pca-train-output')

pca_model = pca_estimator.create_model(
    env={'TRANSFORM_MODE': 'feature-transform', 'LENGTH_COLS': str(num_cols)})

# scikit_learn_inferencee_model 에서 Train Transformer 생성
transformer_train = pca_model.transformer(
    instance_count=1, 
    instance_type= instance_type,
    assemble_with = 'Line',
    output_path = transform_train_output_path,
    accept = 'text/csv')


# Preprocess training input
transformer_train.transform(preprocessed_train_path_file, 
                            content_type='text/csv',                            
                           )

print('Waiting for transform job: ' + transformer_train.latest_transform_job.job_name)
transformer_train.wait()

preprocessed_train_path = transformer_train.output_path + transformer_train.latest_transform_job.job_name
print(preprocessed_train_path)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


Attaching to tmpugazg8gz_algo-1-712ly_1
[36malgo-1-712ly_1  |[0m Processing /opt/ml/code
[36malgo-1-712ly_1  |[0m Building wheels for collected packages: pca-byoc-train
[36malgo-1-712ly_1  |[0m   Building wheel for pca-byoc-train (setup.py) ... [?25ldone
[36malgo-1-712ly_1  |[0m [?25h  Created wheel for pca-byoc-train: filename=pca_byoc_train-1.0.0-py2.py3-none-any.whl size=8213 sha256=ff605f7d01bbb7fd3529569a1017ef0a23973be57207926a78f6c244c864f152
[36malgo-1-712ly_1  |[0m   Stored in directory: /tmp/pip-ephem-wheel-cache-m9xrtfdb/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3
[36malgo-1-712ly_1  |[0m Successfully built pca-byoc-train
[36malgo-1-712ly_1  |[0m Installing collected packages: pca-byoc-train
[36malgo-1-712ly_1  |[0m Successfully installed pca-byoc-train-1.0.0
[36malgo-1-712ly_1  |[0m   import imp
[36malgo-1-712ly_1  |[0m [2020-08-12 12:02:22 +0000] [68] [INFO] Starting gunicorn 19.9.0
[36malgo-1-712ly_1  |[0m [2020-08-12 12:02:2

# RealTime Endpoint

In [32]:
pca_infer_model = pca_estimator.create_model(
    env={'TRANSFORM_MODE': 'inverse-label-transform', 'LENGTH_COLS': '69'})


Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


In [33]:
# %%time 

# from time import gmtime, strftime
# timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# instance_type='local'
# # instance_type='ml.m4.2xlarge'


# endpoint_name= 'churn-model-pipeline-endpoint-' + timestamp_prefix

# deployed_model = pca_infer_model.deploy(
#     initial_instance_count=1, 
#     instance_type= instance_type, 
#     endpoint_name = endpoint_name,        
#     wait = True
# )

In [34]:
from sagemaker.predictor import csv_serializer

instance_type = 'local'
# instance_type = 'ml.m4.xlarge'

est_predictor = pca_estimator.deploy(
    initial_instance_count = 1,
    instance_type = instance_type,
    model_name = pca_infer_model.name,
    serializer=csv_serializer
)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


Attaching to tmpxbq8pe8i_algo-1-ecm1y_1
[36malgo-1-ecm1y_1  |[0m Processing /opt/ml/code
[36malgo-1-ecm1y_1  |[0m Building wheels for collected packages: pca-byoc-train
[36malgo-1-ecm1y_1  |[0m   Building wheel for pca-byoc-train (setup.py) ... [?25ldone
[36malgo-1-ecm1y_1  |[0m [?25h  Created wheel for pca-byoc-train: filename=pca_byoc_train-1.0.0-py2.py3-none-any.whl size=8214 sha256=12d4b263c49a1cbb1388fe5e9c275948f02aba5ebece6719b587908ce0a994ce
[36malgo-1-ecm1y_1  |[0m   Stored in directory: /tmp/pip-ephem-wheel-cache-bhb9fbo9/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3
[36malgo-1-ecm1y_1  |[0m Successfully built pca-byoc-train
[36malgo-1-ecm1y_1  |[0m Installing collected packages: pca-byoc-train
[36malgo-1-ecm1y_1  |[0m Successfully installed pca-byoc-train-1.0.0
[36malgo-1-ecm1y_1  |[0m   import imp
[36malgo-1-ecm1y_1  |[0m [2020-08-12 12:02:29 +0000] [68] [INFO] Starting gunicorn 19.9.0
[36malgo-1-ecm1y_1  |[0m [2020-08-12 12:02:2

In [35]:
# sample = train_X[0:1].reshape(1,-1) # Single Sample (1,-1)
sample = train_X[0:2]
print("Shape of sample: ", sample.shape)
sample.values
result = est_predictor.predict(sample.values)
result

Shape of sample:  (2, 69)
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:02:31,843 INFO - root - input_fn: 
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:02:31,843 INFO - root - shape of requested data: '(2, 69)'
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:02:31,901 INFO - root - requested data: '         0   ...   68
[36malgo-1-ecm1y_1  |[0m 0  0.119414  ...  0.0
[36malgo-1-ecm1y_1  |[0m 1 -1.852652  ...  0.0
[36malgo-1-ecm1y_1  |[0m 
[36malgo-1-ecm1y_1  |[0m [2 rows x 69 columns]'
[36malgo-1-ecm1y_1  |[0m predict_fn: Starting 
[36malgo-1-ecm1y_1  |[0m ################ Environment Variables: ################
[36malgo-1-ecm1y_1  |[0m {'HOME': '/root',
[36malgo-1-ecm1y_1  |[0m  'HOSTNAME': 'ab1b2923d31e',
[36malgo-1-ecm1y_1  |[0m  'KMP_INIT_AT_FORK': 'FALSE',
[36malgo-1-ecm1y_1  |[0m  'LANG': 'C.UTF-8',
[36malgo-1-ecm1y_1  |[0m  'LC_ALL': 'C.UTF-8',
[36malgo-1-ecm1y_1  |[0m  'PATH': '/opt/ml/code:/miniconda3/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin',
[

b'[[-0.8231424492120871, -0.1087239516814408], [-0.34342985863498615, 0.09079016341557401]]'

[36malgo-1-ecm1y_1  |[0m 172.18.0.1 - - [12/Aug/2020:12:02:32 +0000] "POST /invocations HTTP/1.1" 200 89 "-" "-"


In [37]:
result

b'[[-0.8231424492120871, -0.1087239516814408], [-0.34342985863498615, 0.09079016341557401]]'

[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:08:18,272 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36malgo-1-ecm1y_1  |[0m   import imp
[36malgo-1-ecm1y_1  |[0m 172.18.0.1 - - [12/Aug/2020:12:08:18 +0000] "GET /ping HTTP/1.1" 200 0 "-" "-"
[36malgo-1-ecm1y_1  |[0m 172.18.0.1 - - [12/Aug/2020:12:08:18 +0000] "GET /execution-parameters HTTP/1.1" 404 232 "-" "-"
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:08:18,811 INFO - root - input_fn: 
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:08:18,811 INFO - root - shape of requested data: '(2333, 21)'
[36malgo-1-ecm1y_1  |[0m 2020-08-12 12:08:18,836 INFO - root - requested data: '      0   ...      20
[36malgo-1-ecm1y_1  |[0m 0     ID  ...  False.
[36malgo-1-ecm1y_1  |[0m 1     WY  ...  False.
[36malgo-1-ecm1y_1  |[0m 2     NV  ...   True.
[36malgo-1-ecm1y_1  |[0m 3     OR  ...  False.
[36malgo-1-ecm1y_1  |[0m 4     NJ  ...  False.
[36malgo-1-ecm1y_1  |[0m ...   ..  ...     ...
[36malgo-1-ecm1y_1  |[0