# 1. Libraries and Installation

In [None]:
!pip install sagemaker-experiments
!pip install s3fs
!pip install matplotlib
!pip install seaborn
!pip install shap
!pip install smdebug



In [None]:
from io import StringIO
import numpy as np
import os
import pandas as pd
import boto3
import time
import s3fs
from datetime import datetime

import matplotlib.pyplot as plt
import seaborn as sns
import re
import shap
from scipy import stats
import copy

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.analytics import ExperimentAnalytics

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

from sagemaker.sklearn.estimator import SKLearn
from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig,CollectionConfig
from sagemaker.estimator import Estimator
from sagemaker.session import s3_input
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import Session

from smdebug.trials  import create_trial

[2021-05-19 08:27:51.057 ip-172-16-47-84:18285 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None


# 2. Configs

In [None]:
# Capture the wall-clock time once; the formatted string is reused below to
# build unique job names.
now = datetime.now()
current_time = f"{now:%Y-%m-%d--%H-%M-%S}"
print(current_time)

2021-05-19--08-27-51


In [None]:
from sagemaker.local import LocalSession

# Local-mode SageMaker session built on the default boto3 session, with the
# `local_code` option switched on.
sagemaker_session = LocalSession(boto3.session.Session())
sagemaker_session.config = {'local':{'local_code':True}}

# Hard-coded IAM execution role ARN handed to the estimator.
# The literal must not carry surrounding whitespace: a trailing space is not
# part of a valid ARN and is rejected once the role is validated by the service.
role = "arn:aws:iam::565342550273:role/service-role/AmazonSageMaker-ExecutionRole-20210524T170167"
      

In [None]:
# S3 bucket and key prefixes
bucket = "housing-1717"
prefix = 'scikit-pre-model-Inference-pipeline'

# Input datasets (CSV files without a header row, per their file names).
# The URIs are derived from `bucket`, like the output/debug paths below, so
# changing the bucket cannot leave the inputs pointing at the old one.
input_data_prefix = 'housing/input-datasets'
train_data = f"s3://{bucket}/{input_data_prefix}/train_data_without_header.csv"
test_data = f"s3://{bucket}/{input_data_prefix}/test_data_without_header.csv"

# Scikit-learn container version, entry-point script and its extra dependency file.
FRAMEWORK_VERSION = "0.23-1"
script_path = 'sklearn_pipeline.py'
dependency_path ='dependancies.py'

# Job names are made unique per run by appending the run timestamp.
base_job_name = f"Builtin-XGB-algo-local-{current_time}"

# Where transformed datasets are written for this run.
output_data_prefix = f'housing/datasets/output/{base_job_name}'
data_output_path = f's3://{bucket}/{output_data_prefix}'

# Per-run debug location.
debug_prefix = f'housing/jobs/debug/{base_job_name}'
debug_path = f's3://{bucket}/{debug_prefix}'

experiment_name_prefix = "builtin-xgboost-track13"

# 3. Batch Transform 
<b><h2> 3.1 Fit the train data </h2></b>

In [None]:
# Scikit-learn preprocessing estimator, run in SageMaker local mode
# (instance_type="local").
sklearn_preprocessor = SKLearn(
    entry_point=script_path,
    role=role,
    framework_version=FRAMEWORK_VERSION,
    instance_type="local",
    # use_spot_instances=True,
    # SageMaker Python SDK v2 parameter names: `train_max_run` / `train_max_wait`
    # were renamed to `max_run` / `max_wait`; the old spellings are only accepted
    # through a deprecation shim that logs a warning for each one.
    max_run=600,
    max_wait=1200,
    # sagemaker_session=sagemaker_session,
    dependencies=[dependency_path],
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.


In [None]:
# Fit the preprocessor: the single 'train' channel points at the training CSV,
# and the job is named after this run.
training_channels = {'train': train_data}
sklearn_preprocessor.fit(inputs=training_channels, job_name=base_job_name)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: Builtin-XGB-algo-local-2021-05-19--08-27-51
INFO:sagemaker.local.local_session:Starting training job
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-r1jly:
    command: train
    container_name: xb4hfjqccv-algo-1-r1jly
    environment:
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-r1jly
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmpdpsz6evo/algo-1-r1jly/output/data:/opt/ml/output/data
    - /tmp/tmpdpsz6evo/algo-1-r1jly/

Creating xb4hfjqccv-algo-1-r1jly ... 
Creating xb4hfjqccv-algo-1-r1jly ... done
Attaching to xb4hfjqccv-algo-1-r1jly
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,182 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,186 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,198 sagemaker_sklearn_container.training INFO     Invoking user training script.
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,383 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,403 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mxb4hfjqccv-algo-1-r1jly |[0m 2021-05-19 08:31:47,418 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mxb4hfjqccv-algo-1-r1jly |[0m

<b><h2>3.2 Transform the train data</h2></b>

In [None]:
# Build a batch transformer from the fitted SKLearn estimator.
# Settings are collected first so they can be read (and tweaked) in one place.
transformer_settings = dict(
    instance_count=1,
    instance_type='local',
    assemble_with='Line',
    accept='text/csv',
    output_path=data_output_path,
)
transformer = sklearn_preprocessor.transformer(**transformer_settings)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2021-05-19-08-31-54-643


In [None]:
# Run the fitted preprocessor over the training CSV as a batch transform job.
train_transform_job_name = f"{base_job_name}-train"
transformer.transform(
    data=train_data,
    content_type="text/csv",
    job_name=train_transform_job_name,
)
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# NOTE(review): this is the transformer-level output location, not a per-job
# file path — confirm downstream consumers expect a shared prefix.
preprocessed_train = transformer.output_path

INFO:sagemaker:Creating transform job with name: Builtin-XGB-algo-local-2021-05-19--08-27-51-train
INFO:sagemaker.local.image:serving
INFO:sagemaker.local.image:creating hosting dir in /tmp/tmpr51ufue8
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-150re:
    command: serve
    container_name: vo48o0rjz9-algo-1-150re
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-150re
    ports:
    - 8080:8080
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmphsrpiyw_:/opt/ml/model
version: '2.3'

INFO:sagemaker.local.image:docker command: docker-compose -f /tmp/t

Attaching to vo48o0rjz9-algo-1-150re
[36mvo48o0rjz9-algo-1-150re |[0m 2021-05-19 08:32:00,924 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mvo48o0rjz9-algo-1-150re |[0m 2021-05-19 08:32:00,927 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mvo48o0rjz9-algo-1-150re |[0m 2021-05-19 08:32:00,928 INFO - sagemaker-containers - nginx config: 
[36mvo48o0rjz9-algo-1-150re |[0m worker_processes auto;
[36mvo48o0rjz9-algo-1-150re |[0m daemon off;
[36mvo48o0rjz9-algo-1-150re |[0m pid /tmp/nginx.pid;
[36mvo48o0rjz9-algo-1-150re |[0m error_log  /dev/stderr;
[36mvo48o0rjz9-algo-1-150re |[0m 
[36mvo48o0rjz9-algo-1-150re |[0m worker_rlimit_nofile 4096;
[36mvo48o0rjz9-algo-1-150re |[0m 
[36mvo48o0rjz9-algo-1-150re |[0m events {
[36mvo48o0rjz9-algo-1-150re |[0m   worker_connections 2048;
[36mvo48o0rjz9-algo-1-150re |[0m }
[36mvo48o0rjz9-algo-1-150re |[0m 
[36mvo48o0rjz9-algo-1-150re |[0m http {
[36mvo48o0rj

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10
INFO:sagemaker.local.entities:Container still not up, got: 502


[36mvo48o0rjz9-algo-1-150re |[0m 2021/05/19 08:32:03 [crit] 14#14: *1 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 172.18.0.1, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "localhost:8080"
[36mvo48o0rjz9-algo-1-150re |[0m 172.18.0.1 - - [19/May/2021:08:32:03 +0000] "GET /ping HTTP/1.1" 502 182 "-" "python-urllib3/1.26.4"
[36mvo48o0rjz9-algo-1-150re |[0m [2021-05-19 08:32:03 +0000] [30] [INFO] Starting gunicorn 20.0.4
[36mvo48o0rjz9-algo-1-150re |[0m [2021-05-19 08:32:03 +0000] [30] [INFO] Listening at: unix:/tmp/gunicorn.sock (30)
[36mvo48o0rjz9-algo-1-150re |[0m [2021-05-19 08:32:03 +0000] [30] [INFO] Using worker: gevent
[36mvo48o0rjz9-algo-1-150re |[0m [2021-05-19 08:32:03 +0000] [33] [INFO] Booting worker with pid: 33
[36mvo48o0rjz9-algo-1-150re |[0m [2021-05-19 08:32:03 +0000] [34] [INFO] Booting worker with pid: 34


INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 15


[36mvo48o0rjz9-algo-1-150re |[0m 2021-05-19 08:32:08,514 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mvo48o0rjz9-algo-1-150re |[0m 172.18.0.1 - - [19/May/2021:08:32:09 +0000] "GET /ping HTTP/1.1" 200 0 "-" "python-urllib3/1.26.4"
[36mvo48o0rjz9-algo-1-150re |[0m 172.18.0.1 - - [19/May/2021:08:32:09 +0000] "GET /execution-parameters HTTP/1.1" 404 232 "-" "python-urllib3/1.26.4"
[36mvo48o0rjz9-algo-1-150re |[0m 2021-05-19 08:32:09,420 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mvo48o0rjz9-algo-1-150re |[0m 172.18.0.1 - - [19/May/2021:08:32:10 +0000] "POST /invocations HTTP/1.1" 200 4081216 "-" "python-urllib3/1.26.4"
Gracefully stopping... (press Ctrl+C again to force)
.Waiting for transform job: Builtin-XGB-algo-local-2021-05-19--08-27-51-train
.

<b><h2>3.3 Transform the test data</h2></b>

In [None]:
# Run the fitted preprocessor over the test CSV as a batch transform job.
test_transform_job_name = f"{base_job_name}-test"
transformer.transform(
    data=test_data,
    content_type="text/csv",
    job_name=test_transform_job_name,
)
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# NOTE(review): this is the transformer-level output location, not a per-job
# file path — confirm downstream consumers expect a shared prefix.
preprocessed_test = transformer.output_path

INFO:sagemaker:Creating transform job with name: Builtin-XGB-algo-local-2021-05-19--08-27-51-test
INFO:sagemaker.local.image:serving
INFO:sagemaker.local.image:creating hosting dir in /tmp/tmp20rqvt87
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-2s5hk:
    command: serve
    container_name: 15147y6kxi-algo-1-2s5hk
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-2s5hk
    ports:
    - 8080:8080
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmpyy3ufved:/opt/ml/model
version: '2.3'

INFO:sagemaker.local.image:docker command: docker-compose -f /tmp/tm

Attaching to 15147y6kxi-algo-1-2s5hk
[36m15147y6kxi-algo-1-2s5hk |[0m 2021-05-19 08:32:13,914 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m15147y6kxi-algo-1-2s5hk |[0m 2021-05-19 08:32:13,917 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m15147y6kxi-algo-1-2s5hk |[0m 2021-05-19 08:32:13,918 INFO - sagemaker-containers - nginx config: 
[36m15147y6kxi-algo-1-2s5hk |[0m worker_processes auto;
[36m15147y6kxi-algo-1-2s5hk |[0m daemon off;
[36m15147y6kxi-algo-1-2s5hk |[0m pid /tmp/nginx.pid;
[36m15147y6kxi-algo-1-2s5hk |[0m error_log  /dev/stderr;
[36m15147y6kxi-algo-1-2s5hk |[0m 
[36m15147y6kxi-algo-1-2s5hk |[0m worker_rlimit_nofile 4096;
[36m15147y6kxi-algo-1-2s5hk |[0m 
[36m15147y6kxi-algo-1-2s5hk |[0m events {
[36m15147y6kxi-algo-1-2s5hk |[0m   worker_connections 2048;
[36m15147y6kxi-algo-1-2s5hk |[0m }
[36m15147y6kxi-algo-1-2s5hk |[0m 
[36m15147y6kxi-algo-1-2s5hk |[0m http {
[36m15147y6k

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10


[36m15147y6kxi-algo-1-2s5hk |[0m 2021-05-19 08:32:17,149 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m15147y6kxi-algo-1-2s5hk |[0m 172.18.0.1 - - [19/May/2021:08:32:17 +0000] "GET /ping HTTP/1.1" 200 0 "-" "python-urllib3/1.26.4"
[36m15147y6kxi-algo-1-2s5hk |[0m 172.18.0.1 - - [19/May/2021:08:32:17 +0000] "GET /execution-parameters HTTP/1.1" 404 232 "-" "python-urllib3/1.26.4"
[36m15147y6kxi-algo-1-2s5hk |[0m 2021-05-19 08:32:17,971 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36m15147y6kxi-algo-1-2s5hk |[0m 172.18.0.1 - - [19/May/2021:08:32:18 +0000] "POST /invocations HTTP/1.1" 200 1020419 "-" "python-urllib3/1.26.4"
Gracefully stopping... (press Ctrl+C again to force)
.Waiting for transform job: Builtin-XGB-algo-local-2021-05-19--08-27-51-test
.

# 4. Real Time Prediction using endpoint

In [None]:
# Cell-local imports, grouped ahead of the code that uses them.
import boto3
from time import gmtime, strftime

from sagemaker.deserializers import JSONDeserializer
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

# One UTC timestamp is shared by the model and endpoint names.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = f'inference-pipeline-{timestamp_prefix}'
endpoint_name = f'inference-pipeline-ep-{timestamp_prefix}'

# Wrap the fitted preprocessor as a model and host it on a local endpoint.
scikit_learn_inference_model = sklearn_preprocessor.create_model()
scikit_learn_inference_model.deploy(
    initial_instance_count=1,
    instance_type='local',
    endpoint_name=endpoint_name,
)

# Single comma-separated record sent to the endpoint.
payload = "-121.89,37.29,38,1568,351,710,339,2.7042,<1H OCEAN"

# Client for the endpoint: requests are serialized as CSV, responses parsed as JSON.
predictor_settings = dict(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)
predictor = Predictor(**predictor_settings)

prediction = predictor.predict(payload)
print(prediction)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2021-05-19-08-40-15-331
INFO:sagemaker:Creating endpoint with name inference-pipeline-ep-2021-05-19-08-40-15
INFO:sagemaker.local.image:serving
INFO:sagemaker.local.image:creating hosting dir in /tmp/tmpoavjmzde
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-gbqlu:
    command: serve
    container_name: xbzhrhoe6x-algo-1-gbqlu
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-gbqlu
    ports:
    - 8080:8080
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmp3ro5077a:/opt/ml/model
version: '2.3'

INFO:sagemaker.local.image:

Attaching to xbzhrhoe6x-algo-1-gbqlu
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 2021-05-19 08:40:18,002 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 2021-05-19 08:40:18,007 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 2021-05-19 08:40:18,008 INFO - sagemaker-containers - nginx config: 
[36mxbzhrhoe6x-algo-1-gbqlu |[0m worker_processes auto;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m daemon off;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m pid /tmp/nginx.pid;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m error_log  /dev/stderr;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 
[36mxbzhrhoe6x-algo-1-gbqlu |[0m worker_rlimit_nofile 4096;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 
[36mxbzhrhoe6x-algo-1-gbqlu |[0m events {
[36mxbzhrhoe6x-algo-1-gbqlu |[0m   worker_connections 2048;
[36mxbzhrhoe6x-algo-1-gbqlu |[0m }
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 
[36mxbzhrhoe6x-algo-1-gbqlu |[0m http {
[36mxbzhrhoe

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10


[36mxbzhrhoe6x-algo-1-gbqlu |[0m [2021-05-19 08:40:20 +0000] [30] [INFO] Starting gunicorn 20.0.4
[36mxbzhrhoe6x-algo-1-gbqlu |[0m [2021-05-19 08:40:20 +0000] [30] [INFO] Listening at: unix:/tmp/gunicorn.sock (30)
[36mxbzhrhoe6x-algo-1-gbqlu |[0m [2021-05-19 08:40:20 +0000] [30] [INFO] Using worker: gevent
[36mxbzhrhoe6x-algo-1-gbqlu |[0m [2021-05-19 08:40:20 +0000] [33] [INFO] Booting worker with pid: 33
[36mxbzhrhoe6x-algo-1-gbqlu |[0m [2021-05-19 08:40:20 +0000] [34] [INFO] Booting worker with pid: 34
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 2021-05-19 08:40:21,192 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
![36mxbzhrhoe6x-algo-1-gbqlu |[0m 172.18.0.1 - - [19/May/2021:08:40:21 +0000] "GET /ping HTTP/1.1" 200 0 "-" "python-urllib3/1.26.4"
[36mxbzhrhoe6x-algo-1-gbqlu |[0m 2021-05-19 08:40:21,796 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
{'instances': [{'features': [-1.1560428086829155, 0.7719496164846016, 0.7