# 1. Libraries and Installation

In [None]:
!pip install sagemaker-experiments
!pip install s3fs
!pip install matplotlib
!pip install seaborn
!pip install shap
!pip install smdebug

Collecting sagemaker-experiments
  Using cached sagemaker_experiments-0.1.31-py3-none-any.whl (42 kB)
Installing collected packages: sagemaker-experiments
Successfully installed sagemaker-experiments-0.1.31
Collecting shap
  Downloading shap-0.39.0.tar.gz (356 kB)
[K     |████████████████████████████████| 356 kB 6.1 MB/s eta 0:00:01
Collecting tqdm>4.25.0
  Downloading tqdm-4.60.0-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 5.3 MB/s  eta 0:00:01
[?25hCollecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Building wheels for collected packages: shap
  Building wheel for shap (setup.py) ... [?25ldone
[?25h  Created wheel for shap: filename=shap-0.39.0-cp36-cp36m-linux_x86_64.whl size=481382 sha256=d0bda538d20906d8e3de9a168fb9c9d98ce25ca7507e7cd4febdde09e973cee1
  Stored in directory: /home/ec2-user/.cache/pip/wheels/6f/08/25/2992725334291786ea084e06cac493d93049b80e3470318a1b
Successfully built shap
Installing collected packages:

In [None]:
from io import StringIO
import numpy as np
import os
import pandas as pd
import boto3
import time
import s3fs
from datetime import datetime

import matplotlib.pyplot as plt
import seaborn as sns
import re
import shap
from scipy import stats
import copy

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.analytics import ExperimentAnalytics

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

from sagemaker.sklearn.estimator import SKLearn
from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig,CollectionConfig
from sagemaker.estimator import Estimator
from sagemaker.session import s3_input
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import Session

from smdebug.trials  import create_trial

[2021-05-19 18:14:54.828 ip-172-16-10-49:16000 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None


# 2. Configs

In [None]:
# Timestamp (notebook-local clock) used to make job, model and endpoint names unique.
now = datetime.now()
current_time = f"{now:%Y-%m-%d--%H-%M-%S}"
print(current_time)

2021-05-19--18-15-49


In [None]:
# Session object shared by the estimator and predictor created further down.
sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()

# S3 bucket that holds both the input datasets and every job output.
bucket = "housing-1717"
prefix = 'scikit-pre-model-Inference-pipeline'

# Input datasets (header-less CSV). Built from `bucket` so that changing the bucket
# in one place moves the inputs and the outputs together.
input_data_prefix = "housing/input-datasets"
train_data = f"s3://{bucket}/{input_data_prefix}/train_data_without_header.csv"
test_data = f"s3://{bucket}/{input_data_prefix}/test_data_without_header.csv"

# scikit-learn container version, the preprocessing entry point, and the helper
# module shipped alongside it (file name is spelled as it exists on disk).
FRAMEWORK_VERSION = "0.23-1"
script_path = 'sklearn_pipeline.py'
dependency_path ='dependancies.py'

# Timestamped base name; training and transform job names are derived from it.
base_job_name = f"Builtin-XGB-algo-{current_time}"

# Where batch-transform output for this run is written.
output_data_prefix = f'housing/datasets/output/{base_job_name}'
data_output_path = f's3://{bucket}/{output_data_prefix}'

# Per-run S3 location reserved for debugger output.
debug_prefix = f'housing/jobs/debug/{base_job_name}'
debug_path = f's3://{bucket}/{debug_prefix}'

experiment_name_prefix = "builtin-xgboost-track13"

# 3. Batch Transform
## 3.1 Fit the preprocessor on the training data

In [None]:
# Estimator that runs `script_path` in the SageMaker scikit-learn container to fit
# the preprocessing pipeline; `dependency_path` is shipped alongside the entry point.
sklearn_preprocessor = SKLearn(
    entry_point=script_path,
    role=role,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.m5.xlarge",
    # Managed spot training, using the SDK v2 keyword names. A misspelled keyword
    # such as `train_use_spot_instance` is not an alias the SDK knows and is
    # dropped without error, leaving the job on on-demand instances.
    use_spot_instances=True,
    # Maximum training time in seconds.
    max_run=600,
    # Required with spot instances: total seconds (waiting for capacity + training);
    # must be >= max_run.
    max_wait=1200,
    sagemaker_session=sagemaker_session,
    dependencies=[dependency_path],
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.


In [None]:
# Start the training job that fits the preprocessing script; the training CSV is
# exposed to the container as the 'train' channel.
training_channels = {'train': train_data}
sklearn_preprocessor.fit(training_channels, job_name=base_job_name)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: Builtin-XGB-algo-2021-05-19--18-15-49


2021-05-19 18:16:05 Starting - Starting the training job...
2021-05-19 18:16:07 Starting - Launching requested ML instancesProfilerReport-1621448165: InProgress
......
2021-05-19 18:17:30 Starting - Preparing the instances for training......
2021-05-19 18:18:31 Downloading - Downloading input data...
2021-05-19 18:19:01 Training - Training image download completed. Training in progress.
2021-05-19 18:19:01 Uploading - Uploading generated training model[34m2021-05-19 18:18:56,377 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-05-19 18:18:56,380 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:18:56,389 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-05-19 18:18:56,680 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:18:56,692 sagemaker-training-toolkit INFO     No GPUs detected (

## 3.2 Transform the training data

In [None]:
# Build a batch Transformer from the fitted SKLearn estimator. It requests CSV
# responses, assembles them line by line, and writes under `data_output_path`.
transformer_settings = dict(
    instance_count=1,
    instance_type='ml.m5.xlarge',
    assemble_with='Line',
    accept='text/csv',
    output_path=data_output_path,
)
transformer = sklearn_preprocessor.transformer(**transformer_settings)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2021-05-19-18-20-16-069


In [None]:
# Push the training CSV through the fitted preprocessor as a batch transform job.
train_transform_job_name = base_job_name + "-train"
transformer.transform(
    train_data,
    content_type="text/csv",
    job_name=train_transform_job_name,
)
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# NOTE(review): this is the transformer's output prefix, not a single object. The
# test transform writes under the same prefix - confirm downstream consumers expect that.
preprocessed_train = transformer.output_path

INFO:sagemaker:Creating transform job with name: Builtin-XGB-algo-2021-05-19--18-15-49-train


...........................
.[34m2021-05-19 18:24:43,335 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:24:43,338 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:24:43,338 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[35m2021-05-19 18:24:43,335 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-19 18:24:43,338 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-19 18:24:43,338 INFO - sagemaker-containers - nginx config: [0m
[35mworker_processes auto;[0m
[35mdaemon off;[0m
[35mpid /tmp/nginx.pid;[0m
[35merror_log  /dev/stderr;
[0m
[35mworker_rlimit_nofile 4096;
[0m
[35mevents 

## 3.3 Transform the test data

In [None]:
# Push the test CSV through the same fitted preprocessor as a second batch transform job.
test_transform_job_name = base_job_name + "-test"
transformer.transform(
    test_data,
    content_type="text/csv",
    job_name=test_transform_job_name,
)
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# NOTE(review): this is the transformer's output prefix, not a single object. The
# train transform writes under the same prefix - confirm downstream consumers expect that.
preprocessed_test = transformer.output_path

INFO:sagemaker:Creating transform job with name: Builtin-XGB-algo-2021-05-19--18-15-49-test


...........................
[34m2021-05-19 18:29:17,192 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:29:17,194 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-19 18:29:17,195 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[35m2021-05-19 18:29:17,192 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-19 18:29:17,194 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[35m2021-05-19 18:29:17,195 INFO - sagemaker-containers - nginx config: [0m
[35mworker_processes auto;[0m
[35mdaemon off;[0m
[35mpid /tmp/nginx.pid;[0m
[35merror_log  /dev/stderr;
[0m
[35mworker_rlimit_nofile 4096;
[0m
[35mevents {

# 4. Real-Time Prediction Using an Endpoint

In [None]:
# Imports used by this section (PipelineModel was previously imported twice).
import boto3
from time import gmtime, strftime

from sagemaker.deserializers import JSONDeserializer
from sagemaker.estimator import Estimator
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

timestamp_prefix = current_time

# Wrap the fitted preprocessor as a servable model; it is the only container
# in the inference pipeline.
scikit_learn_inference_model = sklearn_preprocessor.create_model()
# Optional (disabled). NOTE(review): presumably makes the container default to a
# CSV response - confirm before enabling:
# scikit_learn_inference_model.env = {"SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT": "text/csv"}
model_containers = [scikit_learn_inference_model]

model_name = 'inference-pipeline-' + timestamp_prefix
endpoint_name = 'inference-pipeline-ep-' + timestamp_prefix

sm_model = PipelineModel(
    name=model_name,
    role=role,
    models=model_containers,
    # Reuse the notebook's session instead of letting deploy() create a new one.
    sagemaker_session=sagemaker_session,
)

# Create the endpoint. The return value is not kept: without a `predictor_cls`
# PipelineModel.deploy() returns None, so the Predictor is built explicitly below.
sm_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    endpoint_name=endpoint_name,
)

# One raw, header-less CSV record; the last field is a categorical value.
payload = "-121.89,37.29,38,1568,351,710,339,2.7042,<1H OCEAN"

# Client for the endpoint: sends the payload as CSV and parses the JSON reply.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)
print(predictor.predict(payload))

INFO:sagemaker:Creating model with name: inference-pipeline-2021-05-19--18-15-49
INFO:sagemaker:Creating endpoint with name inference-pipeline-ep-2021-05-19--18-15-49


-------------!{'instances': [{'features': [-1.1560428086829155, 0.7719496164846016, 0.7433308916510305, -0.49323393384425046, -0.4454382074687401, -0.6362114070375079, -0.4206984222235789, -0.6149374443958345, -0.31205451913809157, -0.0864987054157523, 0.15531753037148296, 1.0, 0.0, 0.0, 0.0, 0.0]}]}


In [None]:
# Tear down everything the deployment created, not just the endpoint, so no
# endpoint configuration or model is left behind in the account.
sm_client = sagemaker_session.boto_session.client('sagemaker')

# Look up the endpoint's configuration name before the endpoint itself is removed.
endpoint_config_name = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointConfigName"]

sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
# `model_name` is the name the pipeline model was registered under at deploy time.
sm_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': '21c3cad2-086f-4787-a4a0-bae56d82501a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '21c3cad2-086f-4787-a4a0-bae56d82501a',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 19 May 2021 18:36:28 GMT'},
  'RetryAttempts': 0}}