In [1]:
# 1. Download data

In [4]:
%%sh
# Fetch the direct-marketing sample archive and unpack it next to the notebook.
# -N  : timestamping; skip the download when the local copy is already up to date.
# -nv : print a short summary instead of the full progress log.
wget -N -nv https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
# -o  : overwrite previously extracted files without prompting, so the cell can be re-run.
unzip -o bank-additional.zip

Archive:  bank-additional.zip
   creating: bank-additional/
  inflating: bank-additional/bank-additional-names.txt  
  inflating: bank-additional/bank-additional.csv  
  inflating: bank-additional/bank-additional-full.csv  


--2021-04-04 20:23:25--  https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
Resolving sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com (sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com)... 52.218.196.41
Connecting to sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com (sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com)|52.218.196.41|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 432828 (423K) [application/zip]
Saving to: ‘bank-additional.zip’

     0K .......... .......... .......... .......... .......... 11%  148M 0s
    50K .......... .......... .......... .......... .......... 23% 62.0M 0s
   100K .......... .......... .......... .......... .......... 35%  115M 0s
   150K .......... .......... .......... .......... .......... 47% 91.8M 0s
   200K .......... .......... .......... .......... .......... 59%  102M 0s
   250K .......... .......... .......... .....

In [5]:
# 2. Upload data to s3

In [2]:
import sagemaker 

In [3]:
# SageMaker session object reused by the upload and experiment-analytics cells.
sess = sagemaker.Session()
# S3 key prefix under which the input dataset is uploaded.
prefix = "sagemaker/DEMO-autopilot/input"

In [6]:
# Upload the full CSV to S3 under `prefix` and print the resulting S3 URI.
local_csv = "./bank-additional/bank-additional-full.csv"
uri = sess.upload_data(path=local_csv, key_prefix=prefix)
print(uri)

s3://sagemaker-us-west-2-076084266064/sagemaker/DEMO-autopilot/input/bank-additional-full.csv


In [7]:
# output

In [8]:
## Deployment and Prediction 

In [9]:
# Endpoint: my-first-autopilot-endpoint

In [14]:
# One comma-separated record, sent as the request body when invoking the endpoint.
sample = "56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0"
# Name of the deployed endpoint to query.
ep_name = "my-first-autopilot-endpoint"

In [11]:
import boto3
# Create a boto3 client for the SageMaker runtime; it provides the
# invoke_endpoint API used to request predictions.
boto_session = boto3.Session()
sm_rt = boto_session.client('runtime.sagemaker')

In [15]:
# Send the CSV sample to the endpoint and request a CSV reply.
response = sm_rt.invoke_endpoint(
    EndpointName=ep_name,
    Body=sample,
    ContentType='text/csv',
    Accept='text/csv',
)

In [16]:
# Display the raw response dict; the prediction is carried in its streaming 'Body' entry.
response

{'ResponseMetadata': {'RequestId': '5d4dbf70-a6ab-41b0-8816-8d0ace024e43',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5d4dbf70-a6ab-41b0-8816-8d0ace024e43',
   'x-amzn-invoked-production-variant': 'default-variant-name',
   'date': 'Mon, 05 Apr 2021 00:39:15 GMT',
   'content-type': 'text/csv; charset=utf-8',
   'content-length': '3'},
  'RetryAttempts': 0},
 'ContentType': 'text/csv; charset=utf-8',
 'InvokedProductionVariant': 'default-variant-name',
 'Body': <botocore.response.StreamingBody at 0x7fef74fc1910>}

In [17]:
# Read the response body and decode it into the model's text output.
# The result gets its own name so `response` keeps referring to the original
# dict: rebinding `response` to a str made this cell raise TypeError on re-run.
prediction = response['Body'].read().decode('utf-8')
print(f'model-response : {prediction}')

model-response : no



In [19]:
# List SageMaker endpoints via the AWS CLI; the output includes each endpoint's status.
!aws sagemaker list-endpoints 

{
    "Endpoints": [
        {
            "EndpointName": "my-first-autopilot-endpoint",
            "EndpointArn": "arn:aws:sagemaker:us-west-2:076084266064:endpoint/my-first-autopilot-endpoint",
            "CreationTime": 1617582097.974,
            "LastModifiedTime": 1617582485.185,
            "EndpointStatus": "InService"
        }
    ]
}


## Once feature engineering is complete, model tuning starts. While it's running, we can use the SageMaker Experiments SDK to keep track of jobs.

In [21]:
import pandas as pd
from sagemaker.analytics import ExperimentAnalytics 

In [24]:
# The experiment name is the Autopilot job name followed by a fixed suffix.
auto_ml_job_name = 'my-first-autopilot-job'
exp = ExperimentAnalytics(
    experiment_name=auto_ml_job_name + '-aws-auto-ml-job',
    sagemaker_session=sess,
)

In [25]:
# Materialize the experiment's tracked jobs as a pandas DataFrame for inspection.
df = exp.dataframe()

In [26]:
# Each row of the analytics frame is counted as one job.
job_count = len(df)
print(f'Number of jobs: {job_count}')

Number of jobs: 271


In [30]:
# Move the objective metric to the first column so it stays visible when the
# wide frame is truncated for display.
objective_col = 'ObjectiveMetric - Max'
other_cols = df.drop(columns=[objective_col])
df = pd.concat([df[objective_col], other_cols], axis=1)

In [31]:
# Five best jobs, ranked by objective metric (highest first).
df.sort_values(by='ObjectiveMetric - Max', ascending=False).head(5)

Unnamed: 0,ObjectiveMetric - Max,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,_tuning_objective_metric,alpha,...,code - MediaType,code - Value,input_channel_mode,job_name,label_col,max_dataset_size,SageMaker.ImageUri - MediaType,SageMaker.ImageUri - Value,ds - MediaType,ds - Value
23,0.78789,tuning-job-1-d3c3521924bf4cd7bf-226-7d5c8a9b-a...,tuning-job-1-d3c3521924bf4cd7bf-226-7d5c8a9b-a...,arn:aws:sagemaker:us-west-2:076084266064:train...,246618743249.dkr.ecr.us-west-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.429141,...,,,,,,,,,,
146,0.78731,tuning-job-1-d3c3521924bf4cd7bf-103-276c1416-a...,tuning-job-1-d3c3521924bf4cd7bf-103-276c1416-a...,arn:aws:sagemaker:us-west-2:076084266064:train...,246618743249.dkr.ecr.us-west-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,3e-06,...,,,,,,,,,,
116,0.787,tuning-job-1-d3c3521924bf4cd7bf-128-0dfb7438-a...,tuning-job-1-d3c3521924bf4cd7bf-128-0dfb7438-a...,arn:aws:sagemaker:us-west-2:076084266064:train...,246618743249.dkr.ecr.us-west-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.751338,...,,,,,,,,,,
21,0.78645,tuning-job-1-d3c3521924bf4cd7bf-230-e37ee377-a...,tuning-job-1-d3c3521924bf4cd7bf-230-e37ee377-a...,arn:aws:sagemaker:us-west-2:076084266064:train...,246618743249.dkr.ecr.us-west-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.359242,...,,,,,,,,,,
0,0.78555,tuning-job-1-d3c3521924bf4cd7bf-249-9c9289c1-a...,tuning-job-1-d3c3521924bf4cd7bf-249-9c9289c1-a...,arn:aws:sagemaker:us-west-2:076084266064:train...,246618743249.dkr.ecr.us-west-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,2.0,...,,,,,,,,,,


## Once model tuning is complete, we can easily find the best candidate 

Using the SageMaker SDK and boto3

## Cleaning up

## Model Artifacts

In [34]:
!aws s3 ls s3://sagemaker-us-west-2-076084266064/sagemaker/DEMO-autopilot/output/my-first-autopilot-job/

                           PRE data-processor-models/
                           PRE documentation/
                           PRE preprocessed-data/
                           PRE sagemaker-automl-candidates/
                           PRE transformed-data/
                           PRE tuning/
                           PRE validations/


In [38]:
!aws s3 ls s3://sagemaker-us-west-2-076084266064/sagemaker/DEMO-autopilot/output/my-first-autopilot-job/preprocessed-data/tuning_data/

                           PRE train/
                           PRE validation/



```
PRE data-processor-models/
PRE preprocessed-data/
PRE sagemaker-automl-candidates/
PRE transformed-data/
PRE tuning/
```

    - The preprocessed-data/tuning_data prefix contains the training and validation splits generated from the input dataset. Each split is further broken into small CSV chunks.

    - The sagemaker-automl-candidates prefix contains ten data preprocessing scripts (dpp[0-9].py), one for each pipeline. It also contains the code to train them (trainer.py) on the input dataset, and the code to process the input dataset with each one of the ten resulting models (sagemaker_serve.py).
    - The data-processor-models prefix contains the ten data processing models trained by the dpp scripts.
    - The transformed-data prefix contains the ten processed versions of the training and validation splits.
    - The sagemaker-automl-candidates prefix also contains the two auto-generated notebooks.
    - Finally, the tuning prefix contains the actual models trained during the Model Tuning step.
