# AutoML Notebook

In this example we will use SageMaker Autopilot to build a model on the churn data.

In [4]:
import sagemaker
import boto3
from sagemaker import get_execution_role

# Execution role returned by the SageMaker SDK; passed as RoleArn when the
# AutoML job is created.
role = get_execution_role()

# SageMaker SDK session object.
session = sagemaker.Session()

# Region name configured on the default boto3 session.
region = boto3.Session().region_name

# Low-level SageMaker client; every Autopilot API call in this notebook goes
# through it.
sm = boto3.Session().client(
    service_name='sagemaker',
    region_name=region,
)

In [5]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import sys
import time
import json
from IPython.display import display
from time import strftime, gmtime
import sagemaker
from sagemaker.predictor import csv_serializer


# Data sets

These URIs are copied from the XGBoost Notebook where the data was uploaded to S3

In [6]:
# S3 locations of the train and validation CSV files.
train_data_s3_path, test_data_s3_path = (
    's3://telco-churn-seoul/xgboost-example/train.csv',
    's3://telco-churn-seoul/xgboost-example/validation.csv',
)


In [7]:
# Bucket and key prefix used to build the job's S3 output path.
# Change these to use a bucket of your choosing.
prefix = 'automl-example'
bucket = "telco-churn-seoul"

# Configure

We create configuration objects that tell the AutoML job where to find the data and where to place the results.

In [8]:
# Single input channel: the training CSV under an S3 prefix, with 'Churn'
# named as the target attribute.
training_channel = {
    'DataSource': {
        'S3DataSource': {
            'S3DataType': 'S3Prefix',
            'S3Uri': train_data_s3_path,
        },
    },
    'TargetAttributeName': 'Churn',
}
input_data_config = [training_channel]

# Output location of the form s3://<bucket>/<prefix>/output.
output_location_template = 's3://{}/{}/output'
output_data_config = {
    'S3OutputPath': output_location_template.format(bucket, prefix),
}



In [9]:
from time import gmtime, strftime, sleep
# Suffix the job name with the current UTC day-hour-minute-second.
job_name_prefix = 'automl-churn-'
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
auto_ml_job_name = job_name_prefix + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)

###
# The larger (MaxCandidates), the longer the auto pilot training will run.
###
completion_criteria = {'MaxCandidates': 5}

create_job_request = dict(
    AutoMLJobName=auto_ml_job_name,
    InputDataConfig=input_data_config,
    OutputDataConfig=output_data_config,
    AutoMLJobConfig={'CompletionCriteria': completion_criteria},
    RoleArn=role,
)

# Kept as the cell's last expression so the API response is displayed.
sm.create_auto_ml_job(**create_job_request)


AutoMLJobName: automl-churn-20-22-50-51


{'AutoMLJobArn': 'arn:aws:sagemaker:ap-northeast-2:320389841409:automl-job/automl-churn-20-22-50-51',
 'ResponseMetadata': {'RequestId': '49d97a0a-70d1-4b2e-abab-bef20957d955',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '49d97a0a-70d1-4b2e-abab-bef20957d955',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '100',
   'date': 'Sun, 20 Jun 2021 22:50:51 GMT'},
  'RetryAttempts': 0}}

In [10]:
%%time

# Poll the AutoML job until it reaches a terminal status, printing one
# "status - secondary status" line per poll.
#
# After this cell finishes, `describe_response` holds the last
# describe_auto_ml_job response and `job_run_status` its 'AutoMLJobStatus'.
TERMINAL_JOB_STATUSES = ('Failed', 'Completed', 'Stopped')
POLL_INTERVAL_SECONDS = 30

print('JobStatus - Secondary Status')
print('------------------------------')

while True:
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    print(job_run_status + " - " + describe_response['AutoMLJobSecondaryStatus'])

    # Check before sleeping so the cell returns as soon as the job is done
    # instead of waiting one more poll interval.
    if job_run_status in TERMINAL_JOB_STATUSES:
        break
    sleep(POLL_INTERVAL_SECONDS)

# Surface why the job did not complete; the later cells expect a finished job.
if job_run_status != 'Completed':
    print('FailureReason: ' + str(describe_response.get('FailureReason', 'not reported')))


JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
I

# Results

In [11]:
# Look up the best candidate of the AutoML job and report its objective metric.
job_description = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
best_candidate = job_description['BestCandidate']
best_candidate_name = best_candidate['CandidateName']
objective_metric = best_candidate['FinalAutoMLJobObjectiveMetric']

# Full candidate description first, then a short summary.
print(best_candidate)
print('\n')
print("CandidateName: " + best_candidate_name)
print("FinalAutoMLJobObjectiveMetricName: " + objective_metric['MetricName'])
print("FinalAutoMLJobObjectiveMetricValue: " + str(objective_metric['Value']))

{'CandidateName': 'automl-churn-20-22-50-51yv0fXsOh-004-3455ab5c', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:f1', 'Value': 0.7007099986076355}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-northeast-2:320389841409:processing-job/automl-churn-20-22-50-51-db-1-7420a20ea3104cb3804debb76050aba07', 'CandidateStepName': 'automl-churn-20-22-50-51-db-1-7420a20ea3104cb3804debb76050aba07'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-northeast-2:320389841409:training-job/automl-churn-20-22-50-51-dpp4-1-0a3535294f3949a58be123345d4c1bc', 'CandidateStepName': 'automl-churn-20-22-50-51-dpp4-1-0a3535294f3949a58be123345d4c1bc'}, {'CandidateStepType': 'AWS::SageMaker::TransformJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-northeast-2:320389841409:transform-job/automl-churn-20-22-50-51-dpp4-csv-1-0f9d373c8794482ebd6190076a6', 'Candi

# Understand what AutoPilot did

Autopilot allows you to download notebooks and code snippets that define the AutoML candidates.
You can modify the feature processing and hyperparameters of these candidates and re-execute the models.


In [12]:
# Fetch the job description; the next cell reads the artifact locations from it.
job = sm.describe_auto_ml_job(
    AutoMLJobName=auto_ml_job_name,
)

In [13]:
# Locations of the two notebooks listed under the job's artifacts.
job_artifacts = job['AutoMLJobArtifacts']
job_data_notebook = job_artifacts['DataExplorationNotebookLocation']
job_candidate_notebook = job_artifacts['CandidateDefinitionNotebookLocation']

for notebook_location in (job_data_notebook, job_candidate_notebook):
    print(notebook_location)

s3://telco-churn-seoul/automl-example/output/automl-churn-20-22-50-51/sagemaker-automl-candidates/automl-churn-20-22-50-51-pr-1-c2cd7f7ae99b4e20a12992ebecf60b3e3/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb
s3://telco-churn-seoul/automl-example/output/automl-churn-20-22-50-51/sagemaker-automl-candidates/automl-churn-20-22-50-51-pr-1-c2cd7f7ae99b4e20a12992ebecf60b3e3/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb


In [14]:
# Download the data exploration notebook; IPython expands {job_data_notebook}
# to the S3 URI printed by the previous cell.
!aws s3 cp {job_data_notebook} ./

download: s3://telco-churn-seoul/automl-example/output/automl-churn-20-22-50-51/sagemaker-automl-candidates/automl-churn-20-22-50-51-pr-1-c2cd7f7ae99b4e20a12992ebecf60b3e3/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb to ./SageMakerAutopilotDataExplorationNotebook.ipynb


In [15]:
# Download the candidate definition notebook; IPython expands
# {job_candidate_notebook} to the S3 URI held in that variable.
!aws s3 cp {job_candidate_notebook} ./

download: s3://telco-churn-seoul/automl-example/output/automl-churn-20-22-50-51/sagemaker-automl-candidates/automl-churn-20-22-50-51-pr-1-c2cd7f7ae99b4e20a12992ebecf60b3e3/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb to ./SageMakerAutopilotCandidateDefinitionNotebook.ipynb
