# AutoPilot Experiment

In this Notebook we take the labelled set of articles and use Sagemaker Autopilot to build a classifier.

This Notebook was run in Sagemaker Studio with The **Python 3 (Data Science)** Kernel.


In [1]:
import sagemaker
from sagemaker import get_execution_role
import boto3

sess = sagemaker.Session()

role = get_execution_role()
print(role)  

arn:aws:iam::320389841409:role/service-role/AmazonSageMaker-ExecutionRole-20201022T141998


In [14]:

region = boto3.Session().region_name

# This is the client we will use to interact with SageMaker AutoPilot
sm = boto3.Session().client(service_name='sagemaker',region_name=region)

In [2]:
bucket = "funnybones"
prefix = "rural/topics/autopilot"  

In [3]:
import pandas as pd

In [4]:
test_file = "data/test.csv"

In [16]:
train_file = "data/training_with_header.csv"

In [17]:
%%time

train_channel = prefix + "/train"

sess.upload_data(path=train_file, bucket=bucket, key_prefix=train_channel)

s3_train_data = "s3://{}/{}/training_with_header.csv".format(bucket, train_channel)

CPU times: user 29.6 ms, sys: 12.4 ms, total: 42 ms
Wall time: 220 ms


# Setup AutoPilot Model


In [18]:
input_data_config = [{
      'DataSource': {
        'S3DataSource': {
          'S3DataType': 'S3Prefix',
          'S3Uri': s3_train_data
        }
      },
      'TargetAttributeName': 'category'
    }
  ]

output_data_config = {
    'S3OutputPath': 's3://{}/{}/output'.format(bucket,prefix)
  }

In [19]:

from time import gmtime, strftime, sleep
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

auto_ml_job_name = 'automl-topics-' + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)



AutoMLJobName: automl-topics-06-03-26-13


In [20]:
sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=input_data_config,
                      OutputDataConfig=output_data_config,
                      AutoMLJobConfig={'CompletionCriteria':
                                       {'MaxCandidates': 10}
                                      },
                      RoleArn=role)


{'AutoMLJobArn': 'arn:aws:sagemaker:ap-southeast-2:320389841409:automl-job/automl-topics-06-03-26-13',
 'ResponseMetadata': {'RequestId': '8f28506b-8732-4d41-8458-161295eb4e77',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8f28506b-8732-4d41-8458-161295eb4e77',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '101',
   'date': 'Tue, 06 Jul 2021 03:26:14 GMT'},
  'RetryAttempts': 0}}

In [22]:
%%time

print ('JobStatus - Secondary Status')
print('------------------------------')


describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
job_run_status = describe_response['AutoMLJobStatus']
    
while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    
    print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
    sleep(30)



JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProg

In [23]:


best_candidate = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)['BestCandidate']
best_candidate_name = best_candidate['CandidateName']
print(best_candidate)
print('\n')
print("CandidateName: " + best_candidate_name)
print("FinalAutoMLJobObjectiveMetricName: " + best_candidate['FinalAutoMLJobObjectiveMetric']['MetricName'])
print("FinalAutoMLJobObjectiveMetricValue: " + str(best_candidate['FinalAutoMLJobObjectiveMetric']['Value']))



{'CandidateName': 'automl-topics-06-03-26-13mBDVjQY-002-2f237882', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:accuracy', 'Value': 0.5944899916648865}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-southeast-2:320389841409:processing-job/automl-topics-06-03-26-13-db-1-85d95408b6944e0ca40c5aaa6e5495a5', 'CandidateStepName': 'automl-topics-06-03-26-13-db-1-85d95408b6944e0ca40c5aaa6e5495a5'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-southeast-2:320389841409:training-job/automl-topics-06-03-26-13-dpp4-1-074034d2f29a48508c6e3cca0c068a', 'CandidateStepName': 'automl-topics-06-03-26-13-dpp4-1-074034d2f29a48508c6e3cca0c068a'}, {'CandidateStepType': 'AWS::SageMaker::TransformJob', 'CandidateStepArn': 'arn:aws:sagemaker:ap-southeast-2:320389841409:transform-job/automl-topics-06-03-26-13-dpp4-rpb-1-407ea655032d49b6a5f8614dbf', 