In [1]:
import os
from datetime import datetime
import ast
import random

import pandas as pd

import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

In [2]:
# Resolve the AWS region, S3 location and IAM role shared by the jobs in this notebook.
boto_session = boto3.Session()
region = boto_session.region_name

# SageMaker SDK session; its default bucket is where data is uploaded and results are written.
session = sagemaker.Session()
bucket = session.default_bucket()
prefix = 'sagemaker/autopilot-dm'

# IAM role of the current execution environment, passed to the AutoML job.
role = get_execution_role()

# Low-level SageMaker client used for the AutoML API calls.
sm = boto_session.client(service_name='sagemaker', region_name=region)

In [3]:
# Load the cards table from the data location configured in the environment.
cards_csv_uri = f'{os.environ["AWS_SAGEMAKER_S3_LOCATION"]}/data/V1_cards.csv'
dfp_cards = pd.read_csv(cards_csv_uri)

In [None]:
# cardids_for_feature = dfp_cards['cardid'].unique().tolist()
# print('Length of the list:', len(cardids_for_feature))

In [4]:
# Load the ML dataset (hero, cards, card-type counts, archetype) and preview it.
ml_dataset_uri = '{}/data/dataset_ml.csv'.format(os.environ["AWS_SAGEMAKER_S3_LOCATION"])
dfp_data_ml = pd.read_csv(ml_dataset_uri)

# `cards` appears to be read back as the text form of a list; uncomment the next
# line to parse it into real Python lists (only needed for per-card features).
#dfp_data_ml['cards'] = dfp_data_ml['cards'].apply(lambda cards:ast.literal_eval(cards))

dfp_data_ml.head()

Unnamed: 0,hero,cards,count_minions,count_spells,count_weapons,archetype
0,paladin,"[29, 29, 103, 103, 422, 422, 473, 473, 581, 67...",17.0,8.0,5.0,Aggro Paladin
1,paladin,"[23, 23, 189, 189, 260, 260, 383, 383, 391, 33...",18.0,10.0,2.0,Midrange Paladin
2,hunter,"[22, 22, 99, 99, 163, 163, 239, 344, 344, 363,...",0.0,24.0,5.0,Spell Hunter
3,druid,"[131, 131, 282, 282, 548, 548, 620, 620, 633, ...",9.0,20.0,0.0,Jade Lotus Druid
4,rogue,"[3, 92, 92, 103, 103, 286, 286, 324, 324, 382,...",23.0,7.0,0.0,Tempo Rogue


In [10]:
%%time
dfp_data_automl = dfp_data_ml.copy().sample(frac=1).head(1000)

#for cardid in [29,422,163]:
# for cardid in random.choices(cardids_for_feature, k=100):
#     dfp_data_automl[f'cardfeature_count-{cardid}'] = dfp_data_automl['cards'].apply(lambda cards: cards.count(cardid))
    
dfp_data_automl.drop('cards', axis=1, inplace=True)
dfp_data_automl.sample(frac=1).head()

CPU times: user 70.9 ms, sys: 0 ns, total: 70.9 ms
Wall time: 69.2 ms


Unnamed: 0,hero,count_minions,count_spells,count_weapons,archetype
23142,warrior,21.0,7.0,2.0,Taunt Warrior
74977,shaman,16.0,14.0,0.0,Shudderwock Shaman
217359,druid,20.0,10.0,0.0,Beast Druid
153044,hunter,8.0,20.0,2.0,Reno Hunter
87045,mage,11.0,19.0,0.0,Freeze Mage


In [11]:
# 80/20 train/test split with a fixed seed for reproducibility.
dfp_train_data = dfp_data_automl.sample(frac=0.8, random_state=200)

# Hold-out set: every row that was not sampled into the training set.
dfp_test_data = dfp_data_automl.drop(dfp_train_data.index)

# Inference input: the hold-out rows without the label column.
# This must be derived from the hold-out frame, not from the full dataset,
# otherwise the training rows end up in the test file.
dfp_test_data_no_target = dfp_test_data.drop(columns=['archetype'])

In [12]:
# Training split: written with a header row (the job configuration refers to
# the target column by name), then uploaded under <prefix>/train.
train_file = 'train_data.csv'
dfp_train_data.to_csv(train_file, header=True, index=False)
train_data_s3_path = session.upload_data(path=train_file, key_prefix=f'{prefix}/train')
print(f'Train data uploaded to: {train_data_s3_path}')

# Test split without the target: written without a header row, then uploaded
# under <prefix>/test.
test_file = 'test_data.csv'
dfp_test_data_no_target.to_csv(test_file, header=False, index=False)
test_data_s3_path = session.upload_data(path=test_file, key_prefix=f'{prefix}/test')
print(f'Test data uploaded to: {test_data_s3_path}')

Train data uploaded to: s3://sagemaker-us-east-1-574603769924/sagemaker/autopilot-dm/train/train_data.csv
Test data uploaded to: s3://sagemaker-us-east-1-574603769924/sagemaker/autopilot-dm/test/test_data.csv


In [13]:
# S3 locations for the AutoML job: training input prefix and output prefix.
train_s3_uri = f's3://{bucket}/{prefix}/train'
output_s3_uri = f's3://{bucket}/{prefix}/output'

# Single input channel: everything under the train prefix, with `archetype`
# as the column to predict.
input_data_config = [
    {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': train_s3_uri,
            },
        },
        'TargetAttributeName': 'archetype',
    },
]

# Where the job writes its output.
output_data_config = {'S3OutputPath': output_s3_uri}

In [14]:
# NOTE: `sleep` is not used in this cell; the status-polling cell further down relies on it.
from time import gmtime, strftime, sleep

# Job name = fixed prefix + UTC day-hour-minute-second suffix.
# NOTE(review): the suffix carries no month or year, so a name can recur on the
# same day-of-month at the same time -- confirm this is acceptable.
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
auto_ml_job_name = f'automl-hearthpwn-{timestamp_suffix}'
print(f'AutoMLJobName: {auto_ml_job_name}')

# Start the AutoML job; the API response is the cell's displayed value.
create_job_kwargs = {
    'AutoMLJobName': auto_ml_job_name,
    'InputDataConfig': input_data_config,
    'OutputDataConfig': output_data_config,
    'RoleArn': role,
}
sm.create_auto_ml_job(**create_job_kwargs)

AutoMLJobName: automl-hearthpwn-13-10-53-22


{'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:574603769924:automl-job/automl-hearthpwn-13-10-53-22',
 'ResponseMetadata': {'RequestId': 'f59d20a8-2cbe-40b5-9cbd-ba4cf4b382d9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f59d20a8-2cbe-40b5-9cbd-ba4cf4b382d9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '99',
   'date': 'Tue, 13 Oct 2020 10:53:23 GMT'},
  'RetryAttempts': 0}}

## Analyse the magic: monitor the AutoML job and inspect its best candidate

In [15]:
# List AutoML jobs through the SageMaker client (no filters are passed) and
# display the raw response so the job created above can be located.
list_response = sm.list_auto_ml_jobs()
list_response

{'AutoMLJobSummaries': [{'AutoMLJobName': 'automl-hearthpwn-13-10-53-22',
   'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:574603769924:automl-job/automl-hearthpwn-13-10-53-22',
   'AutoMLJobStatus': 'InProgress',
   'AutoMLJobSecondaryStatus': 'AnalyzingData',
   'CreationTime': datetime.datetime(2020, 10, 13, 10, 53, 22, 954000, tzinfo=tzlocal()),
   'LastModifiedTime': datetime.datetime(2020, 10, 13, 10, 53, 26, 223000, tzinfo=tzlocal())},
  {'AutoMLJobName': 'automl-hearthpwn-12-23-57-50',
   'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:574603769924:automl-job/automl-hearthpwn-12-23-57-50',
   'AutoMLJobStatus': 'Stopped',
   'AutoMLJobSecondaryStatus': 'Stopped',
   'CreationTime': datetime.datetime(2020, 10, 12, 23, 57, 50, 756000, tzinfo=tzlocal()),
   'EndTime': datetime.datetime(2020, 10, 13, 10, 9, 47, 722000, tzinfo=tzlocal()),
   'LastModifiedTime': datetime.datetime(2020, 10, 13, 10, 9, 47, 753000, tzinfo=tzlocal())},
  {'AutoMLJobName': 'automl-banking-12-23-57-36',
   'Au

In [16]:
# Re-attach to a previously started job only when this kernel has no job name yet
# (e.g. after a restart where the creation cell was skipped). If the creation cell
# has run, keep the name it generated so the cells below follow the job that was
# just started instead of a stale, hard-coded one.
try:
    auto_ml_job_name
except NameError:
    auto_ml_job_name = 'automl-hearthpwn-13-10-53-22'

In [None]:
# Poll the AutoML job until it reaches a terminal status, printing one
# "status - secondary status" line per poll.
POLL_INTERVAL_SECONDS = 30
TERMINAL_STATUSES = ('Failed', 'Completed', 'Stopped')

print('JobStatus - Secondary Status')
print('------------------------------')

while True:
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    print(job_run_status + " - " + describe_response['AutoMLJobSecondaryStatus'])

    # Stop as soon as a terminal status is seen: no extra describe call and
    # no trailing wait once the job has already finished.
    if job_run_status in TERMINAL_STATUSES:
        break
    sleep(POLL_INTERVAL_SECONDS)

In [None]:
# Fetch the job description and report the best candidate with its objective metric.
job_description = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
best_candidate = job_description['BestCandidate']
best_candidate_name = best_candidate['CandidateName']
objective_metric = best_candidate['FinalAutoMLJobObjectiveMetric']

print(best_candidate)
print('\n')
print(f"CandidateName: {best_candidate_name}")
print(f"FinalAutoMLJobObjectiveMetricName: {objective_metric['MetricName']}")
print(f"FinalAutoMLJobObjectiveMetricValue: {objective_metric['Value']}")