Purpose: Demonstrate model creation with the AWS boto3 SDK, using the Iris multi-class dataset

Steps:
1. Upload Training Set and Training Schema files to S3
2. Create Training Data Source, Evaluation Data Source
3. Create MultiClass Model using Training Data Source
4. Create Evaluation using Evaluation Data Source
5. Get the evaluation status and print the performance metric (average F1 score)

In [11]:
import json
import os
import time

import boto3

Machine Learning boto3 API Reference:
http://boto3.readthedocs.io/en/latest/reference/services/machinelearning.html#MachineLearning.Client.create_ml_model

S3:
http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Bucket.upload_file

In [12]:
# Local directory holding the demo files that are uploaded to S3 further down.
# NOTE(review): this is an absolute, machine-specific Windows path; point it at
# the folder where the data files live on your machine.
# The original notes also mention batch evaluation files and downloading batch
# results to the local drive — presumably for later steps; confirm before relying on it.
data_path = r'C:\AWSMLCourse\Data\ModelCreationDemo'
# File names inside data_path: training data, its schema, and the test file
# (the test file is only uploaded in the cells shown here).
training_file = 'iris_data_train.csv'
training_schema_file = 'iris_data_train.csv.schema'
batch_test_file = 'iris_data_classifier_test.csv'

In [13]:
# S3 bucket name. Bucket names must be globally unique across all of AWS
# (not just within your account), so change this to a bucket you own.
s3_bucket_name = 'my-ml-demo'
# Folder (key prefix) inside the bucket under which the demo files are stored.
s3_folder_name = 'boto3_demo' 
# Uploaded files are addressed as s3://<s3_bucket_name>/<s3_folder_name>/<file name>

In [14]:
# Initialize a boto3 session with the credentials profile and region to use.
# This demo uses the named profile 'ml_user'; change profile_name to match a
# profile configured on your machine (or use your default profile instead).
# The region is where the ML models and data sources will be created.

# http://docs.aws.amazon.com/machine-learning/latest/dg/regions-and-endpoints.html
# When this notebook was written, ML was available in US East (N. Virginia) us-east-1 and EU (Ireland) eu-west-1

session   = boto3.Session(region_name='us-east-1',profile_name='ml_user')
ml_client = session.client('machinelearning' )

In [15]:
def upload_files_to_s3():
    """Upload the training, schema and batch-test files to the demo S3 bucket.

    Each file is read from ``data_path`` and stored in ``s3_bucket_name`` under
    the key ``<s3_folder_name>/<file name>``. The local path of every file is
    printed before it is uploaded.
    """
    # session.resource() returns an S3 service resource, not a low-level client.
    s3_resource = session.resource('s3')
    # Resolve the bucket once instead of once per file.
    bucket = s3_resource.Bucket(s3_bucket_name)
    file_names = [training_file, training_schema_file, batch_test_file]
    for file_name in file_names:
        file_path = os.path.join(data_path, file_name)
        print(file_path)
        # The S3 key is built with '/' explicitly; os.path.join is only used
        # for the local path.
        bucket.upload_file(file_path, s3_folder_name + '/' + file_name)

In [16]:
# Upload the three demo files; each local path is printed as it is processed.
upload_files_to_s3()

C:\AWSMLCourse\Data\ModelCreationDemo\iris_data_train.csv
C:\AWSMLCourse\Data\ModelCreationDemo\iris_data_train.csv.schema
C:\AWSMLCourse\Data\ModelCreationDemo\iris_data_classifier_test.csv


In [17]:
# Derived from
#https://github.com/awslabs/machine-learning-samples/blob/master/social-media/create-aml-model.py
def create_data_source(dataset_name,
                       s3_data_uri, s3_schema_uri,
                       ds_type, percent_begin,
                       percent_end,
                       compute_statistics):
    """Create an Amazon ML data source from a sequential slice of an S3 file.

    Args:
        dataset_name: Prefix of the human-readable data source name.
        s3_data_uri: S3 URI of the data file.
        s3_schema_uri: S3 URI of the schema file for the data.
        ds_type: Label for this data source (e.g. 'Training'). It is appended
            to 'ds-boto3-iris-' to form the data source id, so two calls with
            the same ds_type request the same id.
        percent_begin: Value sent as "percentBegin" of the split.
        percent_end: Value sent as "percentEnd" of the split.
        compute_statistics: Passed through as ComputeStatistics.

    Returns:
        The response of ``ml_client.create_data_source_from_s3``.
    """
    ds_id = "ds-boto3-iris-{0}".format(ds_type)

    # Serialise the split definition with json.dumps instead of a hand-escaped
    # format string. Compact separators keep the text identical to the previous
    # output for numeric percentages.
    rearrangement = json.dumps(
        {"splitting": {"percentBegin": percent_begin,
                       "percentEnd": percent_end,
                       "strategy": "sequential"}},
        separators=(',', ':'))

    data_spec = {
        'DataLocationS3': s3_data_uri,
        'DataSchemaLocationS3': s3_schema_uri,
        'DataRearrangement': rearrangement,
    }

    response = ml_client.create_data_source_from_s3(
        DataSourceId=ds_id,
        DataSourceName="{0}_[percentBegin={1}, percentEnd={2}]".format(
            dataset_name, percent_begin, percent_end),
        DataSpec=data_spec,
        ComputeStatistics=compute_statistics)

    # Printed only after the request has been accepted.
    print("Creating {0} datasource".format(ds_type))
    return response

In [18]:
# S3 locations of the training data and its schema: s3://<bucket>/<folder>/<file>
s3_train_uri = "s3://" + "/".join([s3_bucket_name, s3_folder_name, training_file])
s3_train_schema_uri = "s3://" + "/".join([s3_bucket_name, s3_folder_name, training_schema_file])

In [19]:
# Display both URIs to check they match the uploaded object keys.
s3_train_uri, s3_train_schema_uri

('s3://my-ml-demo/boto3_demo/iris_data_train.csv',
 's3://my-ml-demo/boto3_demo/iris_data_train.csv.schema')

In [20]:
# Training data source: the 0-70 percent slice of the file, with statistics computed.
train_datasource = create_data_source(
    dataset_name='iris_training',
    s3_data_uri=s3_train_uri,
    s3_schema_uri=s3_train_schema_uri,
    ds_type='Training',
    percent_begin=0,
    percent_end=70,
    compute_statistics=True)

Creating Training datasource


In [21]:
# Evaluation data source: the 70-100 percent slice of the same file and schema,
# without computing statistics.
eval_datasource = create_data_source(
    dataset_name='iris_evaluation',
    s3_data_uri=s3_train_uri,
    s3_schema_uri=s3_train_schema_uri,
    ds_type='Evaluation',
    percent_begin=70,
    percent_end=100,
    compute_statistics=False)

Creating Evaluation datasource


In [22]:
# Show the ids of the two data sources, one per line.
print("\n".join([train_datasource['DataSourceId'],
                 eval_datasource['DataSourceId']]))

ds-boto3-iris-Training
ds-boto3-iris-Evaluation


In [23]:
# Request a multiclass model trained on the training data source.
model_create_response = ml_client.create_ml_model(
    TrainingDataSourceId=train_datasource['DataSourceId'],
    MLModelType='MULTICLASS',
    MLModelName='ML model: iris-demo-from-code',
    MLModelId='ml-iris-demo',
)

In [24]:
# Display the raw response; 'MLModelId' is the key reused by the cells below.
model_create_response

{'MLModelId': 'ml-iris-demo',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '28',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Sun, 23 Oct 2016 04:12:46 GMT',
   'x-amzn-requestid': 'f3d27d32-98d6-11e6-8c33-95538750b699'},
  'HTTPStatusCode': 200,
  'RequestId': 'f3d27d32-98d6-11e6-8c33-95538750b699',
  'RetryAttempts': 0}}

In [27]:
# Query the model's current status. Possible values:
# 'PENDING' | 'INPROGRESS' | 'FAILED' | 'COMPLETED' | 'DELETED'
ml_client.get_ml_model(MLModelId = model_create_response['MLModelId'])['Status']

'PENDING'

In [28]:
# Request an evaluation of the model against the evaluation data source.
evaluation_response = ml_client.create_evaluation(
    EvaluationDataSourceId=eval_datasource['DataSourceId'],
    MLModelId=model_create_response['MLModelId'],
    EvaluationName='Eval ML model: iris-demo-from-code',
    EvaluationId='eval-iris-demo',
)

In [29]:
# Display the raw response; 'EvaluationId' is the key used to query the evaluation below.
evaluation_response

{'EvaluationId': 'eval-iris-demo',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '33',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Sun, 23 Oct 2016 04:14:31 GMT',
   'x-amzn-requestid': '32c39d67-98d7-11e6-9666-bda171dda03c'},
  'HTTPStatusCode': 200,
  'RequestId': '32c39d67-98d7-11e6-9666-bda171dda03c',
  'RetryAttempts': 0}}

In [39]:
# Fetch the evaluation, waiting until it is no longer 'PENDING' or 'INPROGRESS'.
# Status is one of 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED'.
# A single query right after create_evaluation returns an unfinished evaluation,
# so the cells below that read 'PerformanceMetrics' would not work on a fresh
# top-to-bottom run (the recorded run took about five minutes to finish).
eval_poll_seconds = 30                        # pause between status queries
eval_wait_deadline = time.time() + 30 * 60    # stop waiting after 30 minutes
while True:
    eval_result = ml_client.get_evaluation(EvaluationId=evaluation_response['EvaluationId'])
    if eval_result['Status'] not in ('PENDING', 'INPROGRESS'):
        break
    if time.time() >= eval_wait_deadline:
        # Keep the last response so the next cell still shows the current status.
        break
    time.sleep(eval_poll_seconds)

In [40]:
# Status of the evaluation as of the last get_evaluation call.
eval_result['Status']

'COMPLETED'

In [41]:
# Performance metrics of the evaluation; the recorded output for this multiclass
# model holds a single property, 'MulticlassAvgFScore'.
eval_result['PerformanceMetrics']

{'Properties': {'MulticlassAvgFScore': '0.9279503105590003'}}

In [42]:
# Display the full get_evaluation response.
# NOTE(review): the recorded output includes the IAM user ARN (with account id)
# and a pre-signed 'LogUri'; clear or redact this output before sharing the notebook.
eval_result

{'ComputeTime': 108000,
 'CreatedAt': datetime.datetime(2016, 10, 22, 21, 14, 31, 854000, tzinfo=tzlocal()),
 'CreatedByIamUser': 'arn:aws:iam::144943967277:user/ml_user',
 'EvaluationDataSourceId': 'ds-boto3-iris-Evaluation',
 'EvaluationId': 'eval-iris-demo',
 'FinishedAt': datetime.datetime(2016, 10, 22, 21, 19, 10, 294000, tzinfo=tzlocal()),
 'InputDataLocationS3': 's3://my-ml-demo/boto3_demo/iris_data_train.csv',
 'LastUpdatedAt': datetime.datetime(2016, 10, 22, 21, 19, 10, 294000, tzinfo=tzlocal()),
 'LogUri': 'https://eml-prod-emr.s3.amazonaws.com/144943967277-ev-eval-iris-demo/userlog/144943967277-ev-eval-iris-demo?AWSAccessKeyId=AKIAJ76NNIATX32EN2VA&Expires=1477801195&Signature=YO1wqv67lNHKr8iaSPI%2B4cTfYgE%3D',
 'MLModelId': 'ml-iris-demo',
 'Name': 'Eval ML model: iris-demo-from-code',
 'PerformanceMetrics': {'Properties': {'MulticlassAvgFScore': '0.9279503105590003'}},
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '748',
   'content-type': 'application/x-amz-json-