Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 137 lines (121 sloc) 4.56 KB
#!/usr/bin/env python
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
# or implied. See the License for the specific language governing permissions
# and limitations under the License.
"""
Demonstrates all the steps needed to build an ML Model
for the targeted marketing example in the Getting Started Guide
for Amazon Machine Learning
Usage:
python build_model.py ["Optional name for created objects"]
"""
import base64
import boto3
import json
import os
import sys
# S3 URL of the sample banking CSV; used as the input data location
# when this file is run as a script.
TRAINING_DATA_S3_URL = "s3://aml-sample-data/banking.csv"
def build_model(data_s3_url, schema_fn, recipe_fn, name, train_percent=70):
    """Create all the objects needed to build an ML Model & evaluate its quality.

    Creates a boto3 'machinelearning' client, then in order: the training
    and testing data sources, the ML Model trained on the training data
    source, and an evaluation of that model against the testing data source.

    Args:
        data_s3_url: S3 URL of the input data file.
        schema_fn: Path of the local file holding the data schema.
        recipe_fn: Path of the local file holding the feature recipe.
        name: Human-readable prefix used to name every created object.
        train_percent: Percentage of the data assigned to the training
            data source; the remainder goes to the testing data source.

    Returns:
        The id of the created ML Model.
    """
    ml = boto3.client('machinelearning')
    train_ds_id, test_ds_id = create_data_sources(
        ml, data_s3_url, schema_fn, train_percent, name)
    ml_model_id = create_model(ml, train_ds_id, recipe_fn, name)
    # The evaluation id is printed by create_evaluation; it is not needed here.
    create_evaluation(ml, ml_model_id, test_ds_id, name)
    return ml_model_id
def create_data_sources(ml, data_s3_url, schema_fn, train_percent, name):
    """Create two data sources from the same S3 data file.

    One holds the first (train_percent)% of the data and is used for
    training. The other holds the remainder of the data, which is commonly
    called the "test set" and is used to evaluate the quality of the ML Model.

    Args:
        ml: Client object exposing ``create_data_source_from_s3``.
        data_s3_url: S3 URL of the input data file.
        schema_fn: Path of the local file holding the data schema.
        train_percent: Split point, as a percentage, between the two sets.
        name: Human-readable prefix for the data source names.

    Returns:
        A tuple ``(train_ds_id, test_ds_id)`` of the generated ids.
    """
    # Read the schema once; `with` guarantees the file handle is closed.
    with open(schema_fn) as schema_file:
        schema = schema_file.read()

    # (name suffix, noun used in the log line, percentBegin, percentEnd)
    splits = (
        ("training split", "training", 0, train_percent),
        ("testing split", "test", train_percent, 100),
    )

    ds_ids = []
    for label, noun, percent_begin, percent_end in splits:
        # b32encode returns bytes; decode so the id is text and can be
        # concatenated with the 'ds-' prefix on Python 3 as well as Python 2.
        ds_id = 'ds-' + base64.b32encode(os.urandom(10)).decode('ascii')
        # A fresh spec per request avoids mutating a dict already handed
        # to an earlier call.
        spec = {
            "DataLocationS3": data_s3_url,
            "DataRearrangement": json.dumps({
                "splitting": {
                    "percentBegin": percent_begin,
                    "percentEnd": percent_end
                }
            }),
            "DataSchema": schema,
        }
        ml.create_data_source_from_s3(
            DataSourceId=ds_id,
            DataSpec=spec,
            DataSourceName=name + " - " + label,
            ComputeStatistics=True
        )
        print("Created %s data set %s" % (noun, ds_id))
        ds_ids.append(ds_id)

    return tuple(ds_ids)
def create_model(ml, train_ds_id, recipe_fn, name):
    """Create an ML Model object, which begins the training process.

    The quality of the model that the training algorithm produces depends
    primarily on the data, but also on the hyper-parameters specified
    in the parameters map, and the feature-processing recipe.

    Args:
        ml: Client object exposing ``create_ml_model``.
        train_ds_id: Id of the data source to train on.
        recipe_fn: Path of the local file holding the feature recipe.
        name: Human-readable prefix for the model name.

    Returns:
        The generated id of the ML Model.
    """
    # `with` guarantees the recipe file handle is closed after reading.
    with open(recipe_fn) as recipe_file:
        recipe = recipe_file.read()

    # b32encode returns bytes; decode so the id is text and can be
    # concatenated with the 'ml-' prefix on Python 3 as well as Python 2.
    model_id = 'ml-' + base64.b32encode(os.urandom(10)).decode('ascii')
    ml.create_ml_model(
        MLModelId=model_id,
        MLModelName=name + " model",
        MLModelType="BINARY",  # we're predicting True/False values
        Parameters={
            # Refer to the "Machine Learning Concepts" documentation
            # for guidelines on tuning your model
            "sgd.maxPasses": "100",
            "sgd.maxMLModelSizeInBytes": "104857600",  # 100 MiB
            "sgd.l2RegularizationAmount": "1e-4",
        },
        Recipe=recipe,
        TrainingDataSourceId=train_ds_id
    )
    print("Created ML Model %s" % model_id)
    return model_id
def create_evaluation(ml, model_id, test_ds_id, name):
    """Create an evaluation of an ML Model against a data source.

    Args:
        ml: Client object exposing ``create_evaluation``.
        model_id: Id of the ML Model to evaluate.
        test_ds_id: Id of the data source to evaluate the model against.
        name: Human-readable prefix for the evaluation name.

    Returns:
        The generated id of the evaluation.
    """
    # b32encode returns bytes; decode so the id is text and can be
    # concatenated with the 'ev-' prefix on Python 3 as well as Python 2.
    eval_id = 'ev-' + base64.b32encode(os.urandom(10)).decode('ascii')
    ml.create_evaluation(
        EvaluationId=eval_id,
        EvaluationName=name + " evaluation",
        MLModelId=model_id,
        EvaluationDataSourceId=test_ds_id
    )
    print("Created Evaluation %s" % eval_id)
    return eval_id
if __name__ == "__main__":
    # Script entry point: build the model from the sample data using the
    # schema and recipe files expected in the current working directory.
    data_s3_url = TRAINING_DATA_S3_URL
    schema_fn = "banking.csv.schema"
    recipe_fn = "recipe.json"
    # sys.argv[0] is the script itself, so the single optional name
    # argument is present as soon as there is more than one entry.
    if len(sys.argv) > 1:
        name = sys.argv[1]
    else:
        name = "Marketing sample"
    model_id = build_model(data_s3_url, schema_fn, recipe_fn, name=name)
    print("""\nFor the next step in the demo, run:
python use_model.py %s 0.77 s3://your-bucket/ml-output/""" % model_id)
You can’t perform that action at this time.