From dff321469467339341bff828d94d94e7c22ba591 Mon Sep 17 00:00:00 2001 From: John M Calhoun Date: Fri, 29 Mar 2019 17:01:27 -0400 Subject: [PATCH] removed BYOD example, fixed external repo bug --- example/README.md | 2 - example/bin/build.py | 22 -- example/config/endpoint.py | 12 - example/config/training.py | 39 -- example/inference/Dockerfile | 41 -- example/inference/decision_trees/nginx.conf | 38 -- example/inference/decision_trees/predictor.py | 83 ---- example/inference/decision_trees/serve | 71 ---- example/inference/decision_trees/train | 84 ----- example/inference/decision_trees/wsgi.py | 7 - example/train/Dockerfile | 41 -- example/train/decision_trees/nginx.conf | 38 -- example/train/decision_trees/predictor.py | 83 ---- example/train/decision_trees/serve | 71 ---- example/train/decision_trees/train | 84 ----- example/train/decision_trees/wsgi.py | 7 - sagebuild/SageMakerNotebook/index.js | 2 +- .../BYOD/bring_your_own_docker.ipynb | 355 ------------------ .../SageMakerNotebook/notebooks/BYOD/iris.csv | 150 -------- .../tensorflow_distributed_mnist.ipynb | 2 +- sagebuild/alexa/index.js | 2 +- sagebuild/bin/check.js | 1 + sagebuild/cfn/index.js | 2 +- sagebuild/codebuild/index.js | 2 +- sagebuild/index.js | 15 +- sagebuild/info/interface.js | 3 +- sagebuild/info/parameters.js | 12 +- sagebuild/step_function/index.js | 6 +- .../lambdas/config/amazon/ModelConfig.js | 9 +- .../lambdas/config/byod/ModelConfig.js | 10 +- .../step_function/lambdas/config/index.js | 2 +- .../lambdas/config/tensorflow/ModelConfig.js | 9 +- sagebuild/step_function/lambdas/core/index.js | 2 +- .../lambdas/core/sagemaker/HPOStatus.js | 4 +- .../lambdas/core/sagemaker/StartHPO.js | 18 +- sagebuild/step_function/launch/index.js | 8 +- .../stateMachines/DockerTrainDeploy/train.js | 4 + 37 files changed, 80 insertions(+), 1261 deletions(-) delete mode 100644 example/README.md delete mode 100755 example/bin/build.py delete mode 100644 example/config/endpoint.py delete mode 100644 example/config/training.py delete mode 100644 example/inference/Dockerfile delete mode 100644 example/inference/decision_trees/nginx.conf delete mode 100644 example/inference/decision_trees/predictor.py delete mode 100755 example/inference/decision_trees/serve delete mode 100755 example/inference/decision_trees/train delete mode 100644 example/inference/decision_trees/wsgi.py delete mode 100644 example/train/Dockerfile delete mode 100644 example/train/decision_trees/nginx.conf delete mode 100644 example/train/decision_trees/predictor.py delete mode 100755 example/train/decision_trees/serve delete mode 100755 example/train/decision_trees/train delete mode 100644 example/train/decision_trees/wsgi.py delete mode 100644 sagebuild/SageMakerNotebook/notebooks/BYOD/bring_your_own_docker.ipynb delete mode 100644 sagebuild/SageMakerNotebook/notebooks/BYOD/iris.csv diff --git a/example/README.md b/example/README.md deleted file mode 100644 index 43ed563..0000000 --- a/example/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# SageMaker Build Example -This directory contains example cdoe and assets for the tutorial jupyter notebook. diff --git a/example/bin/build.py b/example/bin/build.py deleted file mode 100755 index 347c6f1..0000000 --- a/example/bin/build.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -import boto3 - -cf = boto3.client('cloudformation') -sns = boto3.client('sns') -step=boto3.client('stepfunctions') - -StackName='SageBuild-40' -#Get outputs from build stack -result=cf.describe_stacks( - StackName=StackName -) -outputs={} -for output in result['Stacks'][0]['Outputs']: - outputs[output['OutputKey']]=output['OutputValue'] - -#Start an execution through sns -result=sns.publish( - TopicArn=outputs['LaunchTopic'], - Message="start" -) - diff --git a/example/config/endpoint.py b/example/config/endpoint.py deleted file mode 100644 index 97c2ece..0000000 --- a/example/config/endpoint.py +++ /dev/null @@ -1,12 +0,0 @@ -import json - -def handler(event,context): - print(json.dumps(event,indent=2)) - return { - "ProductionVariants": [{ - "InitialInstanceCount": 1, - "InstanceType": "ml.t2.medium", - "ModelName":event["model"]["name"], - "VariantName":"prod", - }] - } diff --git a/example/config/training.py b/example/config/training.py deleted file mode 100644 index 9a4594a..0000000 --- a/example/config/training.py +++ /dev/null @@ -1,39 +0,0 @@ -import json - -def handler(event,context): - print(json.dumps(event,indent=2)) - return { - "AlgorithmSpecification": { - "TrainingImage":event["images"]["train"], - "TrainingInputMode": "File" - }, - "InputDataConfig": [ - { - "ChannelName": "training", - "DataSource": { - "S3DataSource": { - "S3DataType": "S3Prefix", - "S3Uri":f"s3://{event['Buckets']['Data']}/train/", - "S3DataDistributionType": "FullyReplicated" - } - }, - "CompressionType": "None", - "RecordWrapperType": "None" - }, - ], - "OutputDataConfig": { - 'S3OutputPath':f"s3://{event['Buckets']['Artifact']}", - }, - "ResourceConfig": { - "InstanceCount": 1, - "InstanceType": "ml.m4.xlarge" , - "VolumeSizeInGB": 1, - }, - "RoleArn":event["params"]["training"]["role"], - "StoppingCondition": { - "MaxRuntimeInSeconds": 600 - }, - "TrainingJobName":event["name"], - "HyperParameters": {}, - "Tags": [] - } diff --git a/example/inference/Dockerfile b/example/inference/Dockerfile deleted file mode 100644 index cf16121..0000000 --- a/example/inference/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Build an image that can do training and inference in SageMaker -# This is a Python 2 image that uses the nginx, gunicorn, flask stack -# for serving inferences in a stable way. - -FROM ubuntu:16.04 - -MAINTAINER Amazon AI - -RUN apt-get -y update && apt-get install -y \ - wget \ - python \ - nginx \ - ca-certificates \ - build-essential \ - python-dev \ - python3-dev \ - && rm -rf /var/lib/apt/lists/* - -# Here we get all python packages. -# There's substantial overlap between scipy and numpy that we eliminate by -# linking them together. Likewise, pip leaves the install caches populated which uses -# a significant amount of space. These optimizations save a fair amount of space in the -# image, which reduces start up time. -RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py && \ - pip install numpy==1.14.5 scipy scikit-learn pandas flask gevent gunicorn && \ - (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) && \ - rm -rf /root/.cache - -# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard -# output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE -# keeps Python from writing the .pyc files which are unnecessary in this case. We also update -# PATH so that the train and serve programs are found when the container is invoked. - -ENV PYTHONUNBUFFERED=TRUE -ENV PYTHONDONTWRITEBYTECODE=TRUE -ENV PATH="/opt/program:${PATH}" - -# Set up the program in the image -COPY decision_trees /opt/program -WORKDIR /opt/program - diff --git a/example/inference/decision_trees/nginx.conf b/example/inference/decision_trees/nginx.conf deleted file mode 100644 index cb581ac..0000000 --- a/example/inference/decision_trees/nginx.conf +++ /dev/null @@ -1,38 +0,0 @@ -worker_processes 1; -daemon off; # Prevent forking - - -pid /tmp/nginx.pid; -error_log /var/log/nginx/error.log; - -events { - # defaults -} - -http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - access_log /var/log/nginx/access.log combined; - - upstream gunicorn { - server unix:/tmp/gunicorn.sock; - } - - server { - listen 8080 deferred; - client_max_body_size 5m; - - keepalive_timeout 5; - - location ~ ^/(ping|invocations) { - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header Host $http_host; - proxy_redirect off; - proxy_pass http://gunicorn; - } - - location / { - return 404 "{}"; - } - } -} diff --git a/example/inference/decision_trees/predictor.py b/example/inference/decision_trees/predictor.py deleted file mode 100644 index 3a2fbc1..0000000 --- a/example/inference/decision_trees/predictor.py +++ /dev/null @@ -1,83 +0,0 @@ -# This is the file that implements a flask server to do inferences. It's the file that you will modify to -# implement the scoring for your own algorithm. - -from __future__ import print_function - -import os -import json -import pickle -import StringIO -import sys -import signal -import traceback - -import flask - -import pandas as pd - -prefix = '/opt/ml/' -model_path = os.path.join(prefix, 'model') - -# A singleton for holding the model. This simply loads the model and holds it. -# It has a predict function that does a prediction based on the model and the input data. - -class ScoringService(object): - model = None # Where we keep the model when it's loaded - - @classmethod - def get_model(cls): - """Get the model object for this instance, loading it if it's not already loaded.""" - if cls.model == None: - with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'r') as inp: - cls.model = pickle.load(inp) - return cls.model - - @classmethod - def predict(cls, input): - """For the input, do the predictions and return them. - - Args: - input (a pandas dataframe): The data on which to do the predictions. There will be - one prediction per row in the dataframe""" - clf = cls.get_model() - return clf.predict(input) - -# The flask app for serving predictions -app = flask.Flask(__name__) - -@app.route('/ping', methods=['GET']) -def ping(): - """Determine if the container is working and healthy. In this sample container, we declare - it healthy if we can load the model successfully.""" - health = ScoringService.get_model() is not None # You can insert a health check here - - status = 200 if health else 404 - return flask.Response(response='\n', status=status, mimetype='application/json') - -@app.route('/invocations', methods=['POST']) -def transformation(): - """Do an inference on a single batch of data. In this sample server, we take data as CSV, convert - it to a pandas data frame for internal use and then convert the predictions back to CSV (which really - just means one prediction per line, since there's a single column. - """ - data = None - - # Convert from CSV to pandas - if flask.request.content_type == 'text/csv': - data = flask.request.data.decode('utf-8') - s = StringIO.StringIO(data) - data = pd.read_csv(s, header=None) - else: - return flask.Response(response='This predictor only supports CSV data', status=415, mimetype='text/plain') - - print('Invoked with {} records'.format(data.shape[0])) - - # Do the prediction - predictions = ScoringService.predict(data) - - # Convert from numpy back to CSV - out = StringIO.StringIO() - pd.DataFrame({'results':predictions}).to_csv(out, header=False, index=False) - result = out.getvalue() - - return flask.Response(response=result, status=200, mimetype='text/csv') diff --git a/example/inference/decision_trees/serve b/example/inference/decision_trees/serve deleted file mode 100755 index 6747fbd..0000000 --- a/example/inference/decision_trees/serve +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -# This file implements the scoring service shell. You don't necessarily need to modify it for various -# algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until -# gunicorn exits. -# -# The flask server is specified to be the app object in wsgi.py -# -# We set the following parameters: -# -# Parameter Environment Variable Default Value -# --------- -------------------- ------------- -# number of workers MODEL_SERVER_WORKERS the number of CPU cores -# timeout MODEL_SERVER_TIMEOUT 60 seconds - -from __future__ import print_function -import multiprocessing -import os -import signal -import subprocess -import sys - -cpu_count = multiprocessing.cpu_count() - -model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) -model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) - -def sigterm_handler(nginx_pid, gunicorn_pid): - try: - os.kill(nginx_pid, signal.SIGQUIT) - except OSError: - pass - try: - os.kill(gunicorn_pid, signal.SIGTERM) - except OSError: - pass - - sys.exit(0) - -def start_server(): - print('Starting the inference server with {} workers.'.format(model_server_workers)) - - - # link the log streams to stdout/err so they will be logged to the container logs - subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) - subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) - - nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) - gunicorn = subprocess.Popen(['gunicorn', - '--timeout', str(model_server_timeout), - '-k', 'gevent', - '-b', 'unix:/tmp/gunicorn.sock', - '-w', str(model_server_workers), - 'wsgi:app']) - - signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) - - # If either subprocess exits, so do we. - pids = set([nginx.pid, gunicorn.pid]) - while True: - pid, _ = os.wait() - if pid in pids: - break - - sigterm_handler(nginx.pid, gunicorn.pid) - print('Inference server exiting') - -# The main routine just invokes the start function. - -if __name__ == '__main__': - start_server() diff --git a/example/inference/decision_trees/train b/example/inference/decision_trees/train deleted file mode 100755 index f6a216b..0000000 --- a/example/inference/decision_trees/train +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -# A sample training component that trains a simple scikit-learn decision tree model. -# This implementation works in File mode and makes no assumptions about the input file names. -# Input is specified as CSV with a data point in each row and the labels in the first column. - -from __future__ import print_function - -import os -import json -import pickle -import sys -import traceback - -import pandas as pd - -from sklearn import tree - -# These are the paths to where SageMaker mounts interesting things in your container. - -prefix = '/opt/ml/' - -input_path = prefix + 'input/data' -output_path = os.path.join(prefix, 'output') -model_path = os.path.join(prefix, 'model') -param_path = os.path.join(prefix, 'input/config/hyperparameters.json') - -# This algorithm has a single channel of input data called 'training'. Since we run in -# File mode, the input files are copied to the directory specified here. -channel_name='training' -training_path = os.path.join(input_path, channel_name) - -# The function to execute the training. -def train(): - print('Starting the training.') - try: - # Read in any hyperparameters that the user passed with the training job - with open(param_path, 'r') as tc: - trainingParams = json.load(tc) - - # Take the set of files and read them all into a single pandas dataframe - input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ] - if len(input_files) == 0: - raise ValueError(('There are no files in {}.\n' + - 'This usually indicates that the channel ({}) was incorrectly specified,\n' + - 'the data specification in S3 was incorrectly specified or the role specified\n' + - 'does not have permission to access the data.').format(training_path, channel_name)) - raw_data = [ pd.read_csv(file, header=None) for file in input_files ] - train_data = pd.concat(raw_data) - - # labels are in the first column - train_y = train_data.ix[:,0] - train_X = train_data.ix[:,1:] - - # Here we only support a single hyperparameter. Note that hyperparameters are always passed in as - # strings, so we need to do any necessary conversions. - max_leaf_nodes = trainingParams.get('max_leaf_nodes', None) - if max_leaf_nodes is not None: - max_leaf_nodes = int(max_leaf_nodes) - - # Now use scikit-learn's decision tree classifier to train the model. - clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes) - clf = clf.fit(train_X, train_y) - - # save the model - with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'w') as out: - pickle.dump(clf, out) - print('Training complete.') - except Exception as e: - # Write out an error file. This will be returned as the failureReason in the - # DescribeTrainingJob result. - trc = traceback.format_exc() - with open(os.path.join(output_path, 'failure'), 'w') as s: - s.write('Exception during training: ' + str(e) + '\n' + trc) - # Printing this causes the exception to be in the training job logs, as well. - print('Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) - # A non-zero exit code causes the training job to be marked as Failed. - sys.exit(255) - -if __name__ == '__main__': - train() - - # A zero exit code causes the job to be marked a Succeeded. - sys.exit(0) diff --git a/example/inference/decision_trees/wsgi.py b/example/inference/decision_trees/wsgi.py deleted file mode 100644 index 2884057..0000000 --- a/example/inference/decision_trees/wsgi.py +++ /dev/null @@ -1,7 +0,0 @@ -import predictor as myapp - -# This is just a simple wrapper for gunicorn to find your app. -# If you want to change the algorithm file, simply change "predictor" above to the -# new file. - -app = myapp.app diff --git a/example/train/Dockerfile b/example/train/Dockerfile deleted file mode 100644 index cf16121..0000000 --- a/example/train/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Build an image that can do training and inference in SageMaker -# This is a Python 2 image that uses the nginx, gunicorn, flask stack -# for serving inferences in a stable way. - -FROM ubuntu:16.04 - -MAINTAINER Amazon AI - -RUN apt-get -y update && apt-get install -y \ - wget \ - python \ - nginx \ - ca-certificates \ - build-essential \ - python-dev \ - python3-dev \ - && rm -rf /var/lib/apt/lists/* - -# Here we get all python packages. -# There's substantial overlap between scipy and numpy that we eliminate by -# linking them together. Likewise, pip leaves the install caches populated which uses -# a significant amount of space. These optimizations save a fair amount of space in the -# image, which reduces start up time. -RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py && \ - pip install numpy==1.14.5 scipy scikit-learn pandas flask gevent gunicorn && \ - (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) && \ - rm -rf /root/.cache - -# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard -# output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE -# keeps Python from writing the .pyc files which are unnecessary in this case. We also update -# PATH so that the train and serve programs are found when the container is invoked. - -ENV PYTHONUNBUFFERED=TRUE -ENV PYTHONDONTWRITEBYTECODE=TRUE -ENV PATH="/opt/program:${PATH}" - -# Set up the program in the image -COPY decision_trees /opt/program -WORKDIR /opt/program - diff --git a/example/train/decision_trees/nginx.conf b/example/train/decision_trees/nginx.conf deleted file mode 100644 index cb581ac..0000000 --- a/example/train/decision_trees/nginx.conf +++ /dev/null @@ -1,38 +0,0 @@ -worker_processes 1; -daemon off; # Prevent forking - - -pid /tmp/nginx.pid; -error_log /var/log/nginx/error.log; - -events { - # defaults -} - -http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - access_log /var/log/nginx/access.log combined; - - upstream gunicorn { - server unix:/tmp/gunicorn.sock; - } - - server { - listen 8080 deferred; - client_max_body_size 5m; - - keepalive_timeout 5; - - location ~ ^/(ping|invocations) { - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header Host $http_host; - proxy_redirect off; - proxy_pass http://gunicorn; - } - - location / { - return 404 "{}"; - } - } -} diff --git a/example/train/decision_trees/predictor.py b/example/train/decision_trees/predictor.py deleted file mode 100644 index 3a2fbc1..0000000 --- a/example/train/decision_trees/predictor.py +++ /dev/null @@ -1,83 +0,0 @@ -# This is the file that implements a flask server to do inferences. It's the file that you will modify to -# implement the scoring for your own algorithm. - -from __future__ import print_function - -import os -import json -import pickle -import StringIO -import sys -import signal -import traceback - -import flask - -import pandas as pd - -prefix = '/opt/ml/' -model_path = os.path.join(prefix, 'model') - -# A singleton for holding the model. This simply loads the model and holds it. -# It has a predict function that does a prediction based on the model and the input data. - -class ScoringService(object): - model = None # Where we keep the model when it's loaded - - @classmethod - def get_model(cls): - """Get the model object for this instance, loading it if it's not already loaded.""" - if cls.model == None: - with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'r') as inp: - cls.model = pickle.load(inp) - return cls.model - - @classmethod - def predict(cls, input): - """For the input, do the predictions and return them. - - Args: - input (a pandas dataframe): The data on which to do the predictions. There will be - one prediction per row in the dataframe""" - clf = cls.get_model() - return clf.predict(input) - -# The flask app for serving predictions -app = flask.Flask(__name__) - -@app.route('/ping', methods=['GET']) -def ping(): - """Determine if the container is working and healthy. In this sample container, we declare - it healthy if we can load the model successfully.""" - health = ScoringService.get_model() is not None # You can insert a health check here - - status = 200 if health else 404 - return flask.Response(response='\n', status=status, mimetype='application/json') - -@app.route('/invocations', methods=['POST']) -def transformation(): - """Do an inference on a single batch of data. In this sample server, we take data as CSV, convert - it to a pandas data frame for internal use and then convert the predictions back to CSV (which really - just means one prediction per line, since there's a single column. - """ - data = None - - # Convert from CSV to pandas - if flask.request.content_type == 'text/csv': - data = flask.request.data.decode('utf-8') - s = StringIO.StringIO(data) - data = pd.read_csv(s, header=None) - else: - return flask.Response(response='This predictor only supports CSV data', status=415, mimetype='text/plain') - - print('Invoked with {} records'.format(data.shape[0])) - - # Do the prediction - predictions = ScoringService.predict(data) - - # Convert from numpy back to CSV - out = StringIO.StringIO() - pd.DataFrame({'results':predictions}).to_csv(out, header=False, index=False) - result = out.getvalue() - - return flask.Response(response=result, status=200, mimetype='text/csv') diff --git a/example/train/decision_trees/serve b/example/train/decision_trees/serve deleted file mode 100755 index 6747fbd..0000000 --- a/example/train/decision_trees/serve +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -# This file implements the scoring service shell. You don't necessarily need to modify it for various -# algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until -# gunicorn exits. -# -# The flask server is specified to be the app object in wsgi.py -# -# We set the following parameters: -# -# Parameter Environment Variable Default Value -# --------- -------------------- ------------- -# number of workers MODEL_SERVER_WORKERS the number of CPU cores -# timeout MODEL_SERVER_TIMEOUT 60 seconds - -from __future__ import print_function -import multiprocessing -import os -import signal -import subprocess -import sys - -cpu_count = multiprocessing.cpu_count() - -model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) -model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) - -def sigterm_handler(nginx_pid, gunicorn_pid): - try: - os.kill(nginx_pid, signal.SIGQUIT) - except OSError: - pass - try: - os.kill(gunicorn_pid, signal.SIGTERM) - except OSError: - pass - - sys.exit(0) - -def start_server(): - print('Starting the inference server with {} workers.'.format(model_server_workers)) - - - # link the log streams to stdout/err so they will be logged to the container logs - subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) - subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) - - nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) - gunicorn = subprocess.Popen(['gunicorn', - '--timeout', str(model_server_timeout), - '-k', 'gevent', - '-b', 'unix:/tmp/gunicorn.sock', - '-w', str(model_server_workers), - 'wsgi:app']) - - signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) - - # If either subprocess exits, so do we. - pids = set([nginx.pid, gunicorn.pid]) - while True: - pid, _ = os.wait() - if pid in pids: - break - - sigterm_handler(nginx.pid, gunicorn.pid) - print('Inference server exiting') - -# The main routine just invokes the start function. - -if __name__ == '__main__': - start_server() diff --git a/example/train/decision_trees/train b/example/train/decision_trees/train deleted file mode 100755 index f6a216b..0000000 --- a/example/train/decision_trees/train +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -# A sample training component that trains a simple scikit-learn decision tree model. -# This implementation works in File mode and makes no assumptions about the input file names. -# Input is specified as CSV with a data point in each row and the labels in the first column. - -from __future__ import print_function - -import os -import json -import pickle -import sys -import traceback - -import pandas as pd - -from sklearn import tree - -# These are the paths to where SageMaker mounts interesting things in your container. - -prefix = '/opt/ml/' - -input_path = prefix + 'input/data' -output_path = os.path.join(prefix, 'output') -model_path = os.path.join(prefix, 'model') -param_path = os.path.join(prefix, 'input/config/hyperparameters.json') - -# This algorithm has a single channel of input data called 'training'. Since we run in -# File mode, the input files are copied to the directory specified here. -channel_name='training' -training_path = os.path.join(input_path, channel_name) - -# The function to execute the training. -def train(): - print('Starting the training.') - try: - # Read in any hyperparameters that the user passed with the training job - with open(param_path, 'r') as tc: - trainingParams = json.load(tc) - - # Take the set of files and read them all into a single pandas dataframe - input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ] - if len(input_files) == 0: - raise ValueError(('There are no files in {}.\n' + - 'This usually indicates that the channel ({}) was incorrectly specified,\n' + - 'the data specification in S3 was incorrectly specified or the role specified\n' + - 'does not have permission to access the data.').format(training_path, channel_name)) - raw_data = [ pd.read_csv(file, header=None) for file in input_files ] - train_data = pd.concat(raw_data) - - # labels are in the first column - train_y = train_data.ix[:,0] - train_X = train_data.ix[:,1:] - - # Here we only support a single hyperparameter. Note that hyperparameters are always passed in as - # strings, so we need to do any necessary conversions. - max_leaf_nodes = trainingParams.get('max_leaf_nodes', None) - if max_leaf_nodes is not None: - max_leaf_nodes = int(max_leaf_nodes) - - # Now use scikit-learn's decision tree classifier to train the model. - clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes) - clf = clf.fit(train_X, train_y) - - # save the model - with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'w') as out: - pickle.dump(clf, out) - print('Training complete.') - except Exception as e: - # Write out an error file. This will be returned as the failureReason in the - # DescribeTrainingJob result. - trc = traceback.format_exc() - with open(os.path.join(output_path, 'failure'), 'w') as s: - s.write('Exception during training: ' + str(e) + '\n' + trc) - # Printing this causes the exception to be in the training job logs, as well. - print('Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) - # A non-zero exit code causes the training job to be marked as Failed. - sys.exit(255) - -if __name__ == '__main__': - train() - - # A zero exit code causes the job to be marked a Succeeded. - sys.exit(0) diff --git a/example/train/decision_trees/wsgi.py b/example/train/decision_trees/wsgi.py deleted file mode 100644 index 2884057..0000000 --- a/example/train/decision_trees/wsgi.py +++ /dev/null @@ -1,7 +0,0 @@ -import predictor as myapp - -# This is just a simple wrapper for gunicorn to find your app. -# If you want to change the algorithm file, simply change "predictor" above to the -# new file. - -app = myapp.app diff --git a/sagebuild/SageMakerNotebook/index.js b/sagebuild/SageMakerNotebook/index.js index 48fa068..2504e85 100644 --- a/sagebuild/SageMakerNotebook/index.js +++ b/sagebuild/SageMakerNotebook/index.js @@ -81,7 +81,7 @@ module.exports={ },{ "Effect": "Allow", "Action": ["codecommit:*"], - "Resource":[{"Fn::GetAtt":["CodeRepo","Arn"]}] + "Resource":[{"Fn::GetAtt":["Variables","RepoArn"]}] },{ "Effect": "Allow", "Action": ["sagemaker:InvokeEndpoint"], diff --git a/sagebuild/SageMakerNotebook/notebooks/BYOD/bring_your_own_docker.ipynb b/sagebuild/SageMakerNotebook/notebooks/BYOD/bring_your_own_docker.ipynb deleted file mode 100644 index fcdfbf9..0000000 --- a/sagebuild/SageMakerNotebook/notebooks/BYOD/bring_your_own_docker.ipynb +++ /dev/null @@ -1,355 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SageBuild Tutorial\n", - "\n", - "This notebook will walk you through on how to use Sagebuild to build and deploy custom models on-demand or in response to events. We will reuse the code from the \"scikit_bring_your_own\" example notebook.\n", - "\n", - "## Helpfull Links\n", - "* [Blog Post]() to see the details of how SageBuild works. \n", - "* [See here](/notebooks/sample-notebooks/advanced_functionality/scikit_bring_your_own/scikit_bring_your_own.ipynb) for details of how to write Dockerfiles for your own algorithms.\n", - "\n", - "## Table of Contents\n", - "1. [Setup](#SetUp)\n", - "2. [Deploy](#Deploy)\n", - "3. [Wait](#Wait)\n", - "4. [Use](#Use)\n", - "5. [Conclusion](#Conclusion)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## SetUp \n", - "The following sets up the packages and variables we need. Note, the region and StackName variables have been filled in for you by the cloudformation template." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import boto3\n", - "import json\n", - "from subprocess import check_output as run\n", - "from subprocess import STDOUT\n", - "from botocore.exceptions import ClientError\n", - "from time import sleep\n", - "import numpy as np\n", - "import pandas as pd\n", - "from io import StringIO\n", - "\n", - "cf = boto3.client('cloudformation')\n", - "sns = boto3.client('sns')\n", - "step = boto3.client('stepfunctions')\n", - "s3 = boto3.resource('s3')\n", - "ssm = boto3.client('ssm')\n", - "sagemaker = boto3.client('sagemaker-runtime')\n", - "Lambda=boto3.client('lambda')\n", - "\n", - "with open('../config.json') as json_file: \n", - " data = json.load(json_file)\n", - " \n", - "region=data['Region']\n", - "StackName=data['StackName']\n", - "data='iris.csv'\n", - "\n", - "#Get outputs from build stack\n", - "result=cf.describe_stacks(\n", - " StackName=StackName\n", - ")\n", - "#Put Outputs in a dict for easy use\n", - "outputs={}\n", - "for output in result['Stacks'][0]['Outputs']:\n", - " outputs[output['OutputKey']]=output['OutputValue']\n", - "print(\"Stack Outputs\")\n", - "print(json.dumps(outputs,indent=4))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need to make sure the Sagebuild template is configured correctly for MXNET. the following code will set the stack configuration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "params=result[\"Stacks\"][0][\"Parameters\"]\n", - "for n,i in enumerate(params):\n", - " if(i[\"ParameterKey\"]==\"ConfigFramework\"):\n", - " i[\"ParameterValue\"]=\"BYOD\" \n", - "\n", - "try:\n", - " cf.update_stack(\n", - " StackName=StackName,\n", - " UsePreviousTemplate=True,\n", - " Parameters=params,\n", - " Capabilities=[\n", - " 'CAPABILITY_NAMED_IAM',\n", - " ]\n", - " )\n", - " waiter = cf.get_waiter('stack_update_complete')\n", - " print(\"Waiting for stack update\")\n", - " waiter.wait(\n", - " StackName=StackName,\n", - " WaiterConfig={\n", - " 'Delay':10,\n", - " 'MaxAttempts':600\n", - " }\n", - " )\n", - "\n", - "except ClientError as e:\n", - " if(e.response[\"Error\"][\"Message\"]==\"No updates are to be performed.\"):\n", - " pass\n", - " else:\n", - " raise e\n", - "print(\"stack ready!\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## configuration\n", - "\n", - "Both the training-job and endpoint have various configuration parameters. The build generates those parameters by calling two lambda functions with the current build state and the parameters stored in SSM Parameter store. In the following we change the parameters in the Parameter store to match our build.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "store=outputs[\"ParameterStore\"]\n", - "result=ssm.get_parameter(Name=store)\n", - "\n", - "params=json.loads(result[\"Parameter\"][\"Value\"])\n", - "params[\"dockerfile_path_Training\"]=\"example/train\"\n", - "params[\"dockerfile_path_Inference\"]=\"example/inference\"\n", - "params[\"hyperparameters\"]={}\n", - "params[\"channels\"]={\n", - " \"training\":{\n", - " \"path\":\"training/iris\"\n", - " }\n", - "}\n", - "\n", - "ssm.put_parameter(\n", - " Name=store,\n", - " Type=\"String\",\n", - " Overwrite=True,\n", - " Value=json.dumps(params)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The follow shell commands will configure git to be able to access AWS CodeCommit and clone down the example repo. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#configure git to be able to access CodeCommit,uses SageMaker Instance's role for permissions.\n", - "!git config --global credential.helper '!aws codecommit credential-helper $@'\n", - "!git config --global credential.UseHttpPath true\n", - "\n", - "#clone down our example code\n", - "!git clone https://github.com/aws-samples/aws-sagemaker-build.git\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deploy! \n", - "The following will \n", - "- add the CodeCommit repo created by the cloudformation template as a remote named deploy\n", - "- push example code to repo (will trigger a build)\n", - "- upload our data to the DataBucket created by the Cloudformation template (will trigger a build)\n", - "\n", - "Once a build has started no new build can be started till the first one finishes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#push our Dockerfile code to the \"deploy\" CodeCommit repo\n", - "run(\"cd aws-sagemaker-build && git remote add deploy {0}; git push deploy master\".format(outputs['RepoUrl']),\n", - " stderr=STDOUT,\n", - " shell=True) \n", - "print(\"code Pushed\")\n", - "\n", - "#upload the data to the DataBucket\n", - "object = s3.Object(outputs[\"DataBucket\"],'training/iris/data.csv')\n", - "object.upload_file(data) \n", - "print(\"data uploaded\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also trigger a build by publishing to the launch topic directly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result=sns.publish(\n", - " TopicArn=outputs['LaunchTopic'],\n", - " Message=\"{}\" #message is not important, just publishing to topic starts build\n", - ")\n", - "print(\"message published\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Wait \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use the following code to get a notification " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result=sns.subscribe(\n", - " TopicArn=outputs['TrainStatusTopic'],\n", - " Protocol=\"SMS\",\n", - " Endpoint=\"#-###-###-####\" #put your phone number here\n", - ")\n", - "print(\"subscribed to topic\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can get the status of StateMachine as it builds and deploys our custom model. We can then setup a some code to wait for our build to complete" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time \n", - "#list all executions for our StateMachine to get our current running one\n", - "result=step.list_executions(\n", - " stateMachineArn=outputs['StateMachine'],\n", - " statusFilter=\"RUNNING\"\n", - ")[\"executions\"]\n", - "print(result)\n", - "if len(result) > 0:\n", - " response = step.describe_execution(\n", - " executionArn=result[0]['executionArn']\n", - " )\n", - " status=response['status']\n", - " print(status,response['name'])\n", - " #poll status till execution finishes\n", - " while status == \"RUNNING\":\n", - " print('.',end=\"\")\n", - " sleep(5)\n", - " status=step.describe_execution(executionArn=result[0]['executionArn'])['status']\n", - " print()\n", - " print(status)\n", - "else:\n", - " print(\"no running tasks\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Use \n", - "Next we get some data and send to our newly deployed endpoint!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time \n", - "test_data=pd.read_csv(data, header=None).sample(10)\n", - "test_X=test_data.iloc[:,1:]\n", - "test_y=test_data.iloc[:,0]\n", - "\n", - "#convert test_X to csv\n", - "Body=str.encode(test_X.to_csv(header=False,index=False))\n", - "\n", - "result=sagemaker.invoke_endpoint(\n", - " EndpointName=outputs[\"SageMakerEndpoint\"],\n", - " Body=Body, \n", - " ContentType=\"text/csv\",\n", - " Accept=\"text/csv\"\n", - ")\n", - "\n", - "print(pd.read_csv(StringIO(result['Body'].read().decode('utf-8')),header=None))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion \n", - "\n", - "Hopefully SageBuild can help you develop and deploy SageMaker custom models faster and easier. If you have any problems please let us know in our github issues [here](https://github.com/aws-samples/aws-sagemaker-build/issues). Feel free to send us pull request too!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_python3", - "language": "python", - "name": "conda_python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sagebuild/SageMakerNotebook/notebooks/BYOD/iris.csv b/sagebuild/SageMakerNotebook/notebooks/BYOD/iris.csv deleted file mode 100644 index 6abe4af..0000000 --- a/sagebuild/SageMakerNotebook/notebooks/BYOD/iris.csv +++ /dev/null @@ -1,150 +0,0 @@ -setosa,5.1,3.5,1.4,0.2 -setosa,4.9,3,1.4,0.2 -setosa,4.7,3.2,1.3,0.2 -setosa,4.6,3.1,1.5,0.2 -setosa,5,3.6,1.4,0.2 -setosa,5.4,3.9,1.7,0.4 -setosa,4.6,3.4,1.4,0.3 -setosa,5,3.4,1.5,0.2 -setosa,4.4,2.9,1.4,0.2 -setosa,4.9,3.1,1.5,0.1 -setosa,5.4,3.7,1.5,0.2 -setosa,4.8,3.4,1.6,0.2 -setosa,4.8,3,1.4,0.1 -setosa,4.3,3,1.1,0.1 -setosa,5.8,4,1.2,0.2 -setosa,5.7,4.4,1.5,0.4 -setosa,5.4,3.9,1.3,0.4 -setosa,5.1,3.5,1.4,0.3 -setosa,5.7,3.8,1.7,0.3 -setosa,5.1,3.8,1.5,0.3 -setosa,5.4,3.4,1.7,0.2 -setosa,5.1,3.7,1.5,0.4 -setosa,4.6,3.6,1,0.2 -setosa,5.1,3.3,1.7,0.5 -setosa,4.8,3.4,1.9,0.2 -setosa,5,3,1.6,0.2 -setosa,5,3.4,1.6,0.4 -setosa,5.2,3.5,1.5,0.2 -setosa,5.2,3.4,1.4,0.2 -setosa,4.7,3.2,1.6,0.2 -setosa,4.8,3.1,1.6,0.2 -setosa,5.4,3.4,1.5,0.4 -setosa,5.2,4.1,1.5,0.1 -setosa,5.5,4.2,1.4,0.2 -setosa,4.9,3.1,1.5,0.2 -setosa,5,3.2,1.2,0.2 -setosa,5.5,3.5,1.3,0.2 -setosa,4.9,3.6,1.4,0.1 -setosa,4.4,3,1.3,0.2 -setosa,5.1,3.4,1.5,0.2 -setosa,5,3.5,1.3,0.3 -setosa,4.5,2.3,1.3,0.3 -setosa,4.4,3.2,1.3,0.2 -setosa,5,3.5,1.6,0.6 -setosa,5.1,3.8,1.9,0.4 -setosa,4.8,3,1.4,0.3 -setosa,5.1,3.8,1.6,0.2 -setosa,4.6,3.2,1.4,0.2 -setosa,5.3,3.7,1.5,0.2 -setosa,5,3.3,1.4,0.2 -versicolor,7,3.2,4.7,1.4 -versicolor,6.4,3.2,4.5,1.5 -versicolor,6.9,3.1,4.9,1.5 -versicolor,5.5,2.3,4,1.3 -versicolor,6.5,2.8,4.6,1.5 -versicolor,5.7,2.8,4.5,1.3 -versicolor,6.3,3.3,4.7,1.6 -versicolor,4.9,2.4,3.3,1 -versicolor,6.6,2.9,4.6,1.3 -versicolor,5.2,2.7,3.9,1.4 -versicolor,5,2,3.5,1 -versicolor,5.9,3,4.2,1.5 -versicolor,6,2.2,4,1 -versicolor,6.1,2.9,4.7,1.4 -versicolor,5.6,2.9,3.6,1.3 -versicolor,6.7,3.1,4.4,1.4 -versicolor,5.6,3,4.5,1.5 -versicolor,5.8,2.7,4.1,1 -versicolor,6.2,2.2,4.5,1.5 -versicolor,5.6,2.5,3.9,1.1 -versicolor,5.9,3.2,4.8,1.8 -versicolor,6.1,2.8,4,1.3 -versicolor,6.3,2.5,4.9,1.5 -versicolor,6.1,2.8,4.7,1.2 -versicolor,6.4,2.9,4.3,1.3 -versicolor,6.6,3,4.4,1.4 -versicolor,6.8,2.8,4.8,1.4 -versicolor,6.7,3,5,1.7 -versicolor,6,2.9,4.5,1.5 -versicolor,5.7,2.6,3.5,1 -versicolor,5.5,2.4,3.8,1.1 -versicolor,5.5,2.4,3.7,1 -versicolor,5.8,2.7,3.9,1.2 -versicolor,6,2.7,5.1,1.6 -versicolor,5.4,3,4.5,1.5 -versicolor,6,3.4,4.5,1.6 -versicolor,6.7,3.1,4.7,1.5 -versicolor,6.3,2.3,4.4,1.3 -versicolor,5.6,3,4.1,1.3 -versicolor,5.5,2.5,4,1.3 -versicolor,5.5,2.6,4.4,1.2 -versicolor,6.1,3,4.6,1.4 -versicolor,5.8,2.6,4,1.2 -versicolor,5,2.3,3.3,1 -versicolor,5.6,2.7,4.2,1.3 -versicolor,5.7,3,4.2,1.2 -versicolor,5.7,2.9,4.2,1.3 -versicolor,6.2,2.9,4.3,1.3 -versicolor,5.1,2.5,3,1.1 -versicolor,5.7,2.8,4.1,1.3 -virginica,6.3,3.3,6,2.5 -virginica,5.8,2.7,5.1,1.9 -virginica,7.1,3,5.9,2.1 -virginica,6.3,2.9,5.6,1.8 -virginica,6.5,3,5.8,2.2 -virginica,7.6,3,6.6,2.1 -virginica,4.9,2.5,4.5,1.7 -virginica,7.3,2.9,6.3,1.8 -virginica,6.7,2.5,5.8,1.8 -virginica,7.2,3.6,6.1,2.5 -virginica,6.5,3.2,5.1,2 -virginica,6.4,2.7,5.3,1.9 -virginica,6.8,3,5.5,2.1 -virginica,5.7,2.5,5,2 -virginica,5.8,2.8,5.1,2.4 -virginica,6.4,3.2,5.3,2.3 -virginica,6.5,3,5.5,1.8 -virginica,7.7,3.8,6.7,2.2 -virginica,7.7,2.6,6.9,2.3 -virginica,6,2.2,5,1.5 -virginica,6.9,3.2,5.7,2.3 -virginica,5.6,2.8,4.9,2 -virginica,7.7,2.8,6.7,2 -virginica,6.3,2.7,4.9,1.8 -virginica,6.7,3.3,5.7,2.1 -virginica,7.2,3.2,6,1.8 -virginica,6.2,2.8,4.8,1.8 -virginica,6.1,3,4.9,1.8 -virginica,6.4,2.8,5.6,2.1 -virginica,7.2,3,5.8,1.6 -virginica,7.4,2.8,6.1,1.9 -virginica,7.9,3.8,6.4,2 -virginica,6.4,2.8,5.6,2.2 -virginica,6.3,2.8,5.1,1.5 -virginica,6.1,2.6,5.6,1.4 -virginica,7.7,3,6.1,2.3 -virginica,6.3,3.4,5.6,2.4 -virginica,6.4,3.1,5.5,1.8 -virginica,6,3,4.8,1.8 -virginica,6.9,3.1,5.4,2.1 -virginica,6.7,3.1,5.6,2.4 -virginica,6.9,3.1,5.1,2.3 -virginica,5.8,2.7,5.1,1.9 -virginica,6.8,3.2,5.9,2.3 -virginica,6.7,3.3,5.7,2.5 -virginica,6.7,3,5.2,2.3 -virginica,6.3,2.5,5,1.9 -virginica,6.5,3,5.2,2 -virginica,6.2,3.4,5.4,2.3 -virginica,5.9,3,5.1,1.8 diff --git a/sagebuild/SageMakerNotebook/notebooks/tensorflow/tensorflow_distributed_mnist.ipynb b/sagebuild/SageMakerNotebook/notebooks/tensorflow/tensorflow_distributed_mnist.ipynb index f5b0a7f..a21c5f3 100644 --- a/sagebuild/SageMakerNotebook/notebooks/tensorflow/tensorflow_distributed_mnist.ipynb +++ b/sagebuild/SageMakerNotebook/notebooks/tensorflow/tensorflow_distributed_mnist.ipynb @@ -314,7 +314,7 @@ " print(\"========================================\")\n", " label = np.argmax(mnist.test.labels[i])\n", " print(\"label is {}\".format(label))\n", - " prediction = predict_response['outputs']['classes']['int64Val'][0]\n", + " prediction = predict_response['outputs']['classes']['int64_val'][0]\n", " print(\"prediction is {}\".format(prediction))" ] }, diff --git a/sagebuild/alexa/index.js b/sagebuild/alexa/index.js index 9ae242d..a09bb9b 100644 --- a/sagebuild/alexa/index.js +++ b/sagebuild/alexa/index.js @@ -64,7 +64,7 @@ function lambda(name){ START_TOPIC:{"Ref":"LaunchTopic"} } }, - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["AlexaLambdaRole","Arn"]}, "Runtime": "nodejs6.10", "Timeout": 60 diff --git a/sagebuild/bin/check.js b/sagebuild/bin/check.js index 1288304..d18fc22 100755 --- a/sagebuild/bin/check.js +++ b/sagebuild/bin/check.js @@ -16,6 +16,7 @@ async function run(){ var obj=require('../') var template=JSON.stringify(obj,null,2) + fs.writeFileSync(`${__dirname}/../build/template.json`,JSON.stringify(require('../'),null,2)) await s3.putObject({ Bucket:bucket, Key:"sagebuild.json", diff --git a/sagebuild/cfn/index.js b/sagebuild/cfn/index.js index 73c2f8c..e8de1a5 100644 --- a/sagebuild/cfn/index.js +++ b/sagebuild/cfn/index.js @@ -80,7 +80,7 @@ function lambda(name){ "ZipFile":result.code }, "Handler": "index.handler", - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["CFNLambdaRole","Arn"]}, "Runtime": "nodejs6.10", "Timeout": 60 diff --git a/sagebuild/codebuild/index.js b/sagebuild/codebuild/index.js index 4cefb19..7438abb 100644 --- a/sagebuild/codebuild/index.js +++ b/sagebuild/codebuild/index.js @@ -17,7 +17,7 @@ module.exports={ "ServiceToken": { "Fn::GetAtt" : ["CodeCommitTriggerLambda", "Arn"] }, "repositoryName":{"Fn::GetAtt":["Variables","RepoName"]}, "trigger":{ - branches:{"Ref":"BranchBuildTrigger"}, + branches:[{"Ref":"BranchBuildTrigger"}], destinationArn:{"Ref":"LaunchTopic"}, events:["all"], name:{"Ref":"AWS::StackName"} diff --git a/sagebuild/index.js b/sagebuild/index.js index 2298c5e..136268a 100644 --- a/sagebuild/index.js +++ b/sagebuild/index.js @@ -23,6 +23,10 @@ module.exports={ ])), stateMachines.conditions, { + "BucketTrigger":{"Fn::And":[ + equal("BucketTriggerBuild","True"), + {"Condition":"CreateDataBucket"} + ]}, "ExternalHostingPolicy":notEmpty("ExternalHostingPolicy"), "ExternalTrainingPolicy":notEmpty("ExternalTrainingPolicy"), "ExternalEndpointConfigLambda":notEmpty("EndpointConfigLambda"), @@ -34,7 +38,7 @@ module.exports={ equal("ExternalCodeCommitRepo","CREATE_REPO"), equal("ExternalGithubRepo","USE_CODECOMMIT_REPO") ]}, - "CreateRepoTrigger":notEqual("ExternalGithubRepo","USE_CODECOMMIT_REPO"), + "CreateRepoTrigger":notEqual("BranchBuildTrigger","EMPTY"), "UseCodeBucket":notEmpty("ExternalCodeBucket"), "IsCodeCommitRepo":equal("ExternalGithubRepo","USE_CODECOMMIT_REPO"), "SubscribeToExternalTopic":notEqual("ExternalLaunchTopic","EMPTY"), @@ -109,6 +113,15 @@ module.exports={ {"Fn::Sub":"https://codecommit.us-east-1.amazonaws.com/v1/repos/${ExternalCodeCommitRepo}"} ]}, {"Ref":"ExternalGithubRepo"} + ]}, + "RepoArn":{"Fn::If":[ + "IsCodeCommitRepo", + {"Fn::If":[ + "CreateRepo", + {"Fn::GetAtt":["CodeRepo","Arn"]}, + {"Fn::Sub":"arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:${ExternalCodeCommitRepo}"} + ]}, + {"Ref":"AWS::NoValue"} ]} } }}, diff --git a/sagebuild/info/interface.js b/sagebuild/info/interface.js index dd56386..ca317ce 100644 --- a/sagebuild/info/interface.js +++ b/sagebuild/info/interface.js @@ -4,7 +4,7 @@ module.exports={ "Parameters":["ExternalTrainingPolicy","ExternalHostingPolicy","ConfigFramework","ConfigDeploy","Type","Parameters"] },{ "Label":{"default":"Data Bucket Configuration"}, - "Parameters":["ExternalDataBucket","ExternalLaunchTopic"] + "Parameters":["ExternalDataBucket","ExternalLaunchTopic","BucketTriggerBuild"] },{ "Label":{"default":"Repository Configuration"}, "Parameters":["BranchBuildTrigger","ExternalCodeCommitRepo","ExternalGithubRepo","ExternalCodeBucket"] @@ -16,6 +16,7 @@ module.exports={ "Parameters":["EndpointConfigLambda","TrainingConfigLambda","ModelConfigLambda"] }], "ParameterLabels":{ + "BucketTriggerBuild":{"default":"Data Bucket Trigger"}, "ExternalTrainingPolicy":{"default":"Additional Training IAM Policy"}, "ExternalHostingPolicy":{"default":"Additional Hosting IAM Policy"}, "ConfigFramework":{"default":"Configuration"}, diff --git a/sagebuild/info/parameters.js b/sagebuild/info/parameters.js index 65342ec..4f7ecb9 100644 --- a/sagebuild/info/parameters.js +++ b/sagebuild/info/parameters.js @@ -43,6 +43,12 @@ module.exports={ "Default":"BYOD", "AllowedValues":frameworkConfigs }, + "BucketTriggerBuild":{ + "Type":"String", + "Description":"If new data uploaded to data bucket should trigger a rebuild", + "Default":"False", + "AllowedValues":["True","False"] + }, "ConfigDeploy":{ "Type":"String", "Default":"SAGEMAKER", @@ -87,9 +93,9 @@ module.exports={ "Description":"(Optional) Http clone URL of a Github repository that contians Dockerfile code." }, "BranchBuildTrigger":{ - "Type":"CommaDelimitedList", - "Default":"master", - "Description":"(Optional) Comma seperated list of branchs in the code repository that trigger a build when changed" + "Type":"String", + "Default":"EMPTY", + "Description":"(Optional) branch in the code repository that triggers a build when changed, leave value to EMPTY to not create a build trigger" }, "EndpointConfigLambda":{ "Type":"String", diff --git a/sagebuild/step_function/index.js b/sagebuild/step_function/index.js index d351b76..1bee229 100644 --- a/sagebuild/step_function/index.js +++ b/sagebuild/step_function/index.js @@ -63,7 +63,7 @@ module.exports=Object.assign( }, "ParameterStoreOverride":{ "Type": "Custom::ParamterUpdate", - "DependsOn":["ParameterStore","CFNLambdaPolicy"], + "DependsOn":["CFNLambdaPolicy"], "Properties": { "ServiceToken": { "Fn::GetAtt" : ["ParameterUpdateLambda", "Arn"] }, "name":{"Ref":"ParameterStore"}, @@ -82,7 +82,7 @@ module.exports=Object.assign( "DependsOn":["CFNLambdaPolicy","EndpointConfigClear"], "Properties": { "ServiceToken": { "Fn::GetAtt" : ["SageMakerClearModelLambda", "Arn"] }, - "name":{"Fn::Sub":"${AWS::StackName}-"} + "name":{"Fn::Sub":"${AWS::StackName}"} } }, "EndpointConfigClear":{ @@ -90,7 +90,7 @@ module.exports=Object.assign( "DependsOn":["CFNLambdaPolicy","EndpointClear"], "Properties": { "ServiceToken": { "Fn::GetAtt" : ["SageMakerClearEndpointConfigLambda", "Arn"] }, - "name":{"Fn::Sub":"${AWS::StackName}-"} + "name":{"Fn::Sub":"${AWS::StackName}"} } }, "EndpointClear":{ diff --git a/sagebuild/step_function/lambdas/config/amazon/ModelConfig.js b/sagebuild/step_function/lambdas/config/amazon/ModelConfig.js index 74af747..2836615 100644 --- a/sagebuild/step_function/lambdas/config/amazon/ModelConfig.js +++ b/sagebuild/step_function/lambdas/config/amazon/ModelConfig.js @@ -5,14 +5,19 @@ var s3=new aws.S3() exports.handler=(event,context,callback)=>{ console.log("EVENT:",JSON.stringify(event,null,2)) - + if(event.status.training.ModelArtifacts){ + var ModelDataUrl=event.status.training.ModelArtifacts.S3ModelArtifacts + }else{ + var ModelDataUrl=`${event.status.training.TrainingJobDefinition.OutputDataConfig.S3OutputPath}/${event.status.training.BestTrainingJob.TrainingJobName}/output/model.tar.gz` + } + var key= `versions/inference/v${event.params.version}.py` callback(null,{ ExecutionRoleArn:event.params.modelrole, ModelName:`${event.params.name}-${event.params.id}`, PrimaryContainer:{ Image:create_image_uri(event.params), - ModelDataUrl:event.status.training.ModelArtifacts.S3ModelArtifacts, + ModelDataUrl, Environment:event.params.modelhostingenvironment || {} }, Tags:[{ diff --git a/sagebuild/step_function/lambdas/config/byod/ModelConfig.js b/sagebuild/step_function/lambdas/config/byod/ModelConfig.js index c933199..9493405 100644 --- a/sagebuild/step_function/lambdas/config/byod/ModelConfig.js +++ b/sagebuild/step_function/lambdas/config/byod/ModelConfig.js @@ -4,13 +4,21 @@ var sagemaker=new aws.SageMaker() exports.handler=(event,context,callback)=>{ console.log("EVENT:",JSON.stringify(event,null,2)) + if(event.params.ModelArtifacts){ + var ModelDataUrl=event.params.ModelArtifacts + }else if(event.status.training.ModelArtifacts){ + var ModelDataUrl=event.status.training.ModelArtifacts.S3ModelArtifacts + }else{ + var ModelDataUrl=`${event.status.training.TrainingJobDefinition.OutputDataConfig.S3OutputPath}/${event.status.training.BestTrainingJob.TrainingJobName}/output/model.tar.gz` + } + try{ callback(null,{ ExecutionRoleArn:event.params.modelrole, ModelName:`${event.params.name}-${event.params.id}`, PrimaryContainer:{ Image:event.params.InferenceImage || `${event.params.accountid}.dkr.ecr.${process.env.AWS_REGION}.amazonaws.com/${event.params.ecrrepo}:Inference_v${event.params.version}`, - ModelDataUrl:event.status.training.ModelArtifacts.S3ModelArtifacts, + ModelDataUrl, Environment:event.params.modelhostingenvironment }, Tags:[{ diff --git a/sagebuild/step_function/lambdas/config/index.js b/sagebuild/step_function/lambdas/config/index.js index b6c5215..194b704 100644 --- a/sagebuild/step_function/lambdas/config/index.js +++ b/sagebuild/step_function/lambdas/config/index.js @@ -21,7 +21,7 @@ function lambda(name){ "ZipFile":info.code }, "Handler":"index.handler", - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["StepLambdaRole","Arn"]}, "Runtime":info.runtime, "Timeout": 60 diff --git a/sagebuild/step_function/lambdas/config/tensorflow/ModelConfig.js b/sagebuild/step_function/lambdas/config/tensorflow/ModelConfig.js index 3341bcd..930b2fa 100644 --- a/sagebuild/step_function/lambdas/config/tensorflow/ModelConfig.js +++ b/sagebuild/step_function/lambdas/config/tensorflow/ModelConfig.js @@ -5,9 +5,12 @@ var s3=new aws.S3() exports.handler=(event,context,callback)=>{ console.log("EVENT:",JSON.stringify(event,null,2)) - var ModelDataUrl=event.status.training.ModelArtifacts.S3ModelArtifacts || - `${event.status.training.TrainingJobDefinition.OutputDataConfig.S3OutputPath}/${event.status.training.BestTrainingJob.TrainingJobName}/output/model.tar.gz` - + if(event.status.training.ModelArtifacts){ + var ModelDataUrl=event.status.training.ModelArtifacts.S3ModelArtifacts + }else{ + var ModelDataUrl=`${event.status.training.TrainingJobDefinition.OutputDataConfig.S3OutputPath}/${event.status.training.BestTrainingJob.TrainingJobName}/output/model.tar.gz` + } + var key= `versions/inference/v${event.params.version}.py` s3.copyObject({ CopySource:event.params.hostsourcefile.match(/s3:\/\/(.*)/)[1], diff --git a/sagebuild/step_function/lambdas/core/index.js b/sagebuild/step_function/lambdas/core/index.js index 5568e2f..be6395e 100644 --- a/sagebuild/step_function/lambdas/core/index.js +++ b/sagebuild/step_function/lambdas/core/index.js @@ -27,7 +27,7 @@ function lambda(name){ "ZipFile":info.code }, "Handler":"index.handler", - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["StepLambdaRole","Arn"]}, "Runtime": info.runtime, "Timeout": 60 diff --git a/sagebuild/step_function/lambdas/core/sagemaker/HPOStatus.js b/sagebuild/step_function/lambdas/core/sagemaker/HPOStatus.js index 558c8b1..57f1909 100644 --- a/sagebuild/step_function/lambdas/core/sagemaker/HPOStatus.js +++ b/sagebuild/step_function/lambdas/core/sagemaker/HPOStatus.js @@ -4,9 +4,9 @@ var sagemaker=new aws.SageMaker() exports.handler=(event,context,cb)=>{ console.log("EVENT:",JSON.stringify(event,null,2)) - + sagemaker.describeHyperParameterTuningJob({ - HyperParameterTuningJobName:event.args.training.TrainingJobName.slice(0,32) + HyperParameterTuningJobName:event.outputs.training.HyperParameterTuningJobArn.split('/')[1] }).promise() .then(result=>{ cb(null,result) diff --git a/sagebuild/step_function/lambdas/core/sagemaker/StartHPO.js b/sagebuild/step_function/lambdas/core/sagemaker/StartHPO.js index 33f4a72..dcad8ef 100644 --- a/sagebuild/step_function/lambdas/core/sagemaker/StartHPO.js +++ b/sagebuild/step_function/lambdas/core/sagemaker/StartHPO.js @@ -1,9 +1,11 @@ var aws=require('aws-sdk') aws.config.region=process.env.AWS_REGION +var crypto = require('crypto'); var sagemaker=new aws.SageMaker() exports.handler=(event,context,cb)=>{ console.log("EVENT:",JSON.stringify(event,null,2)) + var shasum = crypto.createHash('sha1'); try{ event.args.training.Tags=event.args.training.Tags || [] event.args.training.Tags.push({ @@ -44,6 +46,8 @@ exports.handler=(event,context,cb)=>{ hyperParams.static[key]=val } }) + shasum.update(old_args.TrainingJobName); + args={ HyperParameterTuningJobConfig:{ HyperParameterTuningJobObjective:{ @@ -61,15 +65,11 @@ exports.handler=(event,context,cb)=>{ }, Strategy:"Bayesian" }, - HyperParameterTuningJobName:old_args.TrainingJobName.slice(0,32), + HyperParameterTuningJobName:shasum.digest('hex').slice(0,32), TrainingJobDefinition:{ AlgorithmSpecification:{ TrainingImage:old_args.AlgorithmSpecification.TrainingImage, - TrainingInputMode:old_args.AlgorithmSpecification.TrainingInputMode, - MetricDefinitions:[{ - Name:event.params.tuningobjective.Name, - Regex:event.params.tuningobjective.Regex - }] + TrainingInputMode:old_args.AlgorithmSpecification.TrainingInputMode }, InputDataConfig:old_args.InputDataConfig, OutputDataConfig:old_args.OutputDataConfig, @@ -80,6 +80,12 @@ exports.handler=(event,context,cb)=>{ }, Tags:old_args.Tags } + if(event.params.tuningobjective.Regex){ + args.TrainingJobDefinition.AlgorithmSpecification.MetricDefinitions=[{ + Name:event.params.tuningobjective.Name, + Regex:event.params.tuningobjective.Regex + }] + } console.log(JSON.stringify(args,null,2)) sagemaker.createHyperParameterTuningJob(args).promise() .then(result=>cb(null,result)) diff --git a/sagebuild/step_function/launch/index.js b/sagebuild/step_function/launch/index.js index d724bc1..3e3c7d4 100644 --- a/sagebuild/step_function/launch/index.js +++ b/sagebuild/step_function/launch/index.js @@ -83,7 +83,7 @@ module.exports={ "Action" : "sns:Publish", "Resource" : { "Ref" : "LaunchTopic" }, "Condition":{ - ArnLike:{"AWS:SourceArn":{"Fn::GetAtt":["CodeRepo","Arn"]}} + ArnLike:{"AWS:SourceArn":{"Fn::GetAtt":["Variables","RepoArn"]}} } }] }, @@ -93,7 +93,7 @@ module.exports={ "DataBucketSNS":{ "Type": "Custom::S3Notification", "DependsOn":["CFNLambdaPolicy","LaunchTopicPolicy"], - Condition:"CreateDataBucket", + Condition:"BucketTrigger", "Properties": { "ServiceToken": { "Fn::GetAtt" : ["S3NotificationLambda", "Arn"] }, "Bucket":{"Fn::GetAtt":["Variables","DataBucket"]}, @@ -121,7 +121,7 @@ module.exports={ } }, "Handler": "index.handler", - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["LaunchLambdaRole","Arn"]}, "Runtime": "nodejs6.10", "Timeout": 60 @@ -139,7 +139,7 @@ module.exports={ } }, "Handler": "index.handler", - "MemorySize": "128", + "MemorySize": 128, "Role": {"Fn::GetAtt": ["RollbackLambdaRole","Arn"]}, "Runtime": "nodejs6.10", "Timeout": 60 diff --git a/sagebuild/step_function/stateMachines/DockerTrainDeploy/train.js b/sagebuild/step_function/stateMachines/DockerTrainDeploy/train.js index ddfe801..fda7395 100644 --- a/sagebuild/step_function/stateMachines/DockerTrainDeploy/train.js +++ b/sagebuild/step_function/stateMachines/DockerTrainDeploy/train.js @@ -15,6 +15,10 @@ module.exports=Object.assign( },{ Variable:`$.params.train`, BooleanEquals:false, + Next:`getModelConfig` + },{ + Variable:`$.params.skiptrain`, + BooleanEquals:true, Next:`getArtifact` }], Default:`getTrainingConfig`