Skip to content
Permalink
Browse files

Deployment improvement - Add gunicorn worker per instance option for …

…sagemaker (#457)

* add gunicorn worker count per instance for sagemaker deployment

* write out each py file instead of template string

* Add comments for deployment proto

* Add additional unit tests

* Move sagemaker default values to CLI

* update failed tests

* fix test
  • Loading branch information
yubozhao authored and parano committed Jan 4, 2020
1 parent 2637d92 commit 654fe7da45e6d1892f2d4ed6a36edcac7ff5af9b
@@ -54,6 +54,10 @@
logger = logging.getLogger(__name__)


DEFAULT_SAGEMAKER_INSTANCE_TYPE = 'ml.m4.xlarge'
DEFAULT_SAGEMAKER_INSTANCE_COUNT = 1


def parse_key_value_pairs(key_value_pairs_str):
result = {}
if key_value_pairs_str:
@@ -210,18 +214,28 @@ def deployment():
@click.option(
'--instance-type',
help='Type of instance will be used for inference. Option applicable to '
'platform: AWS SageMaker, AWS Lambda',
'platform: AWS SageMaker. Default to "m1.m4.xlarge"',
type=click.STRING,
default=DEFAULT_SAGEMAKER_INSTANCE_TYPE,
)
@click.option(
'--instance-count',
help='Number of instance will be used. Option applicable to platform: AWS '
'SageMaker',
'SageMaker. Default value is 1',
type=click.INT,
default=DEFAULT_SAGEMAKER_INSTANCE_COUNT,
)
@click.option(
'--num-of-gunicorn-workers-per-instance',
help='Number of gunicorn worker will be used per instance. Option applicable '
'to platform: AWS SageMaker. Default value for gunicorn worker is based on '
'the instance\' cpu core counts. The formula is num_of_cpu/2 + 1',
type=click.INT,
)
@click.option(
'--api-name',
help='User defined API function will be used for inference. Option applicable'
'to platform: AWS SageMaker',
help='User defined API function will be used for inference. Required for AWS '
'SageMaker',
)
@click.option(
'--kube-namespace',
@@ -275,6 +289,7 @@ def create(
region,
instance_type,
instance_count,
num_of_gunicorn_workers_per_instance,
api_name,
kube_namespace,
replicas,
@@ -292,6 +307,7 @@ def create(
'region': region,
'instance_type': instance_type,
'instance_count': instance_count,
'num_of_gunicorn_workers_per_instance': num_of_gunicorn_workers_per_instance, # noqa E501
'api_name': api_name,
'kube_namespace': kube_namespace,
'replicas': replicas,
@@ -82,7 +82,3 @@ default_region = us-west-2
[google-cloud]
default_region = us-west2

[sagemaker]
default_instance_type = ml.m4.xlarge
default_instance_count = 1

@@ -44,11 +44,6 @@
BentoMLException,
AWSServiceError,
)
from bentoml.deployment.sagemaker.templates import (
DEFAULT_NGINX_CONFIG,
DEFAULT_WSGI_PY,
DEFAULT_SERVE_SCRIPT,
)
from bentoml.deployment.operator import DeploymentOperatorBase
from bentoml.proto.deployment_pb2 import (
ApplyDeploymentResponse,
@@ -352,12 +347,15 @@ def _init_sagemaker_project(sagemaker_project_dir, bento_path):

with open(os.path.join(sagemaker_project_dir, 'Dockerfile-sagemaker'), "w") as f:
f.write(BENTO_SERVICE_SAGEMAKER_DOCKERFILE)
with open(os.path.join(sagemaker_project_dir, "nginx.conf"), "w") as f:
f.write(DEFAULT_NGINX_CONFIG)
with open(os.path.join(sagemaker_project_dir, "wsgi.py"), "w") as f:
f.write(DEFAULT_WSGI_PY)
with open(os.path.join(sagemaker_project_dir, "serve"), "w") as f:
f.write(DEFAULT_SERVE_SCRIPT)

nginx_conf_path = os.path.join(os.path.dirname(__file__), 'sagemaker_nginx.conf')
shutil.copy(nginx_conf_path, os.path.join(sagemaker_project_dir, 'nginx.conf'))

wsgi_py_path = os.path.join(os.path.dirname(__file__), 'sagemaker_wsgi.py')
shutil.copy(wsgi_py_path, os.path.join(sagemaker_project_dir, 'wsgi.py'))

serve_file_path = os.path.join(os.path.dirname(__file__), 'sagemaker_serve.py')
shutil.copy(serve_file_path, os.path.join(sagemaker_project_dir, 'serve'))

# permission 755 is required for entry script 'serve'
permission = "755"
@@ -367,7 +365,7 @@ def _init_sagemaker_project(sagemaker_project_dir, bento_path):


def _create_sagemaker_model(
sagemaker_client, sagemaker_model_name, ecr_image_path, bento_service_api_name
sagemaker_client, sagemaker_model_name, ecr_image_path, spec
):
execution_role_arn = get_arn_role_from_current_aws_user()

@@ -377,21 +375,20 @@ def _create_sagemaker_model(
"ContainerHostname": sagemaker_model_name,
"Image": ecr_image_path,
"Environment": {
"API_NAME": bento_service_api_name,
"API_NAME": spec.api_name,
"BENTO_SERVER_TIMEOUT": config().get('apiserver', 'default_timeout'),
},
},
"ExecutionRoleArn": execution_role_arn,
}
default_worker_count = config().getint(
'apiserver', 'default_gunicorn_workers_count'
)
if default_worker_count > 0:

# Will set envvar, if user defined gunicorn workers per instance. EnvVar needs
# to be string instead of the int.
if spec.num_of_gunicorn_workers_per_instance:
sagemaker_model_info['PrimaryContainer']['Environment'][
'BENTO_SERVER_WORKERS'
] = default_worker_count
'GUNICORN_WORKER_COUNT'
] = str(spec.num_of_gunicorn_workers_per_instance)

logger.debug("Creating sagemaker model %s", sagemaker_model_name)
try:
create_model_response = sagemaker_client.create_model(**sagemaker_model_info)
except ClientError as e:
@@ -504,10 +501,7 @@ def _add(self, deployment_pb, bento_pb, bento_path):
) = _get_sagemaker_resource_names(deployment_pb)

_create_sagemaker_model(
sagemaker_client,
sagemaker_model_name,
ecr_image_path,
sagemaker_config.api_name,
sagemaker_client, sagemaker_model_name, ecr_image_path, sagemaker_config
)
_create_sagemaker_endpoint_config(
sagemaker_client,
@@ -0,0 +1,38 @@
# nginx reverse-proxy configuration for the SageMaker serving container.
# Terminates HTTP on port 8080 and forwards SageMaker's health-check (/ping)
# and inference (/invocations) requests to gunicorn over a unix socket.

worker_processes 1;
daemon off; # Prevent forking: nginx is supervised by the serve script

pid /tmp/nginx.pid;
error_log /var/log/nginx/error.log;

events {
    # defaults
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    access_log /var/log/nginx/access.log combined;

    # gunicorn binds to this unix domain socket (see '-b' in the serve script)
    upstream gunicorn {
        server unix:/tmp/gunicorn.sock;
    }

    server {
        listen 8080 deferred;
        client_max_body_size 500m;

        keepalive_timeout 5;
        proxy_read_timeout 1200s;

        # Only the two SageMaker-required endpoints are proxied to the app
        location ~ ^/(ping|invocations) {
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header Host $http_host;
            proxy_redirect off;
            proxy_pass http://gunicorn;
        }

        # Everything else gets a 404 with an empty JSON body
        location / {
            return 404 "{}";
        }
    }
}
@@ -0,0 +1,69 @@
#!/usr/bin/env python

# SageMaker serving entrypoint shell: starts nginx (front proxy) and gunicorn
# (WSGI app server) and supervises both.
#
# Configuration via environment variables:
#   BENTO_SERVER_TIMEOUT   gunicorn worker timeout in seconds (default: 60)
#   GUNICORN_WORKER_COUNT  number of gunicorn workers
#                          (default: get_gunicorn_num_of_workers(), derived
#                          from the CPU core count)
#   API_NAME               BentoService API to serve (read in wsgi.py, not here)

import subprocess
import os
import signal
import sys

from bentoml.server.utils import get_gunicorn_num_of_workers

bento_server_timeout = os.environ.get('BENTO_SERVER_TIMEOUT', 60)
bento_server_workers = int(
    os.environ.get('GUNICORN_WORKER_COUNT', get_gunicorn_num_of_workers())
)


def sigterm_handler(nginx_pid, gunicorn_pid):
    """Shut down both child servers, then exit this process with status 0.

    nginx receives SIGQUIT (its graceful-stop signal) and gunicorn receives
    SIGTERM. A child that has already exited raises OSError, which is ignored
    so shutdown always proceeds to sys.exit.
    """
    shutdown_plan = (
        (nginx_pid, signal.SIGQUIT),
        (gunicorn_pid, signal.SIGTERM),
    )
    for pid, sig in shutdown_plan:
        try:
            os.kill(pid, sig)
        except OSError:
            pass
    sys.exit(0)


def _serve():
    """Start nginx and gunicorn, then block until either child exits.

    When one of the two children terminates (or on SIGTERM from SageMaker),
    both are shut down via sigterm_handler, which exits this process with
    status 0.
    """
    # link the log streams to stdout/err so they will be logged to the
    # container logs (SageMaker collects the container's stdout/stderr)
    subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log'])
    subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log'])

    nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf'])
    gunicorn_app = subprocess.Popen(
        [
            'gunicorn',
            '--timeout',
            str(bento_server_timeout),
            '-k',
            'gevent',
            '-b',
            'unix:/tmp/gunicorn.sock',
            '-w',
            str(bento_server_workers),
            'wsgi:app',
        ]
    )
    # Forward SIGTERM to both children so endpoint shutdown is graceful.
    signal.signal(
        signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn_app.pid)
    )

    # Block until one of the two managed children exits; os.wait() may reap
    # other (grand)children, so only break for the pids we started.
    pids = {nginx.pid, gunicorn_app.pid}
    while True:
        pid, _ = os.wait()
        if pid in pids:
            break
    # Log before shutting down: sigterm_handler calls sys.exit(0) and never
    # returns, so a print placed after it (as in the original) is unreachable.
    print('Inference server exiting')
    sigterm_handler(nginx.pid, gunicorn_app.pid)


# Script entrypoint: run the serving supervisor when executed directly
# (this file is installed as the container's 'serve' command).
if __name__ == '__main__':
    _serve()
@@ -0,0 +1,23 @@
# Copyright 2019 Atalaya Tech, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from bentoml.bundler import load
from bentoml.server.bento_sagemaker_server import BentoSagemakerServer

# Name of the BentoService API to expose; the SageMaker deployment sets
# API_NAME in the container environment (see _create_sagemaker_model).
api_name = os.environ.get('API_NAME', None)
# /opt/program is where the container image places the bento bundle
# -- NOTE(review): confirm against Dockerfile-sagemaker
model_service = load('/opt/program')
server = BentoSagemakerServer(model_service, api_name)
# WSGI callable that gunicorn loads as 'wsgi:app' (see the serve script)
app = server.app

0 comments on commit 654fe7d

Please sign in to comment.
You can’t perform that action at this time.