Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement deployment update for sagemaker #461

Merged
merged 21 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions bentoml/cli/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
get_deployment,
describe_deployment,
list_deployments,
update_deployment,
)
from bentoml.yatai import get_yatai_service

Expand Down Expand Up @@ -346,6 +347,7 @@ def create(
),
CLI_COLOR_ERROR,
)
return
else:
if wait:
result_state = get_state_after_await_action_complete(
Expand All @@ -372,6 +374,133 @@ def create(
_echo('Successfully created deployment {}'.format(name), CLI_COLOR_SUCCESS)
_print_deployment_info(result.deployment, output)

@deployment.command(help='Update existing deployment')
@click.argument("name", type=click.STRING, required=True)
@click.option(
'-n',
'--namespace',
type=click.STRING,
help='Deployment namespace managed by BentoML, default value is "default" which'
'can be changed in BentoML configuration file',
)
@click.option(
'-b',
'--bento',
'--bento-service-bundle',
type=click.STRING,
callback=parse_bento_tag_callback,
help='Target BentoService to be deployed, referenced by its name and version '
'in format of name:version. For example: "iris_classifier:v1.2.0"',
)
@click.option(
'--instance-type',
help='Type of instance will be used for inference. Option applicable to '
'platform: AWS SageMaker. Default to "m1.m4.xlarge"',
type=click.STRING,
)
@click.option(
'--instance-count',
help='Number of instance will be used. Option applicable to platform: AWS '
'SageMaker. Default value is 1',
type=click.INT,
)
@click.option(
'--num-of-gunicorn-workers-per-instance',
help='Number of gunicorn worker will be used per instance. Option applicable '
'to platform: AWS SageMaker. Default value for gunicorn worker is '
'based on the instance\' cpu core counts. The formula is num_of_cpu/2 + 1',
type=click.INT,
)
@click.option(
'--api-name',
help='User defined API function will be used for inference. Required for AWS '
'SageMaker',
)
@click.option('-o', '--output', type=click.Choice(['json', 'yaml']), default='json')
@click.option(
'--wait/--no-wait',
default=True,
help='Wait for apply action to complete or encounter an error.'
'If set to no-wait, CLI will return immediately. The default value is wait',
)
def update(
name,
namespace,
bento,
instance_type,
instance_count,
num_of_gunicorn_workers_per_instance,
api_name,
output,
wait,
):
yatai_service = get_yatai_service()
get_deployment_result = get_deployment(namespace, name, yatai_service)
if get_deployment_result.status.status_code != status_pb2.Status.OK:
get_deployment_status = get_deployment_result.status
_echo(
f'Failed to updated deployment {name}.'
f'{status_pb2.Status.Code.Name(get_deployment_status.status_code)}'
f':{get_deployment_status.error_message}',
CLI_COLOR_ERROR,
)
return
current_deployment = get_deployment_result.deployment
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this logic should not be in CLI, move them to yataiservice python_api

track_cli(
'deploy-update',
deploy_platform=DeploymentSpec.DeploymentOperator.Name(
current_deployment.spec.operator
),
)
bento_name, bento_version = bento.split(':')
updated_spec = {
'bento_name': bento_name,
'bento_version': bento_version,
'instance_type': instance_type,
'instance_count': instance_count,
'num_of_gunicorn_workers_per_instance': num_of_gunicorn_workers_per_instance, # noqa E501
'api_name': api_name,
}
try:
result = update_deployment(current_deployment, updated_spec, yatai_service)
except BentoMLException as e:
_echo(f'Failed to update deployment {name}: {str(e)}', CLI_COLOR_ERROR)
return
if result.status.status_code != status_pb2.Status.OK:
update_deployment_status = result.status
_echo(
f'Failed to update deployment {name}. '
f'{status_pb2.Status.Code.Name(update_deployment_status.status_code)}:'
f'{update_deployment_status.error_message}',
CLI_COLOR_ERROR,
)
return
else:
if wait:
result_state = get_state_after_await_action_complete(
yatai_service=yatai_service,
name=name,
namespace=namespace,
message='Updating deployment',
)
if result_state.status.status_code != status_pb2.Status.OK:
describe_deployment_status = result_state.status
_echo(
f'Updated deployment {name}. Failed to retrieve latest status. '
f'{status_pb2.Status.Code.Name(describe_deployment_status.status_code)}:' # noqa E501
f'{describe_deployment_status.error_message}'
)
return
result.deployment.state.CopyFrom(result_state.state)
track_cli(
'deploy-update-success',
deploy_platform=DeploymentSpec.DeploymentOperator.Name(
current_deployment.spec.operator
),
)
_echo(f'Successfully updated deployment {name}', CLI_COLOR_SUCCESS)
_print_deployment_info(result.deployment, output)

@deployment.command(help='Apply model service deployment from yaml file')
@click.option(
'-f',
Expand Down
2 changes: 1 addition & 1 deletion bentoml/deployment/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def add(self, deployment_pb):
"""

@abstractmethod
def update(self, deployment_pb):
def update(self, deployment_pb, previous_deployment):
"""
Update existing deployment based on deployment_pb spec
"""
Expand Down
172 changes: 167 additions & 5 deletions bentoml/deployment/sagemaker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,19 @@ def _create_sagemaker_endpoint(sagemaker_client, endpoint_name, endpoint_config_
)


def _update_sagemaker_endpoint(sagemaker_client, endpoint_name, endpoint_config_name):
try:
logger.debug("Updating sagemaker endpoint %s", endpoint_name)
update_endpoint_response = sagemaker_client.update_endpoint(
EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)
logger.debug("AWS update endpoint response: %s", str(update_endpoint_response))
except ClientError as e:
raise _aws_client_error_to_bentoml_exception(
e, "Failed to update sagemaker endpoint"
)


class SageMakerDeploymentOperator(DeploymentOperatorBase):
def add(self, deployment_pb):
try:
Expand Down Expand Up @@ -526,11 +539,160 @@ def _add(self, deployment_pb, bento_pb, bento_path):

return ApplyDeploymentResponse(status=Status.OK(), deployment=deployment_pb)

def update(self, deployment_pb):
raise NotImplementedError(
"Updating AWS SageMaker deployment is not supported in current version of "
"BentoML"
)
def update(self, deployment_pb, previous_deployment):
try:
ensure_docker_available_or_raise()
deployment_spec = deployment_pb.spec
bento_pb = self.yatai_service.GetBento(
GetBentoRequest(
bento_name=deployment_spec.bento_name,
bento_version=deployment_spec.bento_version,
)
)
if bento_pb.bento.uri.type not in (BentoUri.LOCAL, BentoUri.S3):
raise BentoMLException(
'BentoML currently not support {} repository'.format(
BentoUri.StorageType.Name(bento_pb.bento.uri.type)
)
)
return self._update(
deployment_pb, previous_deployment, bento_pb, bento_pb.bento.uri.uri
)
except BentoMLException as error:
deployment_pb.state.state = DeploymentState.ERROR
deployment_pb.state.error_message = (
f'Error updating SageMaker deployment: {str(error)}'
)
return ApplyDeploymentResponse(
status=error.status_proto, deployment=deployment_pb
)

def _update(self, deployment_pb, current_deployment, bento_pb, bento_path):
if loader._is_remote_path(bento_path):
with loader._resolve_remote_bundle_path(bento_path) as local_path:
return self._update(
deployment_pb, current_deployment, bento_pb, local_path
)
updated_deployment_spec = deployment_pb.spec
updated_sagemaker_config = updated_deployment_spec.sagemaker_operator_config
sagemaker_client = boto3.client('sagemaker', updated_sagemaker_config.region)

try:
raise_if_api_names_not_found_in_bento_service_metadata(
bento_pb.bento.bento_service_metadata,
[updated_sagemaker_config.api_name],
)
describe_latest_deployment_state = self.describe(deployment_pb)
current_deployment_spec = current_deployment.spec
current_sagemaker_config = current_deployment_spec.sagemaker_operator_config
latest_deployment_state = json.loads(
describe_latest_deployment_state.state.info_json
)

current_ecr_image_tag = latest_deployment_state['ProductionVariants'][0][
'DeployedImages'
][0]['SpecifiedImage']
if (
updated_deployment_spec.bento_name != current_deployment_spec.bento_name
or updated_deployment_spec.bento_version
!= current_deployment_spec.bento_version
):
logger.debug(
'BentoService tag is different from current deployment, '
'creating new docker image and push to ECR'
)
with TempDirectory() as temp_dir:
sagemaker_project_dir = os.path.join(
temp_dir, updated_deployment_spec.bento_name
)
_init_sagemaker_project(sagemaker_project_dir, bento_path)
ecr_image_path = create_and_push_docker_image_to_ecr(
updated_sagemaker_config.region,
updated_deployment_spec.bento_name,
updated_deployment_spec.bento_version,
sagemaker_project_dir,
)
else:
logger.debug('Using existing ECR image for Sagemaker model')
ecr_image_path = current_ecr_image_tag

(
updated_sagemaker_model_name,
updated_sagemaker_endpoint_config_name,
sagemaker_endpoint_name,
) = _get_sagemaker_resource_names(deployment_pb)
(
current_sagemaker_model_name,
current_sagemaker_endpoint_config_name,
_,
) = _get_sagemaker_resource_names(current_deployment)

if (
updated_sagemaker_config.api_name != current_sagemaker_config.api_name
or updated_sagemaker_config.num_of_gunicorn_workers_per_instance
!= current_sagemaker_config.num_of_gunicorn_workers_per_instance
or ecr_image_path != current_ecr_image_tag
):
logger.debug(
'Sagemaker model requires update. Delete current sagemaker model %s'
'and creating new model %s',
current_sagemaker_model_name,
updated_sagemaker_model_name,
)
_delete_sagemaker_model_if_exist(
sagemaker_client, current_sagemaker_model_name
)
_create_sagemaker_model(
sagemaker_client,
updated_sagemaker_model_name,
ecr_image_path,
updated_sagemaker_config,
)
# When bento service tag is not changed, we need to delete the current
# endpoint configuration in order to create new one to avoid name collation
if (
current_sagemaker_endpoint_config_name
== updated_sagemaker_endpoint_config_name
):
logger.debug(
'Current sagemaker config name %s is same as updated one, '
'delete it before create new endpoint config',
current_sagemaker_endpoint_config_name,
)
_delete_sagemaker_endpoint_config_if_exist(
sagemaker_client, current_sagemaker_endpoint_config_name
)
logger.debug(
'Create new endpoint configuration %s',
updated_sagemaker_endpoint_config_name,
)
_create_sagemaker_endpoint_config(
sagemaker_client,
updated_sagemaker_model_name,
updated_sagemaker_endpoint_config_name,
updated_sagemaker_config,
)
logger.debug(
'Updating endpoint to new endpoint configuration %s',
updated_sagemaker_endpoint_config_name,
)
_update_sagemaker_endpoint(
sagemaker_client,
sagemaker_endpoint_name,
updated_sagemaker_endpoint_config_name,
)
logger.debug(
'Delete old sagemaker endpoint config %s',
current_sagemaker_endpoint_config_name,
)
_delete_sagemaker_endpoint_config_if_exist(
sagemaker_client, current_sagemaker_endpoint_config_name
)
except AWSServiceError as e:
_try_clean_up_sagemaker_deployment_resource(deployment_pb)
raise e

return ApplyDeploymentResponse(status=Status.OK(), deployment=deployment_pb)

def delete(self, deployment_pb):
try:
Expand Down
7 changes: 5 additions & 2 deletions bentoml/utils/validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@
from cerberus import Validator

from bentoml.utils import ProtoMessageToDict
from bentoml.proto.deployment_pb2 import DeploymentSpec
from bentoml.proto.deployment_pb2 import DeploymentSpec, DeploymentState

deployment_schema = {
'name': {'type': 'string', 'required': True, 'minlength': 4},
'namespace': {'type': 'string', 'required': True, 'minlength': 3},
'labels': {'type': 'dict', 'allow_unknown': True},
'annotations': {'type': 'dict', 'allow_unknown': True},
'created_at': {'type': 'string'},
'last_updated_at': {'type': 'string'},
'spec': {
'type': 'dict',
'required': True,
Expand Down Expand Up @@ -80,9 +82,10 @@
'state': {
'type': 'dict',
'schema': {
'state': {'type': 'integer'},
'state': {'type': 'string', 'allowed': DeploymentState.State.keys()},
'error_message': {'type': 'string'},
'info_json': {'type': 'string'},
'timestamp': {'type': 'string'},
},
},
}
Expand Down
Loading