diff --git a/src/bentoml/__init__.py b/src/bentoml/__init__.py index 1cbc610e842..6d6566ee37e 100644 --- a/src/bentoml/__init__.py +++ b/src/bentoml/__init__.py @@ -99,6 +99,7 @@ from . import server # Server API from . import monitoring # Monitoring API from . import cloud # Cloud API + from . import deployment # deployment API # isort: on from _bentoml_impl.client import AsyncHTTPClient @@ -166,7 +167,7 @@ exceptions = _LazyLoader("bentoml.exceptions", globals(), "bentoml.exceptions") monitoring = _LazyLoader("bentoml.monitoring", globals(), "bentoml.monitoring") cloud = _LazyLoader("bentoml.cloud", globals(), "bentoml.cloud") - + deployment = _LazyLoader("bentoml.deployment", globals(), "bentoml.deployment") del _LazyLoader _NEW_SDK_ATTRS = [ @@ -258,6 +259,7 @@ def __getattr__(name: str) -> Any: # integrations "ray", "cloud", + "deployment", "triton", "monitor", "load_config", diff --git a/src/bentoml/_internal/cloud/__init__.py b/src/bentoml/_internal/cloud/__init__.py index 97e73790b1e..f7d45507503 100644 --- a/src/bentoml/_internal/cloud/__init__.py +++ b/src/bentoml/_internal/cloud/__init__.py @@ -1,4 +1,3 @@ from .base import CloudClient as CloudClient from .bentocloud import BentoCloudClient as BentoCloudClient -from .deployment import Resource as Resource from .yatai import YataiClient as YataiClient diff --git a/src/bentoml/_internal/cloud/base.py b/src/bentoml/_internal/cloud/base.py index d1173f71e31..c8cdd162a56 100644 --- a/src/bentoml/_internal/cloud/base.py +++ b/src/bentoml/_internal/cloud/base.py @@ -59,30 +59,33 @@ def write(self, data: bytes) -> t.Any: # type: ignore # python buffer types ar class CloudClient(ABC): - log_progress = Progress(TextColumn("{task.description}")) - - spinner_progress = Progress( - TextColumn(" "), - TimeElapsedColumn(), - TextColumn("[bold purple]{task.fields[action]}"), - SpinnerColumn("simpleDots"), - ) - - transmission_progress = Progress( - TextColumn("[bold blue]{task.description}", justify="right"), - 
BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>3.1f}%", - "•", - DownloadColumn(), - "•", - TransferSpeedColumn(), - "•", - TimeRemainingColumn(), - ) - - progress_group = Group( - Panel(Group(log_progress, spinner_progress)), transmission_progress - ) + # Moved attributes to __init__ because otherwise it will keep all the logs when running the SDK. + def __init__(self): + self.log_progress = Progress(TextColumn("{task.description}")) + + self.spinner_progress = Progress( + TextColumn(" "), + TimeElapsedColumn(), + TextColumn("[bold purple]{task.fields[action]}"), + SpinnerColumn("simpleDots"), + ) + + self.transmission_progress = Progress( + TextColumn("[bold blue]{task.description}", justify="right"), + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.1f}%", + "•", + DownloadColumn(), + "•", + TransferSpeedColumn(), + "•", + TimeRemainingColumn(), + ) + + self.progress_group = Group( + Panel(Group(self.log_progress, self.spinner_progress)), + self.transmission_progress, + ) @contextmanager def spin(self, *, text: str): diff --git a/src/bentoml/_internal/cloud/bentocloud.py b/src/bentoml/_internal/cloud/bentocloud.py index 0709843864d..861f9f4d1ed 100644 --- a/src/bentoml/_internal/cloud/bentocloud.py +++ b/src/bentoml/_internal/cloud/bentocloud.py @@ -10,7 +10,7 @@ from tempfile import NamedTemporaryFile import fs -import requests +import httpx from rich.live import Live from simple_di import Provide from simple_di import inject @@ -29,40 +29,36 @@ from .base import CallbackIOWrapper from .base import CloudClient from .config import get_rest_api_client -from .deployment import Deployment -from .schemas import BentoApiSchema -from .schemas import BentoManifestSchema -from .schemas import BentoRunnerResourceSchema -from .schemas import BentoRunnerSchema -from .schemas import BentoUploadStatus -from .schemas import CompleteMultipartUploadSchema -from .schemas import CompletePartSchema -from .schemas import
CreateBentoRepositorySchema -from .schemas import CreateBentoSchema -from .schemas import CreateModelRepositorySchema -from .schemas import CreateModelSchema -from .schemas import FinishUploadBentoSchema -from .schemas import FinishUploadModelSchema -from .schemas import LabelItemSchema -from .schemas import ModelManifestSchema -from .schemas import ModelUploadStatus -from .schemas import PreSignMultipartUploadUrlSchema -from .schemas import TransmissionStrategy -from .schemas import UpdateBentoSchema +from .schemas.modelschemas import BentoApiSchema +from .schemas.modelschemas import BentoRunnerResourceSchema +from .schemas.modelschemas import BentoRunnerSchema +from .schemas.schemasv1 import BentoManifestSchema +from .schemas.schemasv1 import BentoUploadStatus +from .schemas.schemasv1 import CompleteMultipartUploadSchema +from .schemas.schemasv1 import CompletePartSchema +from .schemas.schemasv1 import CreateBentoRepositorySchema +from .schemas.schemasv1 import CreateBentoSchema +from .schemas.schemasv1 import CreateModelRepositorySchema +from .schemas.schemasv1 import CreateModelSchema +from .schemas.schemasv1 import FinishUploadBentoSchema +from .schemas.schemasv1 import FinishUploadModelSchema +from .schemas.schemasv1 import LabelItemSchema +from .schemas.schemasv1 import ModelManifestSchema +from .schemas.schemasv1 import ModelUploadStatus +from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema +from .schemas.schemasv1 import TransmissionStrategy +from .schemas.schemasv1 import UpdateBentoSchema if t.TYPE_CHECKING: from concurrent.futures import Future from rich.progress import TaskID - from .schemas import BentoWithRepositoryListSchema - from .schemas import ModelWithRepositoryListSchema + from .schemas.schemasv1 import BentoWithRepositoryListSchema + from .schemas.schemasv1 import ModelWithRepositoryListSchema class BentoCloudClient(CloudClient): - def __init__(self): - self.deployment = Deployment() - def push_bento( self, bento: Bento, @@ -124,18 
+120,18 @@ def push_model(model: Model) -> None: futures: t.Iterator[None] = executor.map(push_model, models) list(futures) with self.spin(text=f'Fetching Bento repository "{name}"'): - bento_repository = yatai_rest_client.get_bento_repository( + bento_repository = yatai_rest_client.v1.get_bento_repository( bento_repository_name=name ) if not bento_repository: with self.spin(text=f'Bento repository "{name}" not found, creating now..'): - bento_repository = yatai_rest_client.create_bento_repository( + bento_repository = yatai_rest_client.v1.create_bento_repository( req=CreateBentoRepositorySchema(name=name, description="") ) with self.spin( text=f'Try fetching Bento "{bento.tag}" from remote Bento store..' ): - remote_bento = yatai_rest_client.get_bento( + remote_bento = yatai_rest_client.v1.get_bento( bento_repository_name=name, version=version ) if ( @@ -144,7 +140,7 @@ def push_model(model: Model) -> None: and remote_bento.upload_status == BentoUploadStatus.SUCCESS ): self.log_progress.add_task( - f'[bold blue]Push failed: Bento "{bento.tag}" already exists in remote Bento store' + f'[bold blue]Push skipped: Bento "{bento.tag}" already exists in remote Bento store' ) return labels: list[LabelItemSchema] = [ @@ -182,7 +178,7 @@ def push_model(model: Model) -> None: with self.spin( text=f'Registering Bento "{bento.tag}" with remote Bento store..' 
): - remote_bento = yatai_rest_client.create_bento( + remote_bento = yatai_rest_client.v1.create_bento( bento_repository_name=bento_repository.name, req=CreateBentoSchema( description="", @@ -194,7 +190,7 @@ def push_model(model: Model) -> None: ) else: with self.spin(text=f'Updating Bento "{bento.tag}"..'): - remote_bento = yatai_rest_client.update_bento( + remote_bento = yatai_rest_client.v1.update_bento( bento_repository_name=bento_repository.name, version=version, req=UpdateBentoSchema( @@ -212,7 +208,7 @@ def push_model(model: Model) -> None: with self.spin( text=f'Getting a presigned upload url for bento "{bento.tag}" ..' ): - remote_bento = yatai_rest_client.presign_bento_upload_url( + remote_bento = yatai_rest_client.v1.presign_bento_upload_url( bento_repository_name=bento_repository.name, version=version ) if remote_bento.presigned_upload_url: @@ -240,7 +236,7 @@ def filter_( tar_io.seek(0, 0) with self.spin(text=f'Start uploading bento "{bento.tag}"..'): - yatai_rest_client.start_upload_bento( + yatai_rest_client.v1.start_upload_bento( bento_repository_name=bento_repository.name, version=version ) @@ -255,7 +251,7 @@ def filter_( if transmission_strategy == "proxy": try: - yatai_rest_client.upload_bento( + yatai_rest_client.v1.upload_bento( bento_repository_name=bento_repository.name, version=version, data=tar_io, @@ -275,7 +271,7 @@ def filter_( ) try: if presigned_upload_url is not None: - resp = requests.put(presigned_upload_url, data=tar_io) + resp = httpx.put(presigned_upload_url, content=tar_io) if resp.status_code != 200: finish_req = FinishUploadBentoSchema( status=BentoUploadStatus.FAILED, @@ -285,9 +281,11 @@ def filter_( with self.spin( text=f'Start multipart uploading Bento "{bento.tag}"...' 
): - remote_bento = yatai_rest_client.start_bento_multipart_upload( - bento_repository_name=bento_repository.name, - version=version, + remote_bento = ( + yatai_rest_client.v1.start_bento_multipart_upload( + bento_repository_name=bento_repository.name, + version=version, + ) ) if not remote_bento.upload_id: raise BentoMLException( @@ -305,7 +303,7 @@ def chunk_upload( text=f'({chunk_number}/{chunks_count}) Presign multipart upload url of Bento "{bento.tag}"...' ): remote_bento = ( - yatai_rest_client.presign_bento_multipart_upload_url( + yatai_rest_client.v1.presign_bento_multipart_upload_url( bento_repository_name=bento_repository.name, version=version, req=PreSignMultipartUploadUrlSchema( @@ -330,8 +328,8 @@ def chunk_upload( ) with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io: - resp = requests.put( - remote_bento.presigned_upload_url, data=chunk_io + resp = httpx.put( + remote_bento.presigned_upload_url, content=chunk_io ) if resp.status_code != 200: return FinishUploadBentoSchema( @@ -375,7 +373,7 @@ def chunk_upload( text=f'Completing multipart upload of Bento "{bento.tag}"...' 
): remote_bento = ( - yatai_rest_client.complete_bento_multipart_upload( + yatai_rest_client.v1.complete_bento_multipart_upload( bento_repository_name=bento_repository.name, version=version, req=CompleteMultipartUploadSchema( @@ -395,7 +393,7 @@ def chunk_upload( f'[bold red]Failed to upload Bento "{bento.tag}"' ) with self.spin(text="Submitting upload status to remote Bento store"): - yatai_rest_client.finish_upload_bento( + yatai_rest_client.v1.finish_upload_bento( bento_repository_name=bento_repository.name, version=version, req=finish_req, @@ -460,7 +458,7 @@ def _do_pull_bento( yatai_rest_client = get_rest_api_client(context) with self.spin(text=f'Fetching bento "{_tag}"'): - remote_bento = yatai_rest_client.get_bento( + remote_bento = yatai_rest_client.v1.get_bento( bento_repository_name=name, version=version ) if not remote_bento: @@ -498,7 +496,7 @@ def pull_model(model_tag: Tag): with self.spin( text=f'Getting a presigned download url for bento "{_tag}"' ): - remote_bento = yatai_rest_client.presign_bento_download_url( + remote_bento = yatai_rest_client.v1.presign_bento_download_url( name, version ) if remote_bento.presigned_download_url: @@ -506,7 +504,7 @@ def pull_model(model_tag: Tag): transmission_strategy = "presigned_url" if transmission_strategy == "proxy": - response = yatai_rest_client.download_bento( + response = yatai_rest_client.v1.download_bento( bento_repository_name=name, version=version, ) @@ -515,31 +513,32 @@ def pull_model(model_tag: Tag): with self.spin( text=f'Getting a presigned download url for bento "{_tag}"' ): - remote_bento = yatai_rest_client.presign_bento_download_url( + remote_bento = yatai_rest_client.v1.presign_bento_download_url( name, version ) presigned_download_url = remote_bento.presigned_download_url - response = requests.get(presigned_download_url, stream=True) - if response.status_code != 200: - raise BentoMLException( - f'Failed to download bento "{_tag}": {response.text}' - ) - total_size_in_bytes = 
int(response.headers.get("content-length", 0)) - block_size = 1024 # 1 Kibibyte with NamedTemporaryFile() as tar_file: - self.transmission_progress.update( - download_task_id, - completed=0, - total=total_size_in_bytes, - visible=True, - ) - self.transmission_progress.start_task(download_task_id) - for data in response.iter_content(block_size): + with httpx.stream("GET", presigned_download_url) as response: + if response.status_code != 200: + raise BentoMLException( + f'Failed to download bento "{_tag}": {response.text}' + ) + total_size_in_bytes = int(response.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte self.transmission_progress.update( - download_task_id, advance=len(data) + download_task_id, + completed=0, + total=total_size_in_bytes, + visible=True, ) - tar_file.write(data) + self.transmission_progress.start_task(download_task_id) + for data in response.iter_bytes(block_size): + self.transmission_progress.update( + download_task_id, advance=len(data) + ) + tar_file.write(data) + self.log_progress.add_task( f'[bold green]Finished downloading all bento "{_tag}" files' ) @@ -604,18 +603,18 @@ def _do_push_model( raise BentoMLException(f'Model "{model.tag}" version cannot be None') info = model.info with self.spin(text=f'Fetching model repository "{name}"'): - model_repository = yatai_rest_client.get_model_repository( + model_repository = yatai_rest_client.v1.get_model_repository( model_repository_name=name ) if not model_repository: with self.spin(text=f'Model repository "{name}" not found, creating now..'): - model_repository = yatai_rest_client.create_model_repository( + model_repository = yatai_rest_client.v1.create_model_repository( req=CreateModelRepositorySchema(name=name, description="") ) with self.spin( text=f'Try fetching model "{model.tag}" from remote model store..' 
): - remote_model = yatai_rest_client.get_model( + remote_model = yatai_rest_client.v1.get_model( model_repository_name=name, version=version ) if ( @@ -635,7 +634,7 @@ def _do_push_model( with self.spin( text=f'Registering model "{model.tag}" with remote model store..' ): - remote_model = yatai_rest_client.create_model( + remote_model = yatai_rest_client.v1.create_model( model_repository_name=model_repository.name, req=CreateModelSchema( description="", @@ -663,7 +662,7 @@ def _do_push_model( with self.spin( text=f'Getting a presigned upload url for Model "{model.tag}" ..' ): - remote_model = yatai_rest_client.presign_model_upload_url( + remote_model = yatai_rest_client.v1.presign_model_upload_url( model_repository_name=model_repository.name, version=version ) if remote_model.presigned_upload_url: @@ -682,7 +681,7 @@ def io_cb(x: int): tar.add(model.path, arcname="./") tar_io.seek(0, 0) with self.spin(text=f'Start uploading model "{model.tag}"..'): - yatai_rest_client.start_upload_model( + yatai_rest_client.v1.start_upload_model( model_repository_name=model_repository.name, version=version ) file_size = tar_io.getbuffer().nbytes @@ -696,7 +695,7 @@ def io_cb(x: int): if transmission_strategy == "proxy": try: - yatai_rest_client.upload_model( + yatai_rest_client.v1.upload_model( model_repository_name=model_repository.name, version=version, data=tar_io, @@ -716,7 +715,7 @@ def io_cb(x: int): ) try: if presigned_upload_url is not None: - resp = requests.put(presigned_upload_url, data=tar_io) + resp = httpx.put(presigned_upload_url, content=tar_io) if resp.status_code != 200: finish_req = FinishUploadModelSchema( status=ModelUploadStatus.FAILED, @@ -726,9 +725,11 @@ def io_cb(x: int): with self.spin( text=f'Start multipart uploading Model "{model.tag}"...' 
): - remote_model = yatai_rest_client.start_model_multipart_upload( - model_repository_name=model_repository.name, - version=version, + remote_model = ( + yatai_rest_client.v1.start_model_multipart_upload( + model_repository_name=model_repository.name, + version=version, + ) ) if not remote_model.upload_id: raise BentoMLException( @@ -746,7 +747,7 @@ def chunk_upload( text=f'({chunk_number}/{chunks_count}) Presign multipart upload url of model "{model.tag}"...' ): remote_model = ( - yatai_rest_client.presign_model_multipart_upload_url( + yatai_rest_client.v1.presign_model_multipart_upload_url( model_repository_name=model_repository.name, version=version, req=PreSignMultipartUploadUrlSchema( @@ -772,8 +773,8 @@ def chunk_upload( ) with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io: - resp = requests.put( - remote_model.presigned_upload_url, data=chunk_io + resp = httpx.put( + remote_model.presigned_upload_url, content=chunk_io ) if resp.status_code != 200: return FinishUploadModelSchema( @@ -817,7 +818,7 @@ def chunk_upload( text=f'Completing multipart upload of model "{model.tag}"...' 
): remote_model = ( - yatai_rest_client.complete_model_multipart_upload( + yatai_rest_client.v1.complete_model_multipart_upload( model_repository_name=model_repository.name, version=version, req=CompleteMultipartUploadSchema( @@ -837,11 +838,12 @@ def chunk_upload( f'[bold red]Failed to upload model "{model.tag}"' ) with self.spin(text="Submitting upload status to remote model store"): - yatai_rest_client.finish_upload_model( + yatai_rest_client.v1.finish_upload_model( model_repository_name=model_repository.name, version=version, req=finish_req, ) + if finish_req.status != ModelUploadStatus.SUCCESS: self.log_progress.add_task( f'[bold red]Failed pushing model "{model.tag}" : {finish_req.reason}' @@ -903,7 +905,7 @@ def _do_pull_model( name = _tag.name version = _tag.version if version in (None, "latest"): - latest_model = yatai_rest_client.get_latest_model(name, query=query) + latest_model = yatai_rest_client.v1.get_latest_model(name, query=query) if latest_model is None: raise BentoMLException( f'Model "{_tag}" not found on remote model store, you may need to specify a version' @@ -929,7 +931,9 @@ def _do_pull_model( ) with self.spin(text=f'Getting a presigned download url for model "{_tag}"..'): - remote_model = yatai_rest_client.presign_model_download_url(name, version) + remote_model = yatai_rest_client.v1.presign_model_download_url( + name, version + ) if not remote_model: raise BentoMLException(f'Model "{_tag}" not found on remote model store') @@ -942,7 +946,7 @@ def _do_pull_model( transmission_strategy = remote_model.transmission_strategy else: with self.spin(text=f'Getting a presigned download url for model "{_tag}"'): - remote_model = yatai_rest_client.presign_model_download_url( + remote_model = yatai_rest_client.v1.presign_model_download_url( name, version ) if remote_model.presigned_download_url: @@ -950,7 +954,7 @@ def _do_pull_model( transmission_strategy = "presigned_url" if transmission_strategy == "proxy": - response = 
yatai_rest_client.download_model( + response = yatai_rest_client.v1.download_model( model_repository_name=name, version=version ) else: @@ -958,30 +962,33 @@ def _do_pull_model( with self.spin( text=f'Getting a presigned download url for model "{_tag}"' ): - remote_model = yatai_rest_client.presign_model_download_url( + remote_model = yatai_rest_client.v1.presign_model_download_url( name, version ) presigned_download_url = remote_model.presigned_download_url - response = requests.get(presigned_download_url, stream=True) - if response.status_code != 200: - raise BentoMLException( - f'Failed to download model "{_tag}": {response.text}' + with NamedTemporaryFile() as tar_file: + with httpx.stream("GET", presigned_download_url) as response: + if response.status_code != 200: + raise BentoMLException( + f'Failed to download model "{_tag}": {response.text}' + ) + + total_size_in_bytes = int(response.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte + self.transmission_progress.update( + download_task_id, + description=f'Downloading model "{_tag}"', + total=total_size_in_bytes, + visible=True, ) + self.transmission_progress.start_task(download_task_id) + for data in response.iter_bytes(block_size): + self.transmission_progress.update( + download_task_id, advance=len(data) + ) + tar_file.write(data) - total_size_in_bytes = int(response.headers.get("content-length", 0)) - block_size = 1024 # 1 Kibibyte - with NamedTemporaryFile() as tar_file: - self.transmission_progress.update( - download_task_id, - description=f'Downloading model "{_tag}"', - total=total_size_in_bytes, - visible=True, - ) - self.transmission_progress.start_task(download_task_id) - for data in response.iter_content(block_size): - self.transmission_progress.update(download_task_id, advance=len(data)) - tar_file.write(data) self.log_progress.add_task( f'[bold green]Finished downloading model "{_tag}" files' ) @@ -1005,7 +1012,7 @@ def _do_pull_model( def list_bentos(self, context: str | None 
= None) -> BentoWithRepositoryListSchema: yatai_rest_client = get_rest_api_client(context) - res = yatai_rest_client.get_bentos_list() + res = yatai_rest_client.v1.get_bentos_list() if res is None: raise BentoMLException("List bentos request failed") @@ -1017,7 +1024,7 @@ def list_bentos(self, context: str | None = None) -> BentoWithRepositoryListSche def list_models(self, context: str | None = None) -> ModelWithRepositoryListSchema: yatai_rest_client = get_rest_api_client(context) - res = yatai_rest_client.get_models_list() + res = yatai_rest_client.v1.get_models_list() if res is None: raise BentoMLException("List models request failed") diff --git a/src/bentoml/_internal/cloud/client.py b/src/bentoml/_internal/cloud/client.py index 60e1fb49d96..16e62eb6813 100644 --- a/src/bentoml/_internal/cloud/client.py +++ b/src/bentoml/_internal/cloud/client.py @@ -4,53 +4,50 @@ import typing as t from urllib.parse import urljoin -import requests +import httpx from ...exceptions import CloudRESTApiClientError from ..configuration import BENTOML_VERSION -from .schemas import BentoRepositorySchema -from .schemas import BentoSchema -from .schemas import BentoWithRepositoryListSchema -from .schemas import ClusterFullSchema -from .schemas import ClusterListSchema -from .schemas import CompleteMultipartUploadSchema -from .schemas import CreateBentoRepositorySchema -from .schemas import CreateBentoSchema -from .schemas import CreateDeploymentSchema -from .schemas import CreateModelRepositorySchema -from .schemas import CreateModelSchema -from .schemas import DeploymentListSchema -from .schemas import DeploymentSchema -from .schemas import FinishUploadBentoSchema -from .schemas import FinishUploadModelSchema -from .schemas import ModelRepositorySchema -from .schemas import ModelSchema -from .schemas import ModelWithRepositoryListSchema -from .schemas import OrganizationSchema -from .schemas import PreSignMultipartUploadUrlSchema -from .schemas import UpdateBentoSchema -from .schemas 
import UpdateDeploymentSchema -from .schemas import UserSchema -from .schemas import schema_from_json -from .schemas import schema_from_object -from .schemas import schema_to_json +from .schemas.schemasv1 import BentoRepositorySchema +from .schemas.schemasv1 import BentoSchema +from .schemas.schemasv1 import BentoWithRepositoryListSchema +from .schemas.schemasv1 import ClusterFullSchema +from .schemas.schemasv1 import ClusterListSchema +from .schemas.schemasv1 import CompleteMultipartUploadSchema +from .schemas.schemasv1 import CreateBentoRepositorySchema +from .schemas.schemasv1 import CreateBentoSchema +from .schemas.schemasv1 import CreateDeploymentSchema as CreateDeploymentSchemaV1 +from .schemas.schemasv1 import CreateModelRepositorySchema +from .schemas.schemasv1 import CreateModelSchema +from .schemas.schemasv1 import DeploymentFullSchema +from .schemas.schemasv1 import DeploymentListSchema +from .schemas.schemasv1 import FinishUploadBentoSchema +from .schemas.schemasv1 import FinishUploadModelSchema +from .schemas.schemasv1 import ModelRepositorySchema +from .schemas.schemasv1 import ModelSchema +from .schemas.schemasv1 import ModelWithRepositoryListSchema +from .schemas.schemasv1 import OrganizationSchema +from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema +from .schemas.schemasv1 import UpdateBentoSchema +from .schemas.schemasv1 import UpdateDeploymentSchema +from .schemas.schemasv1 import UserSchema +from .schemas.schemasv2 import CreateDeploymentSchema as CreateDeploymentSchemaV2 +from .schemas.schemasv2 import DeploymentFullSchema as DeploymentFullSchemaV2 +from .schemas.schemasv2 import DeploymentListSchema as DeploymentListSchemaV2 +from .schemas.schemasv2 import UpdateDeploymentSchema as UpdateDeploymentSchemaV2 +from .schemas.utils import schema_from_json +from .schemas.utils import schema_from_object +from .schemas.utils import schema_to_json logger = logging.getLogger(__name__) -class RestApiClient: - def __init__(self, endpoint: str, 
api_token: str) -> None: +class BaseRestApiClient: + def __init__(self, endpoint: str, session: httpx.Client) -> None: self.endpoint = endpoint - self.session = requests.Session() - self.session.headers.update( - { - "X-YATAI-API-TOKEN": api_token, - "Content-Type": "application/json", - "X-Bentoml-Version": BENTOML_VERSION, - } - ) + self.session = session - def _is_not_found(self, resp: requests.Response) -> bool: + def _is_not_found(self, resp: httpx.Response) -> bool: # We used to return 400 for record not found, handle both cases return ( resp.status_code == 404 @@ -58,12 +55,14 @@ def _is_not_found(self, resp: requests.Response) -> bool: and "record not found" in resp.text ) - def _check_resp(self, resp: requests.Response) -> None: + def _check_resp(self, resp: httpx.Response) -> None: if resp.status_code != 200: raise CloudRESTApiClientError( f"request failed with status code {resp.status_code}: {resp.text}" ) + +class RestApiClientV1(BaseRestApiClient): def get_current_user(self) -> UserSchema | None: url = urljoin(self.endpoint, "/api/v1/auth/current") resp = self.session.get(url) @@ -96,7 +95,7 @@ def create_bento_repository( self, req: CreateBentoRepositorySchema ) -> BentoRepositorySchema: url = urljoin(self.endpoint, "/api/v1/bento_repositories") - resp = self.session.post(url, data=schema_to_json(req)) + resp = self.session.post(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoRepositorySchema) @@ -117,7 +116,7 @@ def create_bento( url = urljoin( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos" ) - resp = self.session.post(url, data=schema_to_json(req)) + resp = self.session.post(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoSchema) @@ -128,7 +127,7 @@ def update_bento( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = 
self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoSchema) @@ -175,7 +174,7 @@ def presign_bento_multipart_upload_url( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/presign_multipart_upload_url", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoSchema) @@ -189,7 +188,7 @@ def complete_bento_multipart_upload( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/complete_multipart_upload", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoSchema) @@ -211,7 +210,7 @@ def finish_upload_bento( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/finish_upload", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, BentoSchema) @@ -224,7 +223,7 @@ def upload_bento( ) resp = self.session.put( url, - data=data, + content=data, headers=dict( self.session.headers, **{"Content-Type": "application/octet-stream"} ), @@ -234,14 +233,14 @@ def upload_bento( def download_bento( self, bento_repository_name: str, version: str - ) -> requests.Response: + ) -> httpx.Response: url = urljoin( self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/download", ) - resp = self.session.get(url, stream=True) - self._check_resp(resp) - return resp + with self.session.stream("GET", url) as resp: + self._check_resp(resp) + return resp def get_model_repository( self, model_repository_name: str @@ -259,7 +258,7 @@ def create_model_repository( self, req: CreateModelRepositorySchema ) -> ModelRepositorySchema: 
url = urljoin(self.endpoint, "/api/v1/model_repositories") - resp = self.session.post(url, data=schema_to_json(req)) + resp = self.session.post(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, ModelRepositorySchema) @@ -280,7 +279,7 @@ def create_model( url = urljoin( self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models" ) - resp = self.session.post(url, data=schema_to_json(req)) + resp = self.session.post(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, ModelSchema) @@ -327,7 +326,7 @@ def presign_model_multipart_upload_url( self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models/{version}/presign_multipart_upload_url", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, ModelSchema) @@ -341,7 +340,7 @@ def complete_model_multipart_upload( self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models/{version}/complete_multipart_upload", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, ModelSchema) @@ -363,7 +362,7 @@ def finish_upload_model( self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models/{version}/finish_upload", ) - resp = self.session.patch(url, data=schema_to_json(req)) + resp = self.session.patch(url, content=schema_to_json(req)) self._check_resp(resp) return schema_from_json(resp.text, ModelSchema) @@ -376,7 +375,7 @@ def upload_model( ) resp = self.session.put( url, - data=data, + content=data, headers=dict( self.session.headers, **{"Content-Type": "application/octet-stream"} ), @@ -386,14 +385,14 @@ def upload_model( def download_model( self, model_repository_name: str, version: str - ) -> requests.Response: + ) -> httpx.Response: url 
= urljoin( self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models/{version}/download", ) - resp = self.session.get(url, stream=True) - self._check_resp(resp) - return resp + with self.session.stream("GET", url) as resp: + self._check_resp(resp) + return resp def get_bento_repositories_list( self, bento_repository_name: str @@ -421,7 +420,7 @@ def get_models_list(self) -> ModelWithRepositoryListSchema | None: self._check_resp(resp) return schema_from_json(resp.text, ModelWithRepositoryListSchema) - def get_deployment_list( + def get_cluster_deployment_list( self, cluster_name: str, **params: str | int | None ) -> DeploymentListSchema | None: url = urljoin(self.endpoint, f"/api/v1/clusters/{cluster_name}/deployments") @@ -431,17 +430,27 @@ def get_deployment_list( self._check_resp(resp) return schema_from_json(resp.text, DeploymentListSchema) + def get_organization_deployment_list( + self, **params: str | int | None + ) -> DeploymentListSchema | None: + url = urljoin(self.endpoint, "/api/v1/deployments") + resp = self.session.get(url, params=params) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentListSchema) + def create_deployment( - self, cluster_name: str, create_schema: CreateDeploymentSchema - ) -> DeploymentSchema | None: + self, cluster_name: str, create_schema: CreateDeploymentSchemaV1 + ) -> DeploymentFullSchema | None: url = urljoin(self.endpoint, f"/api/v1/clusters/{cluster_name}/deployments") - resp = self.session.post(url, data=schema_to_json(create_schema)) + resp = self.session.post(url, content=schema_to_json(create_schema)) self._check_resp(resp) - return schema_from_json(resp.text, DeploymentSchema) + return schema_from_json(resp.text, DeploymentFullSchema) def get_deployment( self, cluster_name: str, kube_namespace: str, deployment_name: str - ) -> DeploymentSchema | None: + ) -> DeploymentFullSchema | None: url = urljoin( self.endpoint, 
f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}", @@ -450,7 +459,7 @@ def get_deployment( if self._is_not_found(resp): return None self._check_resp(resp) - return schema_from_json(resp.text, DeploymentSchema) + return schema_from_json(resp.text, DeploymentFullSchema) def update_deployment( self, @@ -458,20 +467,20 @@ def update_deployment( kube_namespace: str, deployment_name: str, update_schema: UpdateDeploymentSchema, - ) -> DeploymentSchema | None: + ) -> DeploymentFullSchema | None: url = urljoin( self.endpoint, f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}", ) - resp = self.session.patch(url, data=schema_to_json(update_schema)) + resp = self.session.patch(url, content=schema_to_json(update_schema)) if self._is_not_found(resp): return None self._check_resp(resp) - return schema_from_json(resp.text, DeploymentSchema) + return schema_from_json(resp.text, DeploymentFullSchema) def terminate_deployment( self, cluster_name: str, kube_namespace: str, deployment_name: str - ) -> DeploymentSchema | None: + ) -> DeploymentFullSchema | None: url = urljoin( self.endpoint, f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}/terminate", @@ -480,11 +489,11 @@ def terminate_deployment( if self._is_not_found(resp): return None self._check_resp(resp) - return schema_from_json(resp.text, DeploymentSchema) + return schema_from_json(resp.text, DeploymentFullSchema) def delete_deployment( self, cluster_name: str, kube_namespace: str, deployment_name: str - ) -> DeploymentSchema | None: + ) -> DeploymentFullSchema | None: url = urljoin( self.endpoint, f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}", @@ -493,7 +502,7 @@ def delete_deployment( if self._is_not_found(resp): return None self._check_resp(resp) - return schema_from_json(resp.text, DeploymentSchema) + return schema_from_json(resp.text, 
DeploymentFullSchema) def get_cluster_list( self, params: dict[str, str | int] | None = None @@ -527,3 +536,112 @@ def get_latest_model( self._check_resp(resp) models = resp.json()["items"] return schema_from_object(models[0], ModelSchema) if models else None + + +class RestApiClientV2(BaseRestApiClient): + def create_deployment( + self, create_schema: CreateDeploymentSchemaV2, cluster_name: str + ) -> DeploymentFullSchemaV2: + url = urljoin(self.endpoint, "/api/v2/deployments") + resp = self.session.post( + url, content=schema_to_json(create_schema), params={"cluster": cluster_name} + ) + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentFullSchemaV2) + + def update_deployment( + self, + update_schema: UpdateDeploymentSchemaV2, + cluster_name: str, + deployment_name: str, + ) -> DeploymentFullSchemaV2 | None: + url = urljoin( + self.endpoint, + f"/api/v2/deployments/{deployment_name}", + ) + data = schema_to_json(update_schema) + resp = self.session.put(url, content=data, params={"cluster": cluster_name}) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentFullSchemaV2) + + def get_deployment( + self, cluster_name: str, deployment_name: str + ) -> DeploymentFullSchemaV2 | None: + url = urljoin( + self.endpoint, + f"/api/v2/deployments/{deployment_name}", + ) + resp = self.session.get(url, params={"cluster": cluster_name}) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentFullSchemaV2) + + def list_deployment( + self, + cluster_name: str | None = None, + all: bool | None = None, + # if both of the above is none, list default cluster's deployments + count: int | None = None, + q: str | None = None, + search: str | None = None, + start: int | None = None, + ) -> DeploymentListSchemaV2 | None: + url = urljoin(self.endpoint, "/api/v2/deployments") + resp = self.session.get( + url, + params={ + "cluster": cluster_name, + 
"all": all, + "count": count, + "q": q, + "search": search, + "start": start, + }, + ) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentListSchemaV2) + + def terminate_deployment( + self, cluster_name: str, deployment_name: str + ) -> DeploymentFullSchemaV2 | None: + url = urljoin( + self.endpoint, + f"/api/v2/deployments/{deployment_name}/terminate", + ) + resp = self.session.post(url, params={"cluster": cluster_name}) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentFullSchemaV2) + + def delete_deployment( + self, cluster_name: str, deployment_name: str + ) -> DeploymentFullSchemaV2 | None: + url = urljoin( + self.endpoint, + f"/api/v2/deployments/{deployment_name}", + ) + resp = self.session.delete(url, params={"cluster": cluster_name}) + if self._is_not_found(resp): + return None + self._check_resp(resp) + return schema_from_json(resp.text, DeploymentFullSchemaV2) + + +class RestApiClient: + def __init__(self, endpoint: str, api_token: str) -> None: + self.session = httpx.Client() + self.session.headers.update( + { + "X-YATAI-API-TOKEN": api_token, + "Content-Type": "application/json", + "X-Bentoml-Version": BENTOML_VERSION, + } + ) + self.v2 = RestApiClientV2(endpoint, self.session) + self.v1 = RestApiClientV1(endpoint, self.session) diff --git a/src/bentoml/_internal/cloud/config.py b/src/bentoml/_internal/cloud/config.py index 633aed22581..d2c22d67736 100644 --- a/src/bentoml/_internal/cloud/config.py +++ b/src/bentoml/_internal/cloud/config.py @@ -34,7 +34,7 @@ def get_rest_api_client(self) -> RestApiClient: def get_email(self) -> str: if not self.email: cli = self.get_rest_api_client() - user = cli.get_current_user() + user = cli.v1.get_current_user() if user is None: raise CloudRESTApiClientError( "Unable to get current user from yatai server" diff --git a/src/bentoml/_internal/cloud/deployment.py 
b/src/bentoml/_internal/cloud/deployment.py index ba043f80843..1f4174ddb44 100644 --- a/src/bentoml/_internal/cloud/deployment.py +++ b/src/bentoml/_internal/cloud/deployment.py @@ -1,29 +1,35 @@ from __future__ import annotations -import json import logging +import time import typing as t import attr +import yaml from deepmerge.merger import Merger +from simple_di import Provide +from simple_di import inject + +if t.TYPE_CHECKING: + from _bentoml_impl.client import AsyncHTTPClient + from _bentoml_impl.client import SyncHTTPClient + from bentoml._internal.bento.bento import BentoStore + from bentoml._internal.cloud.bentocloud import BentoCloudClient + from ...exceptions import BentoMLException +from ...exceptions import NotFound +from ..configuration.containers import BentoMLContainer from ..tag import Tag from ..utils import bentoml_cattr -from ..utils import first_not_none from ..utils import resolve_user_filepath from .config import get_rest_api_client -from .schemas import CreateDeploymentSchema -from .schemas import DeploymentListSchema -from .schemas import DeploymentMode -from .schemas import DeploymentSchema -from .schemas import DeploymentTargetCanaryRule -from .schemas import DeploymentTargetConfig -from .schemas import DeploymentTargetHPAConf -from .schemas import DeploymentTargetRunnerConfig -from .schemas import DeploymentTargetType -from .schemas import FullDeploymentSchema -from .schemas import UpdateDeploymentSchema +from .schemas.modelschemas import AccessControl +from .schemas.modelschemas import DeploymentStatus +from .schemas.modelschemas import DeploymentTargetHPAConf +from .schemas.schemasv2 import CreateDeploymentSchema as CreateDeploymentSchemaV2 +from .schemas.schemasv2 import DeploymentSchema +from .schemas.schemasv2 import UpdateDeploymentSchema as UpdateDeploymentSchemaV2 logger = logging.getLogger(__name__) @@ -37,28 +43,138 @@ ) -@attr.define -class Resource: - @classmethod - def for_hpa_conf(cls, **kwargs: t.Any) -> 
DeploymentTargetHPAConf: - return bentoml_cattr.structure(kwargs, DeploymentTargetHPAConf) - - @classmethod - def for_runner(cls, **kwargs: t.Any) -> DeploymentTargetRunnerConfig: - exclusive_api_server_key = { - v for v in kwargs if v not in attr.fields_dict(DeploymentTargetRunnerConfig) - } - return bentoml_cattr.structure( - {k: v for k, v in kwargs.items() if k not in exclusive_api_server_key}, - DeploymentTargetRunnerConfig, +@inject +def get_real_bento_tag( + project_path: str | None = None, + bento: str | Tag | None = None, + context: str | None = None, + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + _cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client], +) -> Tag: + if project_path: + from bentoml.bentos import build_bentofile + + bento_obj = build_bentofile(build_ctx=project_path, _bento_store=_bento_store) + _cloud_client.push_bento(bento=bento_obj, context=context) + return bento_obj.tag + elif bento: + bento = Tag.from_taglike(bento) + try: + bento_obj = _bento_store.get(bento) + except NotFound as e: + # "bento repo needs to exist if it is latest" + if bento.version is None or bento.version == "latest": + raise e + bento_obj = None + + # try to push if bento exists, otherwise expects bentocloud to have it + if bento_obj: + _cloud_client.push_bento(bento=bento_obj, context=context) + bento = bento_obj.tag + return bento + else: + raise BentoMLException( + "Create a deployment needs a target; project path or bento is necessary" ) - @classmethod - def for_api_server(cls, **kwargs: t.Any) -> DeploymentTargetConfig: - return bentoml_cattr.structure(kwargs, DeploymentTargetConfig) + +@attr.define +class DeploymentInfo: + __omit_if_default__ = True + name: str + created_at: str + bento: Tag + status: DeploymentStatus + admin_console: str + endpoint: t.Optional[str] + config: dict[str, t.Any] + + def to_dict(self) -> t.Dict[str, t.Any]: + return bentoml_cattr.unstructure(self) +@attr.define class Deployment: + 
context: t.Optional[str] + cluster_name: str + name: str + _schema: DeploymentSchema = attr.field(alias="_schema", repr=False) + _urls: t.Optional[list[str]] = attr.field(alias="_urls", default=None) + + @staticmethod + def _fix_scaling( + scaling: DeploymentTargetHPAConf | None, + ) -> DeploymentTargetHPAConf: + if scaling is None: + return DeploymentTargetHPAConf(1, 1) + if scaling.min_replicas is None: + scaling.min_replicas = 1 + if scaling.max_replicas is None: + scaling.max_replicas = max(scaling.min_replicas, 1) + # one edge case: + if scaling.min_replicas > scaling.max_replicas: + scaling.min_replicas = scaling.max_replicas + logger.warning( + "min scaling value is greater than max scaling value, setting min scaling to max scaling value" + ) + if scaling.min_replicas < 0: + raise BentoMLException( + "min scaling values must be greater than or equal to 0" + ) + if scaling.max_replicas <= 0: + raise BentoMLException("max scaling values must be greater than 0") + return scaling + + @staticmethod + def _validate_input_on_distributed( + config_struct: UpdateDeploymentSchemaV2, distributed: bool + ) -> None: + if distributed: + if config_struct.instance_type is not None: + raise BentoMLException( + "The 'instance_type' field is not allowed for distributed deployments. Please specify it per service in the services field." + ) + if ( + config_struct.scaling is not None + and config_struct.scaling != DeploymentTargetHPAConf() + ): + raise BentoMLException( + "The 'scaling' field is not allowed for distributed deployments. Please specify it per service in the services field." + ) + if config_struct.deployment_strategy is not None: + raise BentoMLException( + "The 'deployment_strategy' field is not allowed for distributed deployments. Please specify it per service in the services field." + ) + if config_struct.extras is not None: + raise BentoMLException( + "The 'extras' field is not allowed for distributed deployments. 
Please specify it per service in the services field." + ) + if config_struct.cold_start_timeout is not None: + raise BentoMLException( + "The 'cold_start_timeout' field is not allowed for distributed deployments. Please specify it per service in the services field." + ) + elif not distributed: + if config_struct.services != {}: + raise BentoMLException( + "The 'services' field is only allowed for distributed deployments." + ) + + @classmethod + def _fix_and_validate_schema( + cls, config_struct: UpdateDeploymentSchemaV2, distributed: bool + ): + cls._validate_input_on_distributed(config_struct, distributed) + # fix scaling + if distributed: + if len(config_struct.services) == 0: + raise BentoMLException("The configuration for services is mandatory") + for _, svc in config_struct.services.items(): + svc.scaling = cls._fix_scaling(svc.scaling) + else: + config_struct.scaling = cls._fix_scaling(config_struct.scaling) + if config_struct.access_type is None: + config_struct.access_type = AccessControl.PUBLIC + @classmethod def _get_default_kube_namespace( cls, @@ -66,7 +182,7 @@ def _get_default_kube_namespace( context: str | None = None, ) -> str: cloud_rest_client = get_rest_api_client(context) - res = cloud_rest_client.get_cluster(cluster_name) + res = cloud_rest_client.v1.get_cluster(cluster_name) if not res: raise BentoMLException("Cannot get default kube namespace") return res.config.default_deployment_kube_namespace @@ -74,330 +190,140 @@ def _get_default_kube_namespace( @classmethod def _get_default_cluster(cls, context: str | None = None) -> str: cloud_rest_client = get_rest_api_client(context) - res = cloud_rest_client.get_cluster_list(params={"count": 1}) + res = cloud_rest_client.v1.get_cluster_list(params={"count": 1}) if not res: raise BentoMLException("Failed to get list of clusters.") if not res.items: raise BentoMLException("Cannot get default clusters.") return res.items[0].name - @classmethod - def _create_deployment( - cls, - 
create_deployment_schema: CreateDeploymentSchema, - context: str | None = None, - cluster_name: str | None = None, - ) -> DeploymentSchema: - cloud_rest_client = get_rest_api_client(context) - if cluster_name is None: - cluster_name = cls._get_default_cluster(context) - if create_deployment_schema.kube_namespace is None: - create_deployment_schema.kube_namespace = cls._get_default_kube_namespace( - cluster_name, context - ) - for target in create_deployment_schema.targets: - if ( - cloud_rest_client.get_bento(target.bento_repository, target.bento) - is None - ): - raise BentoMLException( - f"Create deployment: {target.bento_repository}:{target.bento} does not exist" - ) - if ( - cloud_rest_client.get_deployment( - cluster_name, - create_deployment_schema.kube_namespace, - create_deployment_schema.name, - ) - is not None - ): - raise BentoMLException("Create deployment: Deployment already exists") - res = cloud_rest_client.create_deployment( - cluster_name, create_deployment_schema - ) + def _refetch(self) -> None: + cloud_rest_client = get_rest_api_client(self.context) + res = cloud_rest_client.v2.get_deployment(self.cluster_name, self.name) if res is None: - raise BentoMLException("Create deployment request failed") - logger.debug("Deployment Schema: %s", create_deployment_schema) - return res + raise NotFound(f"deployment {self.name} is not found") + self._schema = res + self._urls = res.urls - @classmethod - def _update_deployment( - cls, - deployment_name: str, - update_deployment_schema: UpdateDeploymentSchema, - kube_namespace: str | None = None, - context: str | None = None, - cluster_name: str | None = None, - ) -> DeploymentSchema: - cloud_rest_client = get_rest_api_client(context) - if cluster_name is None: - cluster_name = cls._get_default_cluster(context) - if kube_namespace is None: - kube_namespace = cls._get_default_kube_namespace(cluster_name, context) - for target in update_deployment_schema.targets: - if ( - 
cloud_rest_client.get_bento(target.bento_repository, target.bento) - is None - ): - raise BentoMLException( - f"Update deployment: {target.bento_repository}:{target.bento} does not exist" - ) - cloud_rest_client.get_deployment( - cluster_name, - kube_namespace, - deployment_name, + def _conver_schema_to_update_schema(self) -> dict[str, t.Any]: + if self._schema.latest_revision is None: + raise BentoMLException( + f"Deployment {self._schema.name} has no latest revision" ) - - res = cloud_rest_client.update_deployment( - cluster_name, kube_namespace, deployment_name, update_deployment_schema + target_schema = self._schema.latest_revision.targets[0] + if target_schema is None: + raise BentoMLException(f"Deployment {self._schema.name} has no target") + if target_schema.config is None: + raise BentoMLException(f"Deployment {self._schema.name} has no config") + if target_schema.bento is None: + raise BentoMLException(f"Deployment {self._schema.name} has no bento") + update_schema = UpdateDeploymentSchemaV2( + services=target_schema.config.services, + instance_type=target_schema.config.instance_type, + deployment_strategy=target_schema.config.deployment_strategy, + scaling=target_schema.config.scaling, + envs=target_schema.config.envs, + extras=target_schema.config.extras, + access_type=target_schema.config.access_type, + bentoml_config_overrides=target_schema.config.bentoml_config_overrides, + bento=target_schema.bento.repository.name + ":" + target_schema.bento.name, + cold_start_timeout=target_schema.config.cold_start_timeout, ) - if res is None: - raise BentoMLException("Update deployment request failed") - logger.debug("%s is created.", deployment_name) - logger.debug("Deployment Schema: %s", update_deployment_schema) - return res - - @classmethod - def update( - cls, - deployment_name: str, - bento: Tag | str | None = None, - description: str | None = None, - expose_endpoint: bool | None = None, - cluster_name: str | None = None, - kube_namespace: str | None = None, 
- resource_instance: str | None = None, - hpa_conf: DeploymentTargetHPAConf | None = None, - runners_config: dict[str, DeploymentTargetRunnerConfig] | None = None, - api_server_config: DeploymentTargetConfig | None = None, - mode: DeploymentMode | None = None, - type: DeploymentTargetType | None = None, - context: str | None = None, - labels: dict[str, str] | None = None, - canary_rules: t.List[DeploymentTargetCanaryRule] | None = None, - latest_bento: bool = False, - ) -> DeploymentSchema: - from bentoml import get as get_bento - - if mode is None: - mode = DeploymentMode.Function - if type is None: - type = DeploymentTargetType.STABLE - - if cluster_name is None: - cluster_name = cls._get_default_cluster(context) - if kube_namespace is None: - kube_namespace = cls._get_default_kube_namespace(cluster_name, context) + return bentoml_cattr.unstructure(update_schema) - base_schema = cls.get(deployment_name, context, cluster_name, kube_namespace) - # Deployment target always has length of 1 - if base_schema.latest_revision is None: + def _conver_schema_to_bento(self) -> Tag: + if self._schema.latest_revision is None: raise BentoMLException( - f"Deployment {deployment_name} has no latest revision" - ) - deployment_target = base_schema.latest_revision.targets[0] - - if bento is None: - # NOTE: bento.repository.name is the bento.name, and bento.name is the bento.version - # from bentocloud to bentoml.Tag concept - bento = deployment_target.bento.repository.name - bento = Tag.from_taglike(bento) - if latest_bento and bento.version is None or bento.version == "latest": - bento = get_bento(bento).tag - elif bento.version is None: - bento.version = deployment_target.bento.name - - updated_config = bentoml_cattr.unstructure(deployment_target.config) - if hpa_conf is not None: - hpa_conf_dct = bentoml_cattr.unstructure(hpa_conf) - if "hpa_conf" in updated_config: - if updated_config["hpa_conf"] is None: - updated_config["hpa_conf"] = {} - 
config_merger.merge(updated_config["hpa_conf"], hpa_conf_dct) - if "runners" in updated_config and updated_config["runners"] is not None: - for _, runner in updated_config["runners"].items(): - if runner["hpa_conf"] is None: - runner["hpa_conf"] = {} - config_merger.merge(runner["hpa_conf"], hpa_conf_dct) - if resource_instance is not None: - updated_config["resource_instance"] = resource_instance - if updated_config.get("runners") is not None: - for runner in updated_config["runners"].values(): - runner["resource_instance"] = resource_instance - if expose_endpoint is not None: - updated_config["enable_ingress"] = expose_endpoint - - if api_server_config is not None: - if runners_config is not None: - api_server_config.runners = runners_config - config_merger.merge( - updated_config, bentoml_cattr.unstructure(api_server_config) + f"Deployment {self._schema.name} has no latest revision" ) - elif runners_config is not None: - config_merger.merge( - updated_config, - { - "runners": { - k: bentoml_cattr.unstructure(v) - for k, v in runners_config.items() - } - }, - ) - - dct_update: dict[str, t.Any] = { - "mode": first_not_none(mode, base_schema.mode), - "labels": first_not_none( - [{"key": key, "value": value} for key, value in labels.items()] - if labels - else None, - [bentoml_cattr.unstructure(i) for i in base_schema.labels], - ), - "description": description, - } - update_target = { - "type": first_not_none(type, deployment_target.type), - "bento": first_not_none(bento.version, deployment_target.bento.name), - "bento_repository": first_not_none( - bento.name, deployment_target.bento.repository.name - ), - "config": updated_config, - } - - rules = first_not_none( - [bentoml_cattr.unstructure(i) for i in canary_rules] - if canary_rules - else None, - [bentoml_cattr.unstructure(i) for i in deployment_target.canary_rules] - if deployment_target.canary_rules - else None, + target_schema = self._schema.latest_revision.targets[0] + if target_schema is None: + raise 
BentoMLException(f"Deployment {self._schema.name} has no target") + if target_schema.bento is None: + raise BentoMLException(f"Deployment {self._schema.name} has no bento") + return Tag.from_taglike( + target_schema.bento.repository.name + ":" + target_schema.bento.name ) - if rules: - update_target["canary_rules"] = rules - - # update the target - dct_update["targets"] = [update_target] - return cls._update_deployment( - deployment_name=deployment_name, - update_deployment_schema=bentoml_cattr.structure( - dct_update, UpdateDeploymentSchema - ), - context=context, - cluster_name=cluster_name, - kube_namespace=kube_namespace, + @property + def info(self) -> DeploymentInfo: + schema = self._conver_schema_to_update_schema() + del schema["bento"] + return DeploymentInfo( + name=self.name, + bento=self._conver_schema_to_bento(), + status=self._schema.status, + admin_console=self.get_bento_cloud_url(), + endpoint=self._urls[0] if self._urls else None, + config=schema, + created_at=self._schema.created_at.strftime("%Y-%m-%d %H:%M:%S"), ) - @classmethod - def create( - cls, - deployment_name: str, - bento: Tag | str, - description: str | None = None, - expose_endpoint: bool | None = None, - cluster_name: str | None = None, - kube_namespace: str | None = None, - resource_instance: str | None = None, - hpa_conf: DeploymentTargetHPAConf | None = None, - runners_config: dict[str, DeploymentTargetRunnerConfig] | None = None, - api_server_config: DeploymentTargetConfig | None = None, - mode: DeploymentMode | None = None, - type: DeploymentTargetType | None = None, - context: str | None = None, - labels: dict[str, str] | None = None, - canary_rules: t.List[DeploymentTargetCanaryRule] | None = None, - ) -> DeploymentSchema: - if mode is None: - mode = DeploymentMode.Function - if type is None: - type = DeploymentTargetType.STABLE - bento_tag = Tag.from_taglike(bento) + def get_config(self) -> dict[str, t.Any]: + self._refetch() + res = self._conver_schema_to_update_schema() + # 
bento should not be in the deployment config + del res["bento"] + return res - dct: dict[str, t.Any] = { - "name": deployment_name, - "kube_namespace": kube_namespace, - "mode": mode, - "description": description, - } - if labels: - dct["labels"] = [ - {"key": key, "value": value} for key, value in labels.items() - ] - if api_server_config is None: - _config: dict[str, t.Any] = { - "runners": { - k: bentoml_cattr.unstructure(v) for k, v in runners_config.items() - } - if runners_config - else None, - } - else: - api_server_config.runners = runners_config - _config = bentoml_cattr.unstructure(api_server_config) - - create_target: dict[str, t.Any] = { - "type": type, - "bento_repository": bento_tag.name, - "bento": bento_tag.version, - "config": _config, - } - if canary_rules: - create_target["canary_rules"] = [ - bentoml_cattr.unstructure(i) for i in canary_rules - ] - - # Only change the value by the top-level param if it is not provided already in api_server_config or runner_config - if hpa_conf: - hpa_conf_dct = bentoml_cattr.unstructure(hpa_conf) - _config_hpa_conf = _config.get("hpa_conf", None) - if _config_hpa_conf is None: - _config_hpa_conf = {} - for k, v in hpa_conf_dct.items(): - if k not in _config_hpa_conf: - _config_hpa_conf[k] = v - else: - logger.warning( - "Key %s is already set in API server config and will not be overwritten with hpa_conf.%s", - k, - k, - ) - _config["hpa_conf"] = _config_hpa_conf - if "runners" in _config and _config["runners"] is not None: - _runner_config = _config["runners"] - for runner in _runner_config.values(): - _runner_hpa_conf = runner.get("hpa_conf", None) - if _runner_hpa_conf is None: - _runner_hpa_conf = {} - for k, v in hpa_conf_dct.items(): - if k not in _runner_hpa_conf: - _runner_hpa_conf[k] = v - else: - logger.warning( - "Key %s is already set in runner config and will not be overwritten with hpa_conf.%s", - k, - k, - ) - runner["hpa_conf"] = _runner_hpa_conf - - if resource_instance: - if "resource_instance" 
not in _config: - _config["resource_instance"] = resource_instance - if "runners" in _config and _config["runners"] is not None: - for runner in _config["runners"].values(): - if "resource_instance" not in runner: - runner["resource_instance"] = resource_instance - - if expose_endpoint is not None and _config.get("enable_ingress", None) is None: - _config["enable_ingress"] = expose_endpoint - - # setup the create target itself - dct["targets"] = [create_target] - - return cls._create_deployment( - context=context, - cluster_name=cluster_name, - create_deployment_schema=bentoml_cattr.structure( - dct, CreateDeploymentSchema - ), + def get_bento(self) -> str: + self._refetch() + return str(self._conver_schema_to_bento()) + + def get_status(self) -> str: + self._refetch() + return self._schema.status.value + + def get_client( + self, + is_async: bool = False, + media_type: str = "application/json", + token: str | None = None, + ) -> SyncHTTPClient: + from _bentoml_impl.client import SyncHTTPClient + + self._refetch() + if self._schema.status != DeploymentStatus.Running: + raise BentoMLException(f"Deployment status is {self._schema.status}") + if self._urls is None or len(self._urls) != 1: + raise BentoMLException("Deployment url is not ready") + return SyncHTTPClient(self._urls[0], media_type=media_type, token=token) + + def get_bento_cloud_url(self) -> str: + client = get_rest_api_client(self.context) + namespace = self._get_default_kube_namespace(self.cluster_name, self.context) + return f"{client.v1.endpoint}/clusters/{self.cluster_name}/namespaces/{namespace}/deployments/{self.name}" + + def get_async_client( + self, + media_type: str = "application/json", + token: str | None = None, + ) -> AsyncHTTPClient: + from _bentoml_impl.client import AsyncHTTPClient + + self._refetch() + if self._schema.status != DeploymentStatus.Running: + raise BentoMLException(f"Deployment status is {self._schema.status}") + if self._urls is None or len(self._urls) != 1: + raise 
BentoMLException("Deployment url is not ready") + return AsyncHTTPClient(self._urls[0], media_type=media_type, token=token) + + def wait_until_ready(self, timeout: int = 300, check_interval: int = 5) -> None: + start_time = time.time() + while time.time() - start_time < timeout: + status = self.get_status() + if status == DeploymentStatus.Running.value: + logger.info( + f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Deployment '{self.name}' is ready." + ) + return + logger.info( + f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Waiting for deployment '{self.name}' to be ready. Current status: '{status}'." + ) + time.sleep(check_interval) + + raise TimeoutError( + f"Timed out waiting for deployment '{self.name}' to be ready." ) @classmethod @@ -405,158 +331,344 @@ def list( cls, context: str | None = None, cluster_name: str | None = None, - query: str | None = None, search: str | None = None, - count: int | None = None, - start: int | None = None, - ) -> DeploymentListSchema: + ) -> list[Deployment]: cloud_rest_client = get_rest_api_client(context) if cluster_name is None: - cluster_name = cls._get_default_cluster(context) - if query or start or count or search: - params = {"start": start, "count": count, "search": search, "q": query} - res = cloud_rest_client.get_deployment_list(cluster_name, **params) + res_count = cloud_rest_client.v2.list_deployment(all=True, search=search) + if res_count is None: + raise BentoMLException("List deployments request failed") + if res_count.total == 0: + return [] + res = cloud_rest_client.v2.list_deployment( + search=search, count=res_count.total, all=True + ) if res is None: raise BentoMLException("List deployments request failed") - return res else: - all_deployment = cloud_rest_client.get_deployment_list(cluster_name) - if all_deployment is None: + res_count = cloud_rest_client.v2.list_deployment( + cluster_name, search=search + ) + if res_count is None: + raise NotFound(f"Cluster {cluster_name} is not found") + if res_count.total == 0: + 
return [] + res = cloud_rest_client.v2.list_deployment( + cluster_name, search=search, count=res_count.total + ) + if res is None: raise BentoMLException("List deployments request failed") - return all_deployment + return [ + Deployment( + name=schema.name, + context=context, + cluster_name=schema.cluster.name, + _schema=schema, + ) + for schema in res.items + ] @classmethod - def create_from_file( + def create( cls, - path_or_stream: str | t.TextIO, + bento: Tag, + access_type: str | None = None, + name: str | None = None, + cluster_name: str | None = None, + scaling_min: int | None = None, + scaling_max: int | None = None, + instance_type: str | None = None, + strategy: str | None = None, + envs: t.List[dict[str, t.Any]] | None = None, + extras: dict[str, t.Any] | None = None, + config_dct: dict[str, t.Any] | None = None, + config_file: str | t.TextIO | None = None, path_context: str | None = None, context: str | None = None, - ) -> DeploymentSchema: - if isinstance(path_or_stream, str): - real_path = resolve_user_filepath(path_or_stream, path_context) + ) -> Deployment: + cloud_rest_client = get_rest_api_client(context) + dct: dict[str, t.Any] = { + "bento": str(bento), + } + if name: + dct["name"] = name + else: + # the cloud takes care of the name + dct["name"] = "" + + if config_dct: + merging_dct = config_dct + pass + elif isinstance(config_file, str): + real_path = resolve_user_filepath(config_file, path_context) try: with open(real_path, "r") as file: - data = json.load(file) + merging_dct = yaml.safe_load(file) except FileNotFoundError: raise ValueError(f"File not found: {real_path}") - except json.JSONDecodeError as e: - raise ValueError(f"Error decoding JSON file: {real_path}\n{e}") + except yaml.YAMLError as exc: + logger.error("Error while parsing YAML file: %s", exc) + raise except Exception as e: raise ValueError( f"An error occurred while reading the file: {real_path}\n{e}" ) + elif config_file is not None: + try: + merging_dct = 
yaml.safe_load(config_file) + except yaml.YAMLError as exc: + logger.error("Error while parsing YAML config-file stream: %s", exc) + raise else: - # load the data from trusted stream - data = json.load(path_or_stream) - deployment_schema = bentoml_cattr.structure(data, FullDeploymentSchema) - return cls._create_deployment( - create_deployment_schema=deployment_schema, + merging_dct = { + "scaling": {"min_replicas": scaling_min, "max_replicas": scaling_max}, + "instance_type": instance_type, + "deployment_strategy": strategy, + "envs": envs, + "extras": extras, + "access_type": access_type, + "cluster": cluster_name, + } + dct.update(merging_dct) + + # add cluster + if "cluster" not in dct or dct["cluster"] is None: + cluster_name = cls._get_default_cluster(context) + dct["cluster"] = cluster_name + + if "distributed" not in dct: + dct["distributed"] = ( + "services" in dct + and dct["services"] is not None + and dct["services"] != {} + ) + + config_struct = bentoml_cattr.structure(dct, CreateDeploymentSchemaV2) + cls._fix_and_validate_schema(config_struct, dct["distributed"]) + + res = cloud_rest_client.v2.create_deployment( + create_schema=config_struct, cluster_name=config_struct.cluster + ) + logger.debug("Deployment Schema: %s", config_struct) + return Deployment( context=context, - cluster_name=deployment_schema.cluster_name, + cluster_name=config_struct.cluster, + name=res.name, + _schema=res, ) @classmethod - def update_from_file( + def update( cls, - path_or_stream: str | t.TextIO, + name: str, + bento: Tag | str | None = None, + access_type: str | None = None, + cluster_name: str | None = None, + scaling_min: int | None = None, + scaling_max: int | None = None, + instance_type: str | None = None, + strategy: str | None = None, + envs: t.List[dict[str, t.Any]] | None = None, + extras: dict[str, t.Any] | None = None, + config_dct: dict[str, t.Any] | None = None, + config_file: str | t.TextIO | None = None, path_context: str | None = None, context: str | None 
= None, - ) -> DeploymentSchema: - if isinstance(path_or_stream, str): - real_path = resolve_user_filepath(path_or_stream, path_context) + ) -> Deployment: + deployment = Deployment.get( + name=name, context=context, cluster_name=cluster_name + ) + orig_dct = deployment._conver_schema_to_update_schema() + distributed = deployment._schema.distributed + cloud_rest_client = get_rest_api_client(context) + if bento: + orig_dct["bento"] = str(bento) + + if config_dct: + merging_dct = config_dct + pass + elif isinstance(config_file, str): + real_path = resolve_user_filepath(config_file, path_context) try: with open(real_path, "r") as file: - data = json.load(file) + merging_dct = yaml.safe_load(file) except FileNotFoundError: raise ValueError(f"File not found: {real_path}") - except json.JSONDecodeError as e: - raise ValueError(f"Error decoding JSON file: {real_path}\n{e}") + except yaml.YAMLError as exc: + logger.error("Error while parsing YAML file: %s", exc) + raise except Exception as e: raise ValueError( f"An error occurred while reading the file: {real_path}\n{e}" ) + elif config_file is not None: + try: + merging_dct = yaml.safe_load(config_file) + except yaml.YAMLError as exc: + logger.error("Error while parsing YAML config-file stream: %s", exc) + raise + else: - data = json.load(path_or_stream) - deployment_schema = bentoml_cattr.structure(data, FullDeploymentSchema) - return cls._update_deployment( - deployment_name=deployment_schema.name, - update_deployment_schema=deployment_schema, - context=context, - cluster_name=deployment_schema.cluster_name, - kube_namespace=deployment_schema.kube_namespace, + merging_dct: dict[str, t.Any] = {"scaling": {}} + if scaling_min is not None: + merging_dct["scaling"]["min_replicas"] = scaling_min + if scaling_max is not None: + merging_dct["scaling"]["max_replicas"] = scaling_max + if instance_type is not None: + merging_dct["instance_type"] = instance_type + + if strategy is not None: + merging_dct["deployment_strategy"] = 
strategy + + if envs is not None: + merging_dct["envs"] = envs + + if extras is not None: + merging_dct["extras"] = extras + + if access_type is not None: + merging_dct["access_type"] = access_type + + config_merger.merge(orig_dct, merging_dct) + + config_struct = bentoml_cattr.structure(orig_dct, UpdateDeploymentSchemaV2) + + cls._fix_and_validate_schema(config_struct, distributed) + + res = cloud_rest_client.v2.update_deployment( + cluster_name=deployment.cluster_name, + deployment_name=name, + update_schema=config_struct, ) + if res is None: + raise NotFound(f"deployment {name} is not found") + logger.debug("Deployment Schema: %s", config_struct) + deployment._schema = res + deployment._urls = res.urls + return deployment @classmethod - def get( + def apply( cls, - deployment_name: str, - context: str | None = None, + name: str, + bento: Tag | None = None, cluster_name: str | None = None, - kube_namespace: str | None = None, - ) -> DeploymentSchema: + config_dct: dict[str, t.Any] | None = None, + config_file: str | None = None, + path_context: str | None = None, + context: str | None = None, + ) -> Deployment: + try: + deployment = Deployment.get( + name=name, context=context, cluster_name=cluster_name + ) + except NotFound as e: + if bento is not None: + return cls.create( + bento=bento, + name=name, + cluster_name=cluster_name, + config_dct=config_dct, + config_file=config_file, + path_context=path_context, + context=context, + ) + else: + raise e cloud_rest_client = get_rest_api_client(context) - if cluster_name is None: - cluster_name = cls._get_default_cluster(context) - if kube_namespace is None: - kube_namespace = cls._get_default_kube_namespace(cluster_name, context) - res = cloud_rest_client.get_deployment( - cluster_name, kube_namespace, deployment_name + if bento is None: + bento = deployment._conver_schema_to_bento() + + schema_dct: dict[str, t.Any] = {"bento": str(bento)} + distributed = deployment._schema.distributed + + if config_file: + real_path 
= resolve_user_filepath(config_file, path_context) + try: + with open(real_path, "r") as file: + config_dct = yaml.safe_load(file) + except FileNotFoundError: + raise ValueError(f"File not found: {real_path}") + except yaml.YAMLError as exc: + logger.error("Error while parsing YAML file: %s", exc) + raise + except Exception as e: + raise ValueError( + f"An error occurred while reading the file: {real_path}\n{e}" + ) + if config_dct is None: + raise BentoMLException("Apply a deployment needs a configuration input") + + schema_dct.update(config_dct) + config_struct = bentoml_cattr.structure(schema_dct, UpdateDeploymentSchemaV2) + cls._fix_and_validate_schema(config_struct, distributed) + + res = cloud_rest_client.v2.update_deployment( + deployment_name=name, + update_schema=config_struct, + cluster_name=deployment.cluster_name, ) if res is None: - raise BentoMLException("Get deployment request failed") - return res + raise NotFound(f"deployment {name} is not found") + logger.debug("Deployment Schema: %s", config_struct) + deployment._schema = res + deployment._urls = res.urls + return deployment @classmethod - def delete( + def get( cls, - deployment_name: str, + name: str, context: str | None = None, cluster_name: str | None = None, - kube_namespace: str | None = None, - ) -> DeploymentSchema: - cloud_rest_client = get_rest_api_client(context) + ) -> Deployment: if cluster_name is None: cluster_name = cls._get_default_cluster(context) - if kube_namespace is None: - kube_namespace = cls._get_default_kube_namespace(cluster_name, context) - res = cloud_rest_client.get_deployment( - cluster_name, - kube_namespace, - deployment_name, - ) + cloud_rest_client = get_rest_api_client(context) + res = cloud_rest_client.v2.get_deployment(cluster_name, name) if res is None: - raise BentoMLException("Delete deployment: Deployment does not exist") + raise NotFound(f"deployment {name} is not found") - res = cloud_rest_client.delete_deployment( - cluster_name, kube_namespace, 
deployment_name + deployment = Deployment( + context=context, + cluster_name=cluster_name, + name=name, + _schema=res, + _urls=res.urls, ) - if res is None: - raise BentoMLException("Delete deployment request failed") - return res + return deployment @classmethod def terminate( cls, - deployment_name: str, + name: str, context: str | None = None, cluster_name: str | None = None, - kube_namespace: str | None = None, - ) -> DeploymentSchema: + ) -> Deployment: cloud_rest_client = get_rest_api_client(context) if cluster_name is None: cluster_name = cls._get_default_cluster(context) - if kube_namespace is None: - kube_namespace = cls._get_default_kube_namespace(cluster_name, context) - res = cloud_rest_client.get_deployment( - cluster_name, - kube_namespace, - deployment_name, - ) + res = cloud_rest_client.v2.terminate_deployment(cluster_name, name) if res is None: - raise BentoMLException("Teminate deployment: Deployment does not exist") - res = cloud_rest_client.terminate_deployment( - cluster_name, kube_namespace, deployment_name + raise NotFound(f"Deployment {name} is not found") + return Deployment( + name=name, + cluster_name=cluster_name, + context=context, + _schema=res, + _urls=res.urls, ) + + @classmethod + def delete( + cls, + name: str, + context: str | None = None, + cluster_name: str | None = None, + ) -> None: + cloud_rest_client = get_rest_api_client(context) + if cluster_name is None: + cluster_name = cls._get_default_cluster(context) + res = cloud_rest_client.v2.delete_deployment(cluster_name, name) if res is None: - raise BentoMLException("Terminate deployment request failed") - return res + raise NotFound(f"Deployment {name} is not found") diff --git a/src/bentoml/_internal/cloud/schemas/__init__.py b/src/bentoml/_internal/cloud/schemas/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/bentoml/_internal/cloud/schemas.py b/src/bentoml/_internal/cloud/schemas/modelschemas.py similarity index 51% rename from 
src/bentoml/_internal/cloud/schemas.py rename to src/bentoml/_internal/cloud/schemas/modelschemas.py index f8e7ef90bbc..2a2dfdc05c4 100644 --- a/src/bentoml/_internal/cloud/schemas.py +++ b/src/bentoml/_internal/cloud/schemas/modelschemas.py @@ -1,98 +1,20 @@ from __future__ import annotations -import json import typing as t -from datetime import datetime from enum import Enum from typing import TYPE_CHECKING import attr -import cattr -from dateutil.parser import parse -from bentoml._internal.tag import Tag +from bentoml._internal.cloud.schemas.utils import dict_options_converter -from ..bento.bento import BentoServiceInfo +from ...bento.bento import BentoServiceInfo time_format = "%Y-%m-%d %H:%M:%S.%f" T = t.TypeVar("T") -def datetime_encoder(time_obj: t.Optional[datetime]) -> t.Optional[str]: - if not time_obj: - return None - return time_obj.strftime(time_format) - - -def datetime_decoder(datetime_str: t.Optional[str], _: t.Any) -> t.Optional[datetime]: - if not datetime_str: - return None - return parse(datetime_str) - - -def tag_encoder(tag_obj: t.Optional[Tag]) -> t.Optional[str]: - if not tag_obj: - return None - return str(tag_obj) - - -def tag_decoder(tag_str: t.Optional[str], _: t.Any) -> t.Optional[Tag]: - if not tag_str: - return None - return Tag.from_str(tag_str) - - -def dict_options_converter( - options_type: type[T], -) -> t.Callable[[T | dict[str, T]], T]: - def _converter(value: T | dict[str, T] | None) -> T: - if value is None: - return options_type() - if isinstance(value, dict): - return options_type(**value) - return value - - return _converter - - -cloud_converter = cattr.Converter() - -cloud_converter.register_unstructure_hook(datetime, datetime_encoder) -cloud_converter.register_structure_hook(datetime, datetime_decoder) -cloud_converter.register_unstructure_hook(Tag, tag_encoder) -cloud_converter.register_structure_hook(Tag, tag_decoder) - - -def schema_from_json(json_content: str, cls: t.Type[T]) -> T: - dct = json.loads(json_content) - 
return cloud_converter.structure(dct, cls) - - -def schema_to_json(obj: t.Any) -> str: - res = cloud_converter.unstructure(obj, obj.__class__) - return json.dumps(res) - - -def schema_from_object(obj: t.Any, cls: t.Type[T]) -> T: - return cloud_converter.structure(obj, cls) - - -@attr.define -class BaseSchema: - uid: str - created_at: datetime - updated_at: t.Optional[datetime] - deleted_at: t.Optional[datetime] - - -@attr.define -class BaseListSchema: - start: int - count: int - total: int - - class ResourceType(Enum): USER = "user" ORG = "organization" @@ -112,58 +34,6 @@ class ResourceType(Enum): ResourceInstance = "resource_instance" -@attr.define -class ResourceSchema(BaseSchema): - name: str - resource_type: ResourceType - labels: t.List[LabelItemSchema] - - -@attr.define -class UserSchema: - name: str - email: str - first_name: str - last_name: str - - def get_name(self) -> str: - if not self.first_name and not self.last_name: - return self.name - return f"{self.first_name} {self.last_name}".strip() - - -@attr.define -class OrganizationSchema(ResourceSchema): - description: str - - -@attr.define -class OrganizationListSchema(BaseListSchema): - items: t.List[OrganizationSchema] - - -@attr.define -class ClusterSchema(ResourceSchema): - description: str - creator: UserSchema - - -@attr.define -class ClusterConfigSchema: - default_deployment_kube_namespace: str - - -@attr.define -class ClusterListSchema(BaseListSchema): - items: t.List[ClusterSchema] - - -@attr.define -class CreateBentoRepositorySchema: - name: str - description: str - - class BentoImageBuildStatus(Enum): PENDING = "pending" BUILDING = "building" @@ -203,10 +73,10 @@ class BentoRunnerSchema: @attr.define class BentoManifestSchema: - name: str service: str bentoml_version: str size_bytes: int + name: t.Optional[str] = attr.field(default=None) apis: t.Dict[str, BentoApiSchema] = attr.field(factory=dict) models: t.List[str] = attr.field(factory=list) runners: t.Optional[t.List[BentoRunnerSchema]] = 
attr.field(factory=list) @@ -220,90 +90,6 @@ class BentoManifestSchema: TransmissionStrategy = str -@attr.define -class BentoSchema(ResourceSchema): - description: str - version: str - image_build_status: BentoImageBuildStatus - upload_status: BentoUploadStatus - upload_finished_reason: str - presigned_upload_url: str - presigned_download_url: str - manifest: BentoManifestSchema - transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None) - upload_id: t.Optional[str] = attr.field(default=None) - - upload_started_at: t.Optional[datetime] = attr.field(default=None) - upload_finished_at: t.Optional[datetime] = attr.field(default=None) - build_at: datetime = attr.field(factory=datetime.now) - - -@attr.define -class BentoRepositorySchema(ResourceSchema): - description: str - latest_bento: t.Optional[BentoSchema] - - -@attr.define -class BentoWithRepositorySchema(BentoSchema): - repository: BentoRepositorySchema = attr.field(default=None) - - -@attr.define -class BentoWithRepositoryListSchema(BaseListSchema): - items: t.List[BentoWithRepositorySchema] = attr.field(factory=list) - - -@attr.define -class CreateBentoSchema: - description: str - version: str - manifest: BentoManifestSchema - build_at: datetime = attr.field(factory=datetime.now) - labels: t.List[LabelItemSchema] = attr.field(factory=list) - - -@attr.define -class UpdateBentoSchema: - manifest: t.Optional[BentoManifestSchema] = attr.field(default=None) - labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) - - -@attr.define -class BentoFullSchema(BentoWithRepositorySchema): - models: t.List[ModelWithRepositorySchema] = attr.field(factory=list) - - -@attr.define -class PreSignMultipartUploadUrlSchema: - upload_id: str - part_number: int - - -@attr.define -class CompletePartSchema: - part_number: int - etag: str - - -@attr.define -class CompleteMultipartUploadSchema: - parts: t.List[CompletePartSchema] - upload_id: str - - -@attr.define -class FinishUploadBentoSchema: - 
status: t.Optional[BentoUploadStatus] - reason: t.Optional[str] - - -@attr.define -class CreateModelRepositorySchema: - name: str - description: str - - class ModelImageBuildStatus(Enum): PENDING = "pending" BUILDING = "building" @@ -329,66 +115,6 @@ class ModelManifestSchema: options: t.Dict[str, t.Any] = attr.field(factory=dict) -@attr.define -class ModelSchema(ResourceSchema): - description: str - version: str - image_build_status: ModelImageBuildStatus - upload_status: ModelUploadStatus - upload_finished_reason: str - presigned_upload_url: str - presigned_download_url: str - manifest: ModelManifestSchema - - transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None) - upload_id: t.Optional[str] = attr.field(default=None) - - upload_started_at: t.Optional[datetime] = attr.field(default=None) - upload_finished_at: t.Optional[datetime] = attr.field(default=None) - build_at: datetime = attr.field(factory=datetime.now) - - -@attr.define -class ModelRepositorySchema(ResourceSchema): - description: str - latest_model: t.Optional[ModelSchema] - - -@attr.define -class ModelWithRepositorySchema(ModelSchema): - repository: ModelRepositorySchema = attr.field(default=None) - - -@attr.define -class ModelWithRepositoryListSchema(BaseListSchema): - items: t.List[ModelWithRepositorySchema] = attr.field(factory=list) - - -@attr.define -class CreateModelSchema: - description: str - version: str - manifest: ModelManifestSchema - build_at: datetime = attr.field(factory=datetime.now) - labels: t.List[LabelItemSchema] = attr.field(factory=list) - - -@attr.define -class FinishUploadModelSchema: - status: t.Optional[ModelUploadStatus] - reason: t.Optional[str] - - -@attr.define -class BentoRepositoryListSchema(BaseListSchema): - items: t.List[BentoRepositorySchema] - - -@attr.define -class BentoListSchema(BaseListSchema): - items: t.List[BentoSchema] - - class DeploymentTargetCanaryRuleType(Enum): WEIGHT = "weight" HEADER = "header" @@ -415,6 +141,25 @@ class 
ApiServerBentoDeploymentOverrides: extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) +@attr.define +class ApiServerBentoFunctionOverrides: + __omit_if_default__ = True + __forbid_extra_keys__ = True + annotations: t.Optional[t.Dict[str, str]] = attr.field(default=None) + monitorExporter: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) + extraPodMetadata: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) + extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) + + +@attr.define +class RunnerBentoFunctionOverrides: + __omit_if_default__ = True + __forbid_extra_keys__ = True + annotations: t.Optional[t.Dict[str, str]] = attr.field(default=None) + extraPodMetadata: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) + extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None) + + @attr.define class RunnerBentoDeploymentOverrides: __omit_if_default__ = True @@ -484,10 +229,6 @@ class HPAPolicy: class DeploymentTargetHPAConf: __omit_if_default__ = True __forbid_extra_keys__ = True - cpu: t.Optional[int] = attr.field(default=None) - gpu: t.Optional[int] = attr.field(default=None) - memory: t.Optional[str] = attr.field(default=None) - qps: t.Optional[int] = attr.field(default=None) min_replicas: t.Optional[int] = attr.field(default=None) max_replicas: t.Optional[int] = attr.field(default=None) policy: t.Optional[HPAPolicy] = attr.field(default=None) @@ -534,6 +275,12 @@ class DeploymentStrategy(Enum): BestEffortControlledRollout = "BestEffortControlledRollout" +class AccessControl(Enum): + PUBLIC = "public" + PROTECTED = "protected" + PRIVATE = "private" + + @attr.define class DeploymentTargetRunnerConfig: __omit_if_default__ = True @@ -541,7 +288,7 @@ class DeploymentTargetRunnerConfig: resource_instance: t.Optional[str] = attr.field(default=None) resources: t.Optional[DeploymentTargetResources] = attr.field(default=None) hpa_conf: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None) - 
envs: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) + envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None) enable_stealing_traffic_debug_mode: t.Optional[bool] = attr.field(default=None) enable_debug_mode: t.Optional[bool] = attr.field(default=None) enable_debug_pod_receive_production_traffic: t.Optional[bool] = attr.field( @@ -551,6 +298,9 @@ class DeploymentTargetRunnerConfig: bento_deployment_overrides: t.Optional[RunnerBentoDeploymentOverrides] = attr.field( default=None ) + bento_function_overrides: t.Optional[RunnerBentoFunctionOverrides] = attr.field( + default=None + ) traffic_control: t.Optional[TrafficControlConfig] = attr.field(default=None) deployment_cold_start_wait_timeout: t.Optional[int] = attr.field(default=None) @@ -564,18 +314,18 @@ class DeploymentTargetType(Enum): class DeploymentTargetConfig: __omit_if_default__ = True __forbid_extra_keys__ = True - resources: DeploymentTargetResources = attr.field( + resources: t.Optional[DeploymentTargetResources] = attr.field( default=None, converter=dict_options_converter(DeploymentTargetResources) ) kubeResourceUid: str = attr.field(default="") # empty str kubeResourceVersion: str = attr.field(default="") resource_instance: t.Optional[str] = attr.field(default=None) hpa_conf: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None) - envs: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) + envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None) runners: t.Optional[t.Dict[str, DeploymentTargetRunnerConfig]] = attr.field( default=None ) - access_control: t.Optional[str] = attr.field(default=None) + access_control: t.Optional[AccessControl] = attr.field(default=None) enable_ingress: t.Optional[bool] = attr.field(default=None) # false for enables enable_stealing_traffic_debug_mode: t.Optional[bool] = attr.field(default=None) enable_debug_mode: t.Optional[bool] = attr.field(default=None) @@ -591,23 +341,38 @@ class 
DeploymentTargetConfig: bento_request_overrides: t.Optional[BentoRequestOverrides] = attr.field( default=None ) # Put into image builder + bento_function_overrides: t.Optional[ApiServerBentoFunctionOverrides] = attr.field( + default=None + ) traffic_control: t.Optional[TrafficControlConfig] = attr.field(default=None) deployment_cold_start_wait_timeout: t.Optional[int] = attr.field(default=None) + bentoml_config_overrides: t.Optional[dict[str, t.Any]] = attr.field(default=None) @attr.define -class CreateDeploymentTargetSchema: +class ExtraDeploymentOverrides: __omit_if_default__ = True __forbid_extra_keys__ = True - type: DeploymentTargetType # stable by default - bento_repository: str - bento: str - config: DeploymentTargetConfig - canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field( + bento_function_overrides: t.Optional[ApiServerBentoFunctionOverrides] = attr.field( + default=None + ) + bento_request_overrides: t.Optional[BentoRequestOverrides] = attr.field( default=None ) +@attr.define +class DeploymentServiceConfig: + __omit_if_default__ = True + __forbid_extra_keys__ = True + instance_type: t.Optional[str] = attr.field(default=None) + scaling: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None) + envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None) + deployment_strategy: t.Optional[DeploymentStrategy] = attr.field(default=None) + extras: t.Optional[ExtraDeploymentOverrides] = attr.field(default=None) + cold_start_timeout: t.Optional[int] = attr.field(default=None) + + class DeploymentStatus(Enum): Unknown = "unknown" NonDeployed = "non-deployed" @@ -622,45 +387,9 @@ class DeploymentStatus(Enum): ImageBuildSucceeded = "image-build-succeeded" -@attr.define -class DeploymentSchema(ResourceSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - creator: UserSchema - cluster: ClusterSchema - status: DeploymentStatus - kube_namespace: str - latest_revision: 
t.Optional[DeploymentRevisionSchema] = attr.field( - default=None - ) # Delete returns no latest revision - mode: t.Optional[DeploymentMode] = attr.field(default=None) - - -@attr.define -class DeploymentTargetSchema(ResourceSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - creator: UserSchema - type: DeploymentTargetType - bento: BentoFullSchema - config: DeploymentTargetConfig - canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field( - default=None - ) - - -class DeploymentRevisionStatus(Enum): - ACTIVE = "active" - INACTIVE = "inactive" - - -@attr.define -class DeploymentRevisionSchema(ResourceSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - creator: UserSchema - status: DeploymentRevisionStatus - targets: t.List[DeploymentTargetSchema] +class DeploymentMode(Enum): + Deployment = "deployment" + Function = "function" @attr.define @@ -671,57 +400,6 @@ class ResourceInstanceConfigSchema: node_selectors: t.Optional[t.Dict[str, str]] = attr.field(factory=dict) -@attr.define -class ResourceInstanceSchema(ResourceSchema): - display_name: str - description: str - config: ResourceInstanceConfigSchema - - -@attr.define -class ClusterFullSchema(ClusterSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - organization: OrganizationSchema - kube_config: str - config: ClusterConfigSchema - grafana_root_path: str - resource_instances: t.List[ResourceInstanceSchema] - - -@attr.define -class DeploymentListSchema(BaseListSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - items: t.List[DeploymentSchema] - - -class DeploymentMode(Enum): - Deployment = "deployment" - Function = "function" - - -@attr.define -class UpdateDeploymentSchema: - __omit_if_default__ = True - __forbid_extra_keys__ = True - targets: t.List[CreateDeploymentTargetSchema] - mode: t.Optional[DeploymentMode] = attr.field(default=None) - labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) - 
description: t.Optional[str] = attr.field(default=None) - do_not_deploy: t.Optional[bool] = attr.field(default=None) - - -@attr.define -class CreateDeploymentSchema(UpdateDeploymentSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - name: str = attr.field(default=None) - kube_namespace: t.Optional[str] = attr.field(default=None) - - -@attr.define -class FullDeploymentSchema(CreateDeploymentSchema): - __omit_if_default__ = True - __forbid_extra_keys__ = True - cluster_name: t.Optional[str] = attr.field(default=None) +class DeploymentRevisionStatus(Enum): + ACTIVE = "active" + INACTIVE = "inactive" diff --git a/src/bentoml/_internal/cloud/schemas/schemasv1.py b/src/bentoml/_internal/cloud/schemas/schemasv1.py new file mode 100644 index 00000000000..79c4d2ec554 --- /dev/null +++ b/src/bentoml/_internal/cloud/schemas/schemasv1.py @@ -0,0 +1,337 @@ +from __future__ import annotations + +import typing as t +from datetime import datetime + +import attr + +from bentoml._internal.cloud.schemas.modelschemas import BentoImageBuildStatus +from bentoml._internal.cloud.schemas.modelschemas import BentoManifestSchema +from bentoml._internal.cloud.schemas.modelschemas import BentoUploadStatus +from bentoml._internal.cloud.schemas.modelschemas import DeploymentMode +from bentoml._internal.cloud.schemas.modelschemas import DeploymentRevisionStatus +from bentoml._internal.cloud.schemas.modelschemas import DeploymentStatus +from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetCanaryRule +from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetConfig +from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetType +from bentoml._internal.cloud.schemas.modelschemas import LabelItemSchema +from bentoml._internal.cloud.schemas.modelschemas import ModelImageBuildStatus +from bentoml._internal.cloud.schemas.modelschemas import ModelManifestSchema +from bentoml._internal.cloud.schemas.modelschemas import ModelUploadStatus 
+from bentoml._internal.cloud.schemas.modelschemas import ResourceInstanceConfigSchema +from bentoml._internal.cloud.schemas.modelschemas import ResourceType +from bentoml._internal.cloud.schemas.modelschemas import TransmissionStrategy + + +@attr.define +class BaseSchema: + uid: str + created_at: datetime + updated_at: t.Optional[datetime] + deleted_at: t.Optional[datetime] + + +@attr.define +class BaseListSchema: + start: int + count: int + total: int + + +@attr.define +class ResourceSchema(BaseSchema): + name: str + resource_type: ResourceType + labels: t.List[LabelItemSchema] + + +@attr.define +class UserSchema: + name: str + email: str + first_name: str + last_name: str + + def get_name(self) -> str: + if not self.first_name and not self.last_name: + return self.name + return f"{self.first_name} {self.last_name}".strip() + + +@attr.define +class OrganizationSchema(ResourceSchema): + description: str + + +@attr.define +class OrganizationListSchema(BaseListSchema): + items: t.List[OrganizationSchema] + + +@attr.define +class ClusterSchema(ResourceSchema): + description: str + creator: UserSchema + + +@attr.define +class ClusterConfigSchema: + default_deployment_kube_namespace: str + + +@attr.define +class ClusterListSchema(BaseListSchema): + items: t.List[ClusterSchema] + + +@attr.define +class CreateBentoRepositorySchema: + name: str + description: str + + +@attr.define +class BentoSchema(ResourceSchema): + description: str + version: str + image_build_status: BentoImageBuildStatus + upload_status: BentoUploadStatus + upload_finished_reason: str + presigned_upload_url: str + presigned_download_url: str + manifest: t.Optional[BentoManifestSchema] = attr.field(default=None) + transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None) + upload_id: t.Optional[str] = attr.field(default=None) + + upload_started_at: t.Optional[datetime] = attr.field(default=None) + upload_finished_at: t.Optional[datetime] = attr.field(default=None) + build_at: 
datetime = attr.field(factory=datetime.now) + + +@attr.define +class BentoRepositorySchema(ResourceSchema): + description: str + latest_bento: t.Optional[BentoSchema] + + +@attr.define +class BentoWithRepositorySchema(BentoSchema): + repository: BentoRepositorySchema = attr.field(default=None) + + +@attr.define +class BentoWithRepositoryListSchema(BaseListSchema): + items: t.List[BentoWithRepositorySchema] = attr.field(factory=list) + + +@attr.define +class CreateBentoSchema: + description: str + version: str + manifest: t.Optional[BentoManifestSchema] = attr.field(default=None) + build_at: datetime = attr.field(factory=datetime.now) + labels: t.List[LabelItemSchema] = attr.field(factory=list) + + +@attr.define +class UpdateBentoSchema: + manifest: t.Optional[BentoManifestSchema] = attr.field(default=None) + labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) + + +@attr.define +class BentoFullSchema(BentoWithRepositorySchema): + models: t.List[ModelWithRepositorySchema] = attr.field(factory=list) + + +@attr.define +class PreSignMultipartUploadUrlSchema: + upload_id: str + part_number: int + + +@attr.define +class CompletePartSchema: + part_number: int + etag: str + + +@attr.define +class CompleteMultipartUploadSchema: + parts: t.List[CompletePartSchema] + upload_id: str + + +@attr.define +class FinishUploadBentoSchema: + status: t.Optional[BentoUploadStatus] + reason: t.Optional[str] + + +@attr.define +class CreateModelRepositorySchema: + name: str + description: str + + +@attr.define +class ModelSchema(ResourceSchema): + description: str + version: str + image_build_status: ModelImageBuildStatus + upload_status: ModelUploadStatus + upload_finished_reason: str + presigned_upload_url: str + presigned_download_url: str + manifest: ModelManifestSchema + + transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None) + upload_id: t.Optional[str] = attr.field(default=None) + + upload_started_at: t.Optional[datetime] = 
attr.field(default=None) + upload_finished_at: t.Optional[datetime] = attr.field(default=None) + build_at: datetime = attr.field(factory=datetime.now) + + +@attr.define +class ModelRepositorySchema(ResourceSchema): + description: str + latest_model: t.Optional[ModelSchema] + + +@attr.define +class ModelWithRepositorySchema(ModelSchema): + repository: ModelRepositorySchema = attr.field(default=None) + + +@attr.define +class ModelWithRepositoryListSchema(BaseListSchema): + items: t.List[ModelWithRepositorySchema] = attr.field(factory=list) + + +@attr.define +class CreateModelSchema: + description: str + version: str + manifest: ModelManifestSchema + build_at: datetime = attr.field(factory=datetime.now) + labels: t.List[LabelItemSchema] = attr.field(factory=list) + + +@attr.define +class FinishUploadModelSchema: + status: t.Optional[ModelUploadStatus] + reason: t.Optional[str] + + +@attr.define +class BentoRepositoryListSchema(BaseListSchema): + items: t.List[BentoRepositorySchema] + + +@attr.define +class BentoListSchema(BaseListSchema): + items: t.List[BentoSchema] + + +@attr.define +class CreateDeploymentTargetSchema: + __omit_if_default__ = True + __forbid_extra_keys__ = True + type: DeploymentTargetType # stable by default + bento_repository: str + bento: str + config: DeploymentTargetConfig + canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field( + default=None + ) + + +@attr.define +class DeploymentSchema(ResourceSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + creator: UserSchema + cluster: ClusterSchema + status: DeploymentStatus + kube_namespace: str + distributed: bool = attr.field(default=False) + latest_revision: t.Optional[DeploymentRevisionSchema] = attr.field( + default=None + ) # Delete returns no latest revision + mode: t.Optional[DeploymentMode] = attr.field(default=None) + + +@attr.define +class DeploymentTargetSchema(ResourceSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + creator: 
UserSchema + type: DeploymentTargetType + bento: BentoFullSchema + config: DeploymentTargetConfig + canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field( + default=None + ) + + +@attr.define +class DeploymentRevisionSchema(ResourceSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + creator: UserSchema + status: DeploymentRevisionStatus + targets: t.List[DeploymentTargetSchema] + + +@attr.define +class ResourceInstanceSchema(ResourceSchema): + display_name: str + description: str + config: ResourceInstanceConfigSchema + + +@attr.define +class ClusterFullSchema(ClusterSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + organization: OrganizationSchema + kube_config: str + config: ClusterConfigSchema + grafana_root_path: str + resource_instances: t.List[ResourceInstanceSchema] + + +@attr.define +class DeploymentListSchema(BaseListSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + items: t.List[DeploymentSchema] + + +@attr.define +class UpdateDeploymentSchema: + __omit_if_default__ = True + __forbid_extra_keys__ = True + targets: t.List[CreateDeploymentTargetSchema] + mode: t.Optional[DeploymentMode] = attr.field(default=None) + labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None) + description: t.Optional[str] = attr.field(default=None) + do_not_deploy: t.Optional[bool] = attr.field(default=None) + + +@attr.define(kw_only=True) +class CreateDeploymentSchema(UpdateDeploymentSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + name: str + kube_namespace: str + # cluster: str + distributed: t.Optional[bool] = attr.field(default=False) + + +@attr.define(kw_only=True) +class DeploymentFullSchema(DeploymentSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + urls: list[str] diff --git a/src/bentoml/_internal/cloud/schemas/schemasv2.py b/src/bentoml/_internal/cloud/schemas/schemasv2.py new file mode 100644 index 00000000000..d81a9e21e9f --- 
/dev/null +++ b/src/bentoml/_internal/cloud/schemas/schemasv2.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import typing as t + +import attr + +from bentoml._internal.cloud.schemas.modelschemas import AccessControl +from bentoml._internal.cloud.schemas.modelschemas import DeploymentMode +from bentoml._internal.cloud.schemas.modelschemas import DeploymentRevisionStatus +from bentoml._internal.cloud.schemas.modelschemas import DeploymentServiceConfig +from bentoml._internal.cloud.schemas.modelschemas import DeploymentStatus +from bentoml._internal.cloud.schemas.schemasv1 import BaseListSchema +from bentoml._internal.cloud.schemas.schemasv1 import BentoWithRepositorySchema +from bentoml._internal.cloud.schemas.schemasv1 import ClusterSchema +from bentoml._internal.cloud.schemas.schemasv1 import ResourceSchema +from bentoml._internal.cloud.schemas.schemasv1 import UserSchema + + +@attr.define +class DeploymentTargetSchema(ResourceSchema): + creator: t.Optional[UserSchema] + config: t.Optional[DeploymentTargetConfig] + bento: t.Optional[BentoWithRepositorySchema] + + +@attr.define +class DeploymentTargetConfig(DeploymentServiceConfig): + kube_resource_uid: t.Optional[str] = attr.field(default=None) + kube_resource_version: t.Optional[str] = attr.field(default=None) + services: t.Dict[str, DeploymentServiceConfig] = attr.field(factory=dict) + access_type: t.Optional[AccessControl] = attr.field(default=None) + bentoml_config_overrides: t.Dict[str, t.Optional[t.Any]] = attr.field(factory=dict) + + +@attr.define +class DeploymentTargetListSchema(BaseListSchema): + items: t.List[t.Optional[DeploymentTargetSchema]] + + +@attr.define +class DeploymentRevisionSchema(ResourceSchema): + creator: t.Optional[UserSchema] + status: DeploymentRevisionStatus + targets: t.List[t.Optional[DeploymentTargetSchema]] + + +@attr.define +class DeploymentRevisionListSchema(BaseListSchema): + items: t.List[t.Optional[DeploymentRevisionSchema]] + + +@attr.define(kw_only=True) +class 
UpdateDeploymentSchema(DeploymentServiceConfig): + __omit_if_default__ = True + __forbid_extra_keys__ = False # distributed, cluster and name need to be ignored + bento: str + access_type: t.Optional[AccessControl] = attr.field(default=None) + description: t.Optional[str] = attr.field(default=None) + services: t.Dict[str, DeploymentServiceConfig] = attr.field(factory=dict) + bentoml_config_overrides: t.Dict[str, t.Any] = attr.field(factory=dict) + + +@attr.define(kw_only=True) +class CreateDeploymentSchema(UpdateDeploymentSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + name: str + cluster: str + distributed: bool + + +@attr.define +class DeploymentSchema(ResourceSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + status: DeploymentStatus + kube_namespace: str + creator: t.Optional[UserSchema] + cluster: t.Optional[ClusterSchema] + latest_revision: t.Optional[DeploymentRevisionSchema] + mode: t.Optional[DeploymentMode] = attr.field(default=None) + distributed: bool = attr.field(default=False) + + +@attr.define +class DeploymentFullSchema(DeploymentSchema): + urls: t.List[str] = attr.field(factory=list) + + +@attr.define +class DeploymentListSchema(BaseListSchema): + __omit_if_default__ = True + __forbid_extra_keys__ = True + items: t.List[DeploymentSchema] diff --git a/src/bentoml/_internal/cloud/schemas/utils.py b/src/bentoml/_internal/cloud/schemas/utils.py new file mode 100644 index 00000000000..37b54e6e9ec --- /dev/null +++ b/src/bentoml/_internal/cloud/schemas/utils.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import json +import typing as t +from datetime import datetime + +import cattr +from dateutil.parser import parse + +from bentoml._internal.tag import Tag + +time_format = "%Y-%m-%d %H:%M:%S.%f" +T = t.TypeVar("T") + + +def datetime_encoder(time_obj: t.Optional[datetime]) -> t.Optional[str]: + if not time_obj: + return None + return time_obj.strftime(time_format) + + +def 
datetime_decoder(datetime_str: t.Optional[str], _: t.Any) -> t.Optional[datetime]: + if not datetime_str: + return None + return parse(datetime_str) + + +def tag_encoder(tag_obj: t.Optional[Tag]) -> t.Optional[str]: + if not tag_obj: + return None + return str(tag_obj) + + +def tag_decoder(tag_str: t.Optional[str], _: t.Any) -> t.Optional[Tag]: + if not tag_str: + return None + return Tag.from_str(tag_str) + + +def dict_options_converter( + options_type: type[T], +) -> t.Callable[[T | dict[str, T]], T]: + def _converter(value: T | dict[str, T] | None) -> T: + if value is None: + return options_type() + if isinstance(value, dict): + return options_type(**value) + return value + + return _converter + + +cloud_converter = cattr.Converter() + +cloud_converter.register_unstructure_hook(datetime, datetime_encoder) +cloud_converter.register_structure_hook(datetime, datetime_decoder) +cloud_converter.register_unstructure_hook(Tag, tag_encoder) +cloud_converter.register_structure_hook(Tag, tag_decoder) + + +def schema_from_json(json_content: str, cls: t.Type[T]) -> T: + dct = json.loads(json_content) + return cloud_converter.structure(dct, cls) + + +def schema_to_json(obj: t.Any) -> str: + res = cloud_converter.unstructure(obj, obj.__class__) + return json.dumps(res) + + +def schema_from_object(obj: t.Any, cls: t.Type[T]) -> T: + return cloud_converter.structure(obj, cls) diff --git a/src/bentoml/_internal/cloud/yatai.py b/src/bentoml/_internal/cloud/yatai.py index eb9ecd0c055..7ce202cf390 100644 --- a/src/bentoml/_internal/cloud/yatai.py +++ b/src/bentoml/_internal/cloud/yatai.py @@ -10,7 +10,7 @@ from tempfile import NamedTemporaryFile import fs -import requests +import httpx from rich.live import Live from simple_di import Provide from simple_di import inject @@ -29,25 +29,25 @@ from .base import CallbackIOWrapper from .base import CloudClient from .config import get_rest_api_client -from .schemas import BentoApiSchema -from .schemas import BentoManifestSchema -from 
.schemas import BentoRunnerResourceSchema -from .schemas import BentoRunnerSchema -from .schemas import BentoUploadStatus -from .schemas import CompleteMultipartUploadSchema -from .schemas import CompletePartSchema -from .schemas import CreateBentoRepositorySchema -from .schemas import CreateBentoSchema -from .schemas import CreateModelRepositorySchema -from .schemas import CreateModelSchema -from .schemas import FinishUploadBentoSchema -from .schemas import FinishUploadModelSchema -from .schemas import LabelItemSchema -from .schemas import ModelManifestSchema -from .schemas import ModelUploadStatus -from .schemas import PreSignMultipartUploadUrlSchema -from .schemas import TransmissionStrategy -from .schemas import UpdateBentoSchema +from .schemas.modelschemas import BentoApiSchema +from .schemas.modelschemas import BentoRunnerResourceSchema +from .schemas.modelschemas import BentoRunnerSchema +from .schemas.schemasv1 import BentoManifestSchema +from .schemas.schemasv1 import BentoUploadStatus +from .schemas.schemasv1 import CompleteMultipartUploadSchema +from .schemas.schemasv1 import CompletePartSchema +from .schemas.schemasv1 import CreateBentoRepositorySchema +from .schemas.schemasv1 import CreateBentoSchema +from .schemas.schemasv1 import CreateModelRepositorySchema +from .schemas.schemasv1 import CreateModelSchema +from .schemas.schemasv1 import FinishUploadBentoSchema +from .schemas.schemasv1 import FinishUploadModelSchema +from .schemas.schemasv1 import LabelItemSchema +from .schemas.schemasv1 import ModelManifestSchema +from .schemas.schemasv1 import ModelUploadStatus +from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema +from .schemas.schemasv1 import TransmissionStrategy +from .schemas.schemasv1 import UpdateBentoSchema if t.TYPE_CHECKING: from concurrent.futures import Future @@ -152,12 +152,14 @@ def push_model(model: Model) -> None: for r in info.runners ] manifest = BentoManifestSchema( + name=info.name, service=info.service, 
bentoml_version=info.bentoml_version, apis=apis, models=models, runners=runners, size_bytes=bento.total_size(), + config=info.config, ) if not remote_bento: with self.spin(text=f'Registering Bento "{bento.tag}" with Yatai..'): @@ -254,7 +256,7 @@ def filter_( ) try: if presigned_upload_url is not None: - resp = requests.put(presigned_upload_url, data=tar_io) + resp = httpx.put(presigned_upload_url, data=tar_io) if resp.status_code != 200: finish_req = FinishUploadBentoSchema( status=BentoUploadStatus.FAILED, @@ -309,7 +311,7 @@ def chunk_upload( ) with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io: - resp = requests.put( + resp = httpx.put( remote_bento.presigned_upload_url, data=chunk_io ) if resp.status_code != 200: @@ -498,27 +500,27 @@ def pull_model(model_tag: Tag): name, version ) presigned_download_url = remote_bento.presigned_download_url - response = requests.get(presigned_download_url, stream=True) - if response.status_code != 200: - raise BentoMLException( - f'Failed to download bento "{_tag}": {response.text}' - ) - total_size_in_bytes = int(response.headers.get("content-length", 0)) - block_size = 1024 # 1 Kibibyte with NamedTemporaryFile() as tar_file: - self.transmission_progress.update( - download_task_id, - completed=0, - total=total_size_in_bytes, - visible=True, - ) - self.transmission_progress.start_task(download_task_id) - for data in response.iter_content(block_size): + with httpx.stream("GET", presigned_download_url) as response: + if response.status_code != 200: + raise BentoMLException( + f'Failed to download bento "{_tag}": {response.text}' + ) + total_size_in_bytes = int(response.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte self.transmission_progress.update( - download_task_id, advance=len(data) + download_task_id, + completed=0, + total=total_size_in_bytes, + visible=True, ) - tar_file.write(data) + self.transmission_progress.start_task(download_task_id) + for data in response.iter_bytes(block_size): + 
self.transmission_progress.update( + download_task_id, advance=len(data) + ) + tar_file.write(data) self.log_progress.add_task( f'[bold green]Finished downloading all bento "{_tag}" files' ) @@ -690,7 +692,7 @@ def io_cb(x: int): ) try: if presigned_upload_url is not None: - resp = requests.put(presigned_upload_url, data=tar_io) + resp = httpx.put(presigned_upload_url, data=tar_io) if resp.status_code != 200: finish_req = FinishUploadModelSchema( status=ModelUploadStatus.FAILED, @@ -746,8 +748,8 @@ def chunk_upload( ) with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io: - resp = requests.put( - remote_model.presigned_upload_url, data=chunk_io + resp = httpx.put( + remote_model.presigned_upload_url, content=chunk_io ) if resp.status_code != 200: return FinishUploadModelSchema( @@ -937,25 +939,26 @@ def _do_pull_model( ) presigned_download_url = remote_model.presigned_download_url - response = requests.get(presigned_download_url, stream=True) - if response.status_code != 200: - raise BentoMLException( - f'Failed to download model "{_tag}": {response.text}' - ) - - total_size_in_bytes = int(response.headers.get("content-length", 0)) - block_size = 1024 # 1 Kibibyte with NamedTemporaryFile() as tar_file: - self.transmission_progress.update( - download_task_id, - description=f'Downloading model "{_tag}"', - total=total_size_in_bytes, - visible=True, - ) - self.transmission_progress.start_task(download_task_id) - for data in response.iter_content(block_size): - self.transmission_progress.update(download_task_id, advance=len(data)) - tar_file.write(data) + with httpx.stream("GET", presigned_download_url) as response: + if response.status_code != 200: + raise BentoMLException( + f'Failed to download model "{_tag}": {response.text}' + ) + total_size_in_bytes = int(response.headers.get("content-length", 0)) + block_size = 1024 # 1 Kibibyte + self.transmission_progress.update( + download_task_id, + description=f'Downloading model "{_tag}"', + total=total_size_in_bytes, + 
visible=True, + ) + self.transmission_progress.start_task(download_task_id) + for data in response.iter_bytes(block_size): + self.transmission_progress.update( + download_task_id, advance=len(data) + ) + tar_file.write(data) self.log_progress.add_task( f'[bold green]Finished downloading model "{_tag}" files' ) diff --git a/src/bentoml/cloud.py b/src/bentoml/cloud.py index 66e9c1bca1c..283a76c1f10 100644 --- a/src/bentoml/cloud.py +++ b/src/bentoml/cloud.py @@ -1,3 +1,12 @@ from ._internal.cloud import BentoCloudClient as BentoCloudClient -from ._internal.cloud import Resource as Resource from ._internal.cloud import YataiClient as YataiClient + +deprecated_names = ["Resource"] + + +def __getattr__(name: str): + if name in deprecated_names: + raise AttributeError( + f"{name} is deprecated, please use bentoml.deloyment instead" + ) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/bentoml/deployment.py b/src/bentoml/deployment.py new file mode 100644 index 00000000000..22fd456f541 --- /dev/null +++ b/src/bentoml/deployment.py @@ -0,0 +1,492 @@ +""" +User facing python APIs for deployment +""" + +from __future__ import annotations + +import typing as t + +from simple_di import Provide +from simple_di import inject + +from bentoml._internal.cloud.deployment import Deployment +from bentoml._internal.cloud.deployment import get_real_bento_tag +from bentoml._internal.tag import Tag +from bentoml.cloud import BentoCloudClient +from bentoml.exceptions import BentoMLException + +from ._internal.configuration.containers import BentoMLContainer + +if t.TYPE_CHECKING: + from ._internal.bento import BentoStore + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + project_path: str | None = ..., + cluster_name: str | None = ..., + access_type: str | None = ..., + scaling_min: int | None = ..., + 
scaling_max: int | None = ..., + instance_type: str | None = ..., + strategy: str | None = ..., + envs: t.List[dict[str, t.Any]] | None = ..., + extras: dict[str, t.Any] | None = ..., +) -> Deployment: + ... + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + bento: Tag | str | None = ..., + cluster_name: str | None = ..., + access_type: str | None = ..., + scaling_min: int | None = ..., + scaling_max: int | None = ..., + instance_type: str | None = ..., + strategy: str | None = ..., + envs: t.List[dict[str, t.Any]] | None = ..., + extras: dict[str, t.Any] | None = ..., +) -> Deployment: + ... + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + bento: Tag | str | None = ..., + config_file: str | None = ..., +) -> Deployment: + ... + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + project_path: str | None = ..., + config_file: str | None = ..., +) -> Deployment: + ... + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + bento: Tag | str | None = ..., + config_dct: dict[str, t.Any] | None = ..., +) -> Deployment: + ... + + +@t.overload +def create( + name: str | None = ..., + path_context: str | None = ..., + context: str | None = ..., + _bento_store: BentoStore = Provide[BentoMLContainer.bento_store], + *, + project_path: str | None = ..., + config_dct: dict[str, t.Any] | None = ..., +) -> Deployment: + ... 
+
+
+@inject
+def create(
+    name: str | None = None,
+    path_context: str | None = None,
+    context: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    cluster_name: str | None = None,
+    access_type: str | None = None,
+    scaling_min: int | None = None,
+    scaling_max: int | None = None,
+    instance_type: str | None = None,
+    strategy: str | None = None,
+    envs: t.List[dict[str, t.Any]] | None = None,
+    extras: dict[str, t.Any] | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Create a deployment from an existing bento or from a project path.
+
+    The deployment is configured through exactly one source: the individual
+    keyword parameters, ``config_dct``, or ``config_file``.
+
+    Args:
+        name: Deployment name, forwarded to ``Deployment.create``.
+        path_context: Forwarded unchanged to ``Deployment.create``.
+        context: Forwarded to ``get_real_bento_tag`` and ``Deployment.create``.
+        _bento_store: Injected by ``simple_di``; not referenced in this body.
+        project_path: Project to resolve into a bento tag via
+            ``get_real_bento_tag``. Mutually exclusive with ``bento``.
+        bento: Bento tag to deploy. Mutually exclusive with ``project_path``.
+        cluster_name: Individual configuration parameter.
+        access_type: Individual configuration parameter.
+        scaling_min: Individual configuration parameter; ``0`` counts as set.
+        scaling_max: Individual configuration parameter; ``0`` counts as set.
+        instance_type: Individual configuration parameter.
+        strategy: Individual configuration parameter.
+        envs: Individual configuration parameter.
+        extras: Individual configuration parameter.
+        config_dct: Deployment configuration given as a dictionary.
+        config_file: Deployment configuration file.
+
+    Returns:
+        Whatever ``Deployment.create`` returns.
+
+    Raises:
+        BentoMLException: If more than one configuration source is used, if
+            both ``bento`` and ``project_path`` are given, or if neither is.
+    """
+    # ``0`` is a legitimate scaling value, so the integer parameters are
+    # compared against ``None`` instead of being tested for truthiness.
+    deploy_by_param = bool(
+        access_type
+        or cluster_name
+        or scaling_min is not None
+        or scaling_max is not None
+        or instance_type
+        or strategy
+        or envs
+        or extras
+    )
+    # At most one of the three configuration sources may be in use.
+    if sum((bool(config_dct), bool(config_file), deploy_by_param)) > 1:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file, or the other parameters"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+    if bento is None and project_path is None:
+        raise BentoMLException("Either bento or project_path must be provided")
+    bento = get_real_bento_tag(
+        project_path=project_path,
+        bento=bento,
+        context=context,
+        _cloud_client=BentoCloudClient(),
+    )
+
+    return Deployment.create(
+        bento=bento,
+        access_type=access_type,
+        name=name,
+        cluster_name=cluster_name,
+        scaling_min=scaling_min,
+        scaling_max=scaling_max,
+        instance_type=instance_type,
+        strategy=strategy,
+        envs=envs,
+        extras=extras,
+        config_dct=config_dct,
+        config_file=config_file,
+        path_context=path_context,
+        context=context,
+    )
+
+
+# Typing-only stub: project_path with individual parameters.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: bento with individual parameters.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: project_path with config_file.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: bento with config_file.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: project_path with config_dct.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: bento with config_dct.
+@t.overload
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@inject
+def update(
+    name: str,
+    path_context: str | None = None,
+    context: str | None = None,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    access_type: str | None = None,
+    scaling_min: int | None = None,
+    scaling_max: int | None = None,
+    instance_type: str | None = None,
+    strategy: str | None = None,
+    envs: t.List[dict[str, t.Any]] | None = None,
+    extras: dict[str, t.Any] | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Update the deployment called ``name``.
+
+    The new configuration comes from exactly one source: the individual
+    keyword parameters, ``config_dct``, or ``config_file``. Unlike the
+    conflict check in ``create``, ``cluster_name`` is not counted as an
+    individual configuration parameter here.
+
+    Args:
+        name: Deployment name, forwarded to ``Deployment.update``.
+        path_context: Forwarded unchanged to ``Deployment.update``.
+        context: Forwarded to ``get_real_bento_tag`` and ``Deployment.update``.
+        cluster_name: Forwarded unchanged to ``Deployment.update``.
+        _bento_store: Injected by ``simple_di``; not referenced in this body.
+        project_path: Project to resolve into a bento tag via
+            ``get_real_bento_tag``. Mutually exclusive with ``bento``.
+        bento: Bento tag to deploy. Mutually exclusive with ``project_path``.
+        access_type: Individual configuration parameter.
+        scaling_min: Individual configuration parameter; ``0`` counts as set.
+        scaling_max: Individual configuration parameter; ``0`` counts as set.
+        instance_type: Individual configuration parameter.
+        strategy: Individual configuration parameter.
+        envs: Individual configuration parameter.
+        extras: Individual configuration parameter.
+        config_dct: Deployment configuration given as a dictionary.
+        config_file: Deployment configuration file.
+
+    Returns:
+        Whatever ``Deployment.update`` returns.
+
+    Raises:
+        BentoMLException: If more than one configuration source is used, or
+            if both ``bento`` and ``project_path`` are given.
+    """
+    # ``0`` is a legitimate scaling value, so the integer parameters are
+    # compared against ``None`` instead of being tested for truthiness.
+    deploy_by_param = bool(
+        access_type
+        or scaling_min is not None
+        or scaling_max is not None
+        or instance_type
+        or strategy
+        or envs
+        or extras
+    )
+    # At most one of the three configuration sources may be in use.
+    if sum((bool(config_dct), bool(config_file), deploy_by_param)) > 1:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file, or the other parameters"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+    # With neither source given, ``bento`` stays ``None`` and is passed on as
+    # such (presumably keeping the deployed bento; confirm in Deployment.update).
+    if bento is not None or project_path is not None:
+        bento = get_real_bento_tag(
+            project_path=project_path,
+            bento=bento,
+            context=context,
+            _cloud_client=BentoCloudClient(),
+        )
+
+    return Deployment.update(
+        bento=bento,
+        access_type=access_type,
+        name=name,
+        cluster_name=cluster_name,
+        scaling_min=scaling_min,
+        scaling_max=scaling_max,
+        instance_type=instance_type,
+        strategy=strategy,
+        envs=envs,
+        extras=extras,
+        config_dct=config_dct,
+        config_file=config_file,
+        path_context=path_context,
+        context=context,
+    )
+
+
+# Typing-only stub: project_path with config_dct.
+@t.overload
+def apply(
+    name: str,
+    cluster_name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: bento with config_dct.
+@t.overload
+def apply(
+    name: str,
+    cluster_name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: project_path with config_file.
+@t.overload
+def apply(
+    name: str,
+    cluster_name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+# Typing-only stub: bento with config_file.
+@t.overload
+def apply(
+    name: str,
+    cluster_name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+@inject
+def apply(
+    name: str,
+    cluster_name: str | None = None,
+    path_context: str | None = None,
+    context: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Apply a configuration to the deployment called ``name``.
+
+    The configuration is given either as ``config_dct`` or as ``config_file``,
+    never both. A bento may be supplied through ``bento`` or ``project_path``
+    (not both); when neither is given, ``None`` is handed to
+    ``Deployment.apply`` as the bento.
+
+    Returns:
+        Whatever ``Deployment.apply`` returns.
+
+    Raises:
+        BentoMLException: If both ``config_dct`` and ``config_file`` are set,
+            or if both ``bento`` and ``project_path`` are set.
+    """
+    if config_dct and config_file:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+
+    # Resolve a tag only when the caller named a bento source.
+    target = None
+    if not (bento is None and project_path is None):
+        target = get_real_bento_tag(
+            project_path=project_path,
+            bento=bento,
+            context=context,
+            _cloud_client=BentoCloudClient(),
+        )
+
+    return Deployment.apply(
+        name=name,
+        bento=target,
+        cluster_name=cluster_name,
+        context=context,
+        path_context=path_context,
+        config_dct=config_dct,
+        config_file=config_file,
+    )
+
+
+def get(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> Deployment:
+    """Return the result of ``Deployment.get`` for the given name, context and cluster."""
+    return Deployment.get(name=name, context=context, cluster_name=cluster_name)
+
+
+def terminate(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> Deployment:
+    """Return the result of ``Deployment.terminate`` for the given name, context and cluster."""
+    return Deployment.terminate(name=name, context=context, cluster_name=cluster_name)
+
+
+def delete(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> None:
+    """Call ``Deployment.delete`` for the given name, context and cluster; returns nothing."""
+    Deployment.delete(name=name, context=context, cluster_name=cluster_name)
+
+
+def list(
+    context: str | None = None,
+    cluster_name: str | None = None,
+    search: str | None = None,
+) -> t.List[Deployment]:
+    """Return the result of ``Deployment.list`` for the given context, cluster and search string."""
+    return Deployment.list(
+        context=context,
+        cluster_name=cluster_name,
+        search=search,
+    )
+
+
+__all__ = [
+    "create",
+    "get",
+    "update",
+    "apply",
+    "terminate",
+    "delete",
+    "list",
+]
diff --git a/src/bentoml_cli/cloud.py b/src/bentoml_cli/cloud.py
index 2155efcbf6f..8f11b030e3b 100644 --- a/src/bentoml_cli/cloud.py +++ b/src/bentoml_cli/cloud.py @@ -47,12 +47,12 @@ def cloud(): def login(shared_options: SharedOptions, endpoint: str, api_token: str) -> None: # type: ignore (not accessed) """Login to BentoCloud or Yatai server.""" cloud_rest_client = RestApiClient(endpoint, api_token) - user = cloud_rest_client.get_current_user() + user = cloud_rest_client.v1.get_current_user() if user is None: raise CLIException("current user is not found") - org = cloud_rest_client.get_current_organization() + org = cloud_rest_client.v1.get_current_organization() if org is None: raise CLIException("current organization is not found") diff --git a/src/bentoml_cli/deployment.py b/src/bentoml_cli/deployment.py index d2363edf2ac..226cc7c0b89 100644 --- a/src/bentoml_cli/deployment.py +++ b/src/bentoml_cli/deployment.py @@ -3,11 +3,14 @@ import typing as t import click +import yaml +from rich.syntax import Syntax + +from bentoml._internal.cloud.schemas.modelschemas import AccessControl +from bentoml._internal.cloud.schemas.modelschemas import DeploymentStrategy if t.TYPE_CHECKING: TupleStrAny = tuple[str, ...] 
- from bentoml._internal.cloud.schemas import DeploymentListSchema - from bentoml._internal.cloud.schemas import DeploymentSchema from .utils import SharedOptions else: @@ -20,17 +23,145 @@ def add_deployment_command(cli: click.Group) -> None: import click_option_group as cog from rich.table import Table - from bentoml._internal.configuration.containers import BentoMLContainer - from bentoml._internal.utils import bentoml_cattr + from bentoml._internal.cloud.deployment import Deployment + from bentoml._internal.cloud.deployment import get_real_bento_tag from bentoml._internal.utils import rich_console as console from bentoml_cli.utils import BentoMLCommandGroup - client = BentoMLContainer.bentocloud_client.get() + @cli.command() + @click.argument( + "target", + type=click.STRING, + required=True, + ) + @click.option( + "-n", + "--name", + type=click.STRING, + help="Deployment name", + ) + @click.option( + "--cluster", + type=click.STRING, + help="Name of the cluster", + ) + @click.option( + "--access-type", + type=click.Choice( + [access_ctrl_type.value for access_ctrl_type in AccessControl] + ), + help="Type of access", + ) + @click.option( + "--scaling-min", + type=click.INT, + help="Minimum scaling value", + ) + @click.option( + "--scaling-max", + type=click.INT, + help="Maximum scaling value", + ) + @click.option( + "--instance-type", + type=click.STRING, + help="Type of instance", + ) + @click.option( + "--strategy", + type=click.Choice( + [deployment_strategy.value for deployment_strategy in DeploymentStrategy] + ), + help="Deployment strategy", + ) + @click.option( + "--env", + type=click.STRING, + help="List of environment variables pass by --env key=value, --env ...", + multiple=True, + ) + @click.option( + "--config-file", + type=click.File(), + help="Configuration file path", + default=None, + ) + @click.option( + "--wait/--no-wait", + type=click.BOOL, + is_flag=True, + help="Do not wait for deployment to be ready", + default=True, + ) + @click.option( + 
"--timeout", + type=click.INT, + default=300, + help="Timeout for deployment to be ready in seconds", + ) + @click.pass_obj + def deploy( + shared_options: SharedOptions, + target: str, + name: str | None, + cluster: str | None, + access_type: str | None, + scaling_min: int | None, + scaling_max: int | None, + instance_type: str | None, + strategy: str | None, + env: tuple[str] | None, + config_file: t.TextIO | None, + wait: bool, + timeout: int, + ) -> None: + """Create a deployment on BentoCloud. + + \b + Create a deployment using parameters (standalone mode only), or using config yaml file. + """ + from os import path + + # determine if target is a path or a name + if path.exists(target): + # target is a path + click.echo(f"building bento from {target} ...") + bento_tag = get_real_bento_tag(project_path=target) + else: + click.echo(f"using bento {target}...") + bento_tag = get_real_bento_tag(bento=target) + + deployment = Deployment.create( + bento=bento_tag, + name=name, + cluster_name=cluster, + access_type=access_type, + scaling_min=scaling_min, + scaling_max=scaling_max, + instance_type=instance_type, + strategy=strategy, + envs=[ + {"key": item.split("=")[0], "value": item.split("=")[1]} for item in env + ] + if env is not None + else None, + config_file=config_file, + context=shared_options.cloud_context, + ) + if wait: + deployment.wait_until_ready(timeout=timeout) + click.echo( + f"Deployment '{deployment.name}' created successfully in cluster '{deployment.cluster_name}'." + ) + click.echo( + f"To check the deployment, go to: {deployment.get_bento_cloud_url()}." 
+ ) + output_option = click.option( "-o", "--output", - type=click.Choice(["json", "default"]), - default="default", + type=click.Choice(["yaml", "json"]), + default="yaml", help="Display the output of this command.", ) @@ -42,26 +173,16 @@ def shared_decorator( def decorate(f: t.Callable[..., t.Any]) -> t.Callable[..., t.Any]: options = [ click.argument( - "deployment-name", + "name", type=click.STRING, required=required_deployment_name, ), - cog.optgroup.group( - cls=cog.AllOptionGroup, name="cluster and kube namespace options" - ), - cog.optgroup.option( - "--cluster-name", + click.option( + "--cluster", type=click.STRING, default=None, help="Name of the cluster.", ), - cog.optgroup.option( - "--kube-namespace", - type=click.STRING, - default=None, - help="Kubernetes namespace.", - ), - output_option, ] for opt in reversed(options): f = opt(f) @@ -77,238 +198,201 @@ def deployment_cli(): """Deployment Subcommands Groups""" @deployment_cli.command() + @shared_decorator(required_deployment_name=True) + @cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name="target options") + @cog.optgroup.option( + "--bento", + type=click.STRING, + help="Bento name", + ) + @cog.optgroup.option( + "--project-path", + type=click.Path(exists=True), + help="Path to the project", + ) @click.option( - "-f", - "--file", - type=click.File(), - help="JSON file path for the deployment configuration", + "--access-type", + type=click.Choice( + [access_ctrl_type.value for access_ctrl_type in AccessControl] + ), + help="Type of access", ) - @output_option - @click.pass_obj - def create( # type: ignore - shared_options: SharedOptions, - file: str, - output: t.Literal["json", "default"], - ) -> DeploymentSchema: - """Create a deployment on BentoCloud. - - \b - A deployment can be created using a json file with configurations. - The json file has the exact format as the one on BentoCloud Deployment UI. 
- """ - res = client.deployment.create_from_file( - path_or_stream=file, context=shared_options.cloud_context - ) - if output == "default": - console.print(res) - elif output == "json": - click.echo(json.dumps(bentoml_cattr.unstructure(res), indent=2)) - return res - - @deployment_cli.command() - @shared_decorator(required_deployment_name=False) @click.option( - "-f", - "--file", - type=click.File(), - help="JSON file path for the deployment configuration", + "--scaling-min", + type=click.INT, + help="Minimum scaling value", + ) + @click.option( + "--scaling-max", + type=click.INT, + help="Maximum scaling value", + ) + @click.option( + "--instance-type", + type=click.STRING, + help="Type of instance", + ) + @click.option( + "--strategy", + type=click.Choice( + [deployment_strategy.value for deployment_strategy in DeploymentStrategy] + ), + help="Deployment strategy", ) @click.option( - "-n", "--name", type=click.STRING, help="Deployment name (deprecated)" + "--env", + type=click.STRING, + help="List of environment variables pass by --env key=value, --env ...", + multiple=True, + ) + @click.option( + "--config-file", + type=click.File(), + help="Configuration file path, mututally exclusive with other config options", + default=None, ) - @click.option("--bento", type=click.STRING, help="Bento tag") @click.pass_obj def update( # type: ignore shared_options: SharedOptions, - deployment_name: str | None, - file: str | None, - name: str | None, + name: str, + cluster: str | None, + project_path: str | None, bento: str | None, - cluster_name: str | None, - kube_namespace: str | None, - output: t.Literal["json", "default"], - ) -> DeploymentSchema: + access_type: str | None, + scaling_min: int | None, + scaling_max: int | None, + instance_type: str | None, + strategy: str | None, + env: tuple[str] | None, + config_file: t.TextIO | None, + ) -> None: """Update a deployment on BentoCloud. \b - A deployment can be updated using a json file with needed configurations. 
- The json file has the exact format as the one on BentoCloud Deployment UI. + A deployment can be updated using parameters (standalone mode only), or using config yaml file. + You can also update bento by providing a project path or existing bento. """ - if name is not None: - click.echo( - "--name is deprecated, pass DEPLOYMENT_NAME as an argument instead, e.g., bentoml update deploy-name" - ) - if file is not None: - if name is not None: - click.echo("Reading from file, ignoring --name", err=True) - elif deployment_name is not None: - click.echo( - "Reading from file, ignoring argument DEPLOYMENT_NAME", err=True - ) - res = client.deployment.update_from_file( - path_or_stream=file, context=shared_options.cloud_context - ) - elif name is not None: - res = client.deployment.update( - name, - bento=bento, - context=shared_options.cloud_context, - latest_bento=True, - cluster_name=cluster_name, - kube_namespace=kube_namespace, - ) - elif deployment_name is not None: - res = client.deployment.update( - deployment_name, + if bento is None and project_path is None: + target = None + else: + target = get_real_bento_tag( + project_path=project_path, bento=bento, context=shared_options.cloud_context, - latest_bento=True, - cluster_name=cluster_name, - kube_namespace=kube_namespace, ) - else: - raise click.BadArgumentUsage( - "Either --file or argument DEPLOYMENT_NAME is required for update command" - ) - if output == "default": - console.print(res) - elif output == "json": - unstructured = bentoml_cattr.unstructure(res) - click.echo(json.dumps(unstructured, indent=2)) - return res + + Deployment.update( + bento=target, + access_type=access_type, + name=name, + cluster_name=cluster, + scaling_min=scaling_min, + scaling_max=scaling_max, + instance_type=instance_type, + strategy=strategy, + envs=[ + {"key": item.split("=")[0], "value": item.split("=")[1]} for item in env + ] + if env is not None + else None, + config_file=config_file, + context=shared_options.cloud_context, 
+ ) + + click.echo(f"Deployment '{name}' updated successfully.") @deployment_cli.command() @shared_decorator + @output_option @click.pass_obj def get( # type: ignore shared_options: SharedOptions, - deployment_name: str, - cluster_name: str | None, - kube_namespace: str | None, + name: str, + cluster: str | None, output: t.Literal["json", "default"], - ) -> DeploymentSchema: + ) -> None: """Get a deployment on BentoCloud.""" - res = client.deployment.get( - deployment_name=deployment_name, - context=shared_options.cloud_context, - cluster_name=cluster_name, - kube_namespace=kube_namespace, + d = Deployment.get( + name, context=shared_options.cloud_context, cluster_name=cluster ) - if output == "default": - console.print(res) - elif output == "json": - unstructured = bentoml_cattr.unstructure(res) - click.echo(json.dumps(unstructured, indent=2)) - return res + if output == "json": + info = json.dumps(d.info.to_dict(), indent=2, default=str) + console.print_json(info) + else: + info = yaml.dump(d.info.to_dict(), indent=2, sort_keys=False) + console.print(Syntax(info, "yaml", background_color="default")) @deployment_cli.command() @shared_decorator @click.pass_obj def terminate( # type: ignore shared_options: SharedOptions, - deployment_name: str, - cluster_name: str | None, - kube_namespace: str | None, - output: t.Literal["json", "default"], - ) -> DeploymentSchema: + name: str, + cluster: str | None, + ) -> None: """Terminate a deployment on BentoCloud.""" - res = client.deployment.terminate( - deployment_name=deployment_name, - context=shared_options.cloud_context, - cluster_name=cluster_name, - kube_namespace=kube_namespace, + Deployment.terminate( + name, context=shared_options.cloud_context, cluster_name=cluster ) - if output == "default": - console.print(res) - elif output == "json": - unstructured = bentoml_cattr.unstructure(res) - click.echo(json.dumps(unstructured, indent=2)) - return res + click.echo(f"Deployment '{name}' terminated successfully.") 
@deployment_cli.command() @shared_decorator @click.pass_obj def delete( # type: ignore shared_options: SharedOptions, - deployment_name: str, - cluster_name: str | None, - kube_namespace: str | None, - output: t.Literal["json", "default"], - ) -> DeploymentSchema: + name: str, + cluster: str | None, + ) -> None: """Delete a deployment on BentoCloud.""" - res = client.deployment.delete( - deployment_name=deployment_name, - context=shared_options.cloud_context, - cluster_name=cluster_name, - kube_namespace=kube_namespace, + Deployment.delete( + name, context=shared_options.cloud_context, cluster_name=cluster ) - if output == "default": - console.print(res) - elif output == "json": - unstructured = bentoml_cattr.unstructure(res) - click.echo(json.dumps(unstructured, indent=2)) - return res + click.echo(f"Deployment '{name}' deleted successfully.") @deployment_cli.command() @click.option( - "--cluster-name", type=click.STRING, default=None, help="Name of the cluster." - ) - @click.option( - "--query", type=click.STRING, default=None, help="Query for list request." + "--cluster", type=click.STRING, default=None, help="Name of the cluster." ) @click.option( "--search", type=click.STRING, default=None, help="Search for list request." ) - @click.option( - "--start", type=click.STRING, default=None, help="Start for list request." - ) - @click.option( - "--count", type=click.STRING, default=None, help="Count for list request." 
- ) @click.option( "-o", "--output", help="Display the output of this command.", - type=click.Choice(["json", "default", "table"]), + type=click.Choice(["json", "yaml", "table"]), default="table", ) @click.pass_obj def list( # type: ignore shared_options: SharedOptions, - cluster_name: str | None, - query: str | None, + cluster: str | None, search: str | None, - count: int | None, - start: int | None, - output: t.Literal["json", "default", "table"], - ) -> DeploymentListSchema: + output: t.Literal["json", "yaml", "table"], + ) -> None: """List existing deployments on BentoCloud.""" - res = client.deployment.list( - context=shared_options.cloud_context, - cluster_name=cluster_name, - query=query, - search=search, - count=count, - start=start, + d_list = Deployment.list( + context=shared_options.cloud_context, cluster_name=cluster, search=search ) + res: list[dict[str, t.Any]] = [d.info.to_dict() for d in d_list] if output == "table": table = Table(box=None) table.add_column("Deployment") + table.add_column("created_at") table.add_column("Bento") table.add_column("Status") - table.add_column("Created At") - for deployment in res.items: - target = deployment.latest_revision.targets[0] + for info in res: table.add_row( - deployment.name, - f"{target.bento.repository.name}:{target.bento.name}", - deployment.status.value, - deployment.created_at.astimezone().strftime("%Y-%m-%d %H:%M:%S"), + info["name"], + info["created_at"], + info["bento"], + info["status"], ) console.print(table) - elif output == "default": - console.print(res) elif output == "json": - unstructured = bentoml_cattr.unstructure(res) - click.echo(json.dumps(unstructured, indent=2)) - return res + info = json.dumps(res, indent=2, default=str) + console.print_json(info) + else: + info = yaml.dump(res, indent=2, sort_keys=False) + console.print(Syntax(info, "yaml", background_color="default")) diff --git a/tests/unit/_internal/cloud/test_deployment.py b/tests/unit/_internal/cloud/test_deployment.py index 
2fb4ebd3c5d..4bd5fd96259 100644 --- a/tests/unit/_internal/cloud/test_deployment.py +++ b/tests/unit/_internal/cloud/test_deployment.py @@ -7,825 +7,534 @@ import attr import pytest -from bentoml._internal.cloud.schemas import BentoFullSchema -from bentoml._internal.cloud.schemas import BentoImageBuildStatus -from bentoml._internal.cloud.schemas import BentoManifestSchema -from bentoml._internal.cloud.schemas import BentoRepositorySchema -from bentoml._internal.cloud.schemas import BentoUploadStatus -from bentoml._internal.cloud.schemas import ClusterSchema -from bentoml._internal.cloud.schemas import CreateDeploymentSchema -from bentoml._internal.cloud.schemas import CreateDeploymentTargetSchema -from bentoml._internal.cloud.schemas import DeploymentMode -from bentoml._internal.cloud.schemas import DeploymentRevisionSchema -from bentoml._internal.cloud.schemas import DeploymentRevisionStatus -from bentoml._internal.cloud.schemas import DeploymentSchema -from bentoml._internal.cloud.schemas import DeploymentStatus -from bentoml._internal.cloud.schemas import DeploymentTargetCanaryRule -from bentoml._internal.cloud.schemas import DeploymentTargetCanaryRuleType -from bentoml._internal.cloud.schemas import DeploymentTargetConfig -from bentoml._internal.cloud.schemas import DeploymentTargetHPAConf -from bentoml._internal.cloud.schemas import DeploymentTargetRunnerConfig -from bentoml._internal.cloud.schemas import DeploymentTargetSchema -from bentoml._internal.cloud.schemas import DeploymentTargetType -from bentoml._internal.cloud.schemas import LabelItemSchema -from bentoml._internal.cloud.schemas import ResourceType -from bentoml._internal.cloud.schemas import UpdateDeploymentSchema -from bentoml._internal.cloud.schemas import UserSchema -from bentoml.cloud import BentoCloudClient -from bentoml.cloud import Resource +from bentoml._internal.cloud.client import RestApiClient +from bentoml._internal.cloud.deployment import Deployment +from 
bentoml._internal.cloud.schemas.modelschemas import AccessControl +from bentoml._internal.cloud.schemas.modelschemas import DeploymentServiceConfig +from bentoml._internal.cloud.schemas.modelschemas import DeploymentStrategy +from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetHPAConf +from bentoml._internal.cloud.schemas.schemasv1 import BentoFullSchema +from bentoml._internal.cloud.schemas.schemasv1 import BentoImageBuildStatus +from bentoml._internal.cloud.schemas.schemasv1 import BentoManifestSchema +from bentoml._internal.cloud.schemas.schemasv1 import BentoRepositorySchema +from bentoml._internal.cloud.schemas.schemasv1 import BentoUploadStatus +from bentoml._internal.cloud.schemas.schemasv1 import ClusterListSchema +from bentoml._internal.cloud.schemas.schemasv1 import ClusterSchema +from bentoml._internal.cloud.schemas.schemasv1 import DeploymentRevisionStatus +from bentoml._internal.cloud.schemas.schemasv1 import DeploymentStatus +from bentoml._internal.cloud.schemas.schemasv1 import LabelItemSchema +from bentoml._internal.cloud.schemas.schemasv1 import ResourceType +from bentoml._internal.cloud.schemas.schemasv1 import UserSchema +from bentoml._internal.cloud.schemas.schemasv2 import ( + CreateDeploymentSchema as CreateDeploymentSchemaV2, +) +from bentoml._internal.cloud.schemas.schemasv2 import ( + DeploymentFullSchema as DeploymentFullSchemaV2, +) +from bentoml._internal.cloud.schemas.schemasv2 import ( + DeploymentRevisionSchema as DeploymentRevisionSchemaV2, +) +from bentoml._internal.cloud.schemas.schemasv2 import ( + DeploymentTargetConfig as DeploymentTargetConfigV2, +) +from bentoml._internal.cloud.schemas.schemasv2 import ( + DeploymentTargetSchema as DeploymentTargetSchemaV2, +) +from bentoml._internal.cloud.schemas.schemasv2 import ( + UpdateDeploymentSchema as UpdateDeploymentSchemaV2, +) if t.TYPE_CHECKING: from unittest.mock import MagicMock -def f_create( - create_deployment_schema: CreateDeploymentSchema, - context: 
str | None = None, - cluster_name: str | None = None, -): - return create_deployment_schema - - -def f_update( - deployment_name: str, - update_deployment_schema: UpdateDeploymentSchema, - kube_namespace: str | None = None, - context: str | None = None, - cluster_name: str | None = None, -): - return update_deployment_schema - - -@pytest.fixture(name="get_schema", scope="function") -def fixture_get_schema() -> DeploymentSchema: - user = UserSchema(name="", email="", first_name="", last_name="") - return DeploymentSchema( - latest_revision=DeploymentRevisionSchema( - targets=[ - DeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento=BentoFullSchema( - uid="", - created_at=datetime(2023, 5, 25), - updated_at=None, - deleted_at=None, - name="12345", - resource_type=ResourceType.BENTO, - labels=[], - description="", - version="", - image_build_status=BentoImageBuildStatus.PENDING, - upload_status=BentoUploadStatus.SUCCESS, - upload_finished_reason="", - presigned_upload_url="", - presigned_download_url="", - manifest=BentoManifestSchema( - name="", - service="", - bentoml_version="", - size_bytes=0, - apis={}, - models=["iris_clf:ddaex6h2vw6kwcvj"], - ), - build_at=datetime(2023, 5, 25), - repository=BentoRepositorySchema( +@attr.define +class DummyUpdateSchema(UpdateDeploymentSchemaV2): + urls: t.List[str] = attr.Factory( + list + ) # place holder for urls that's assigned to deployment._urls + + +@pytest.fixture(name="rest_client", scope="function") +def fixture_rest_client() -> RestApiClient: + def dummy_create_deployment( + create_schema: CreateDeploymentSchemaV2, cluster_name: str + ): + return create_schema + + def dummy_update_deployment( + update_schema: UpdateDeploymentSchemaV2, cluster_name: str, deployment_name: str + ): + from bentoml._internal.utils import bentoml_cattr + + return bentoml_cattr.structure(attr.asdict(update_schema), DummyUpdateSchema) + + def dummy_get_deployment(cluster_name: str, deployment_name: str): + if deployment_name == 
"test-distributed": + return DeploymentFullSchemaV2( + distributed=True, + latest_revision=DeploymentRevisionSchemaV2( + targets=[ + DeploymentTargetSchemaV2( + bento=BentoFullSchema( + uid="", + created_at=datetime(2023, 5, 25), + updated_at=None, + deleted_at=None, + name="123", + resource_type=ResourceType.BENTO, + labels=[], + description="", + version="", + image_build_status=BentoImageBuildStatus.PENDING, + upload_status=BentoUploadStatus.SUCCESS, + upload_finished_reason="", + presigned_upload_url="", + presigned_download_url="", + manifest=BentoManifestSchema( + service="", + bentoml_version="", + size_bytes=0, + apis={}, + models=["iris_clf:ddaex6h2vw6kwcvj"], + ), + build_at=datetime(2023, 5, 25), + repository=BentoRepositorySchema( + uid="", + created_at="", + updated_at=None, + deleted_at=None, + name="abc", + resource_type=ResourceType.BENTO_REPOSITORY, + labels=[], + description="", + latest_bento="", + ), + ), + config=DeploymentTargetConfigV2( + access_type=AccessControl.PUBLIC, + envs=[ + LabelItemSchema(key="env_key", value="env_value") + ], + services={ + "irisclassifier": DeploymentServiceConfig( + instance_type="t3-small", + scaling=DeploymentTargetHPAConf( + min_replicas=1, max_replicas=1 + ), + deployment_strategy=DeploymentStrategy.RollingUpdate, + ), + "preprocessing": DeploymentServiceConfig( + instance_type="t3-small", + scaling=DeploymentTargetHPAConf( + min_replicas=1, max_replicas=1 + ), + deployment_strategy=DeploymentStrategy.RollingUpdate, + ), + }, + ), uid="", - created_at="", + created_at=datetime(2023, 5, 1), updated_at=None, deleted_at=None, - name="iris_classifier", - resource_type=ResourceType.BENTO_REPOSITORY, + name="", + resource_type=ResourceType.DEPLOYMENT_REVISION, labels=[], - description="", - latest_bento="", - ), - ), - config=DeploymentTargetConfig( - resource_instance="t3-micro", - enable_ingress=True, - hpa_conf=DeploymentTargetHPAConf( - min_replicas=2, max_replicas=10 - ), - runners={ - "runner1": 
DeploymentTargetRunnerConfig( - resource_instance="t3-small", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=3, max_replicas=10 + creator=user, + ) + ], + uid="", + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + name="test=xxx", + resource_type=ResourceType.DEPLOYMENT_REVISION, + labels=[], + creator=user, + status=DeploymentRevisionStatus.ACTIVE, + ), + uid="", + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + name="test=xxx", + resource_type=ResourceType.DEPLOYMENT_REVISION, + labels=[], + creator=user, + status=DeploymentStatus.Running, + cluster=ClusterSchema( + uid="", + name="default", + resource_type=ResourceType.CLUSTER, + labels=[], + description="", + creator=user, + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + ), + kube_namespace="", + ) + + else: + return DeploymentFullSchemaV2( + distributed=False, + latest_revision=DeploymentRevisionSchemaV2( + targets=[ + DeploymentTargetSchemaV2( + bento=BentoFullSchema( + uid="", + created_at=datetime(2023, 5, 25), + updated_at=None, + deleted_at=None, + name="123", + resource_type=ResourceType.BENTO, + labels=[], + description="", + version="", + image_build_status=BentoImageBuildStatus.PENDING, + upload_status=BentoUploadStatus.SUCCESS, + upload_finished_reason="", + presigned_upload_url="", + presigned_download_url="", + manifest=BentoManifestSchema( + service="", + bentoml_version="", + size_bytes=0, + apis={}, + models=["iris_clf:ddaex6h2vw6kwcvj"], + ), + build_at=datetime(2023, 5, 25), + repository=BentoRepositorySchema( + uid="", + created_at="", + updated_at=None, + deleted_at=None, + name="abc", + resource_type=ResourceType.BENTO_REPOSITORY, + labels=[], + description="", + latest_bento="", ), ), - "runner2": DeploymentTargetRunnerConfig( - resource_instance="t3-medium", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=5, max_replicas=10 + config=DeploymentTargetConfigV2( + access_type=AccessControl.PUBLIC, + 
scaling=DeploymentTargetHPAConf( + min_replicas=3, max_replicas=5 ), + deployment_strategy=DeploymentStrategy.RollingUpdate, + envs=[ + LabelItemSchema(key="env_key", value="env_value") + ], ), - }, - ), - canary_rules=[], + uid="", + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + name="", + resource_type=ResourceType.DEPLOYMENT_REVISION, + labels=[], + creator=user, + ) + ], uid="", created_at=datetime(2023, 5, 1), updated_at=None, deleted_at=None, - name="", + name="test=xxx", resource_type=ResourceType.DEPLOYMENT_REVISION, labels=[], creator=user, - ) - ], - uid="", - created_at=datetime(2023, 5, 1), - updated_at=None, - deleted_at=None, - name="test=xxx", - resource_type=ResourceType.DEPLOYMENT_REVISION, - labels=[], - creator=user, - status=DeploymentRevisionStatus.ACTIVE, - ), - uid="", - created_at=datetime(2023, 5, 1), - updated_at=None, - deleted_at=None, - name="test=xxx", - resource_type=ResourceType.DEPLOYMENT_REVISION, - labels=[], - creator=user, - status=DeploymentStatus.Running, - cluster=ClusterSchema( - uid="", - name="default", - resource_type=ResourceType.CLUSTER, - labels=[], - description="", - creator=user, - created_at=datetime(2023, 5, 1), - updated_at=None, - deleted_at=None, - ), - kube_namespace="", - ) - - -@pytest.fixture(scope="function", name="cloudclient") -def fixture_cloudclient() -> BentoCloudClient: - return BentoCloudClient() - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - - res = cloudclient.deployment.create( - deployment_name="test-xxx", bento="iris_classifier:dqjxjyx2vweogcvj" - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(), - ) - ], - 
mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_canary_rules( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - rules = [ - DeploymentTargetCanaryRule(DeploymentTargetCanaryRuleType.WEIGHT, 3, "", "", "") - ] - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - canary_rules=rules, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(), - canary_rules=rules, - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_labels( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - labels={"user": "steve"}, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(), + status=DeploymentRevisionStatus.ACTIVE, + ), + uid="", + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + name="test=xxx", + resource_type=ResourceType.DEPLOYMENT_REVISION, + labels=[], + creator=user, + status=DeploymentStatus.Running, + cluster=ClusterSchema( + uid="", + name="default", + resource_type=ResourceType.CLUSTER, + labels=[], + description="", + creator=user, + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, + ), + kube_namespace="", ) - ], - mode=DeploymentMode.Function, - name="test-xxx", 
- labels=[LabelItemSchema("user", "steve")], - ) - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_resource_instance( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - resource_instance="test-instance", - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(resource_instance="test-instance"), + client = RestApiClient("", "") + user = UserSchema(name="", email="", first_name="", last_name="") + client.v2.create_deployment = dummy_create_deployment # type: ignore + client.v2.update_deployment = dummy_update_deployment # type: ignore + client.v1.get_cluster_list = lambda params: ClusterListSchema( + start=0, + count=0, + total=0, + items=[ + ClusterSchema( + uid="", + name="default", + resource_type=ResourceType.CLUSTER, + labels=[], + description="", + creator=user, + created_at=datetime(2023, 5, 1), + updated_at=None, + deleted_at=None, ) ], - mode=DeploymentMode.Function, - name="test-xxx", - ) + ) # type: ignore + client.v2.get_deployment = dummy_get_deployment -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_resource_instance_runner( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - runner = Resource.for_runner(enable_debug_mode=True) - - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - resource_instance="test-instance", - runners_config={"runner": runner}, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - 
type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - resource_instance="test-instance", - runners={ - "runner": DeploymentTargetRunnerConfig( - resource_instance="test-instance", enable_debug_mode=True - ) - }, - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) + return client -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_resource_instance_api_server( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient -): - mock_create_deployment.side_effect = f_create - api_server = Resource.for_api_server(enable_ingress=True) - - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - resource_instance="test-resource", - api_server_config=api_server, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - resource_instance="test-resource", enable_ingress=True - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_create_deployment(mock_get_client: MagicMock, rest_client: RestApiClient): + mock_get_client.return_value = rest_client + deployment = Deployment.create(bento="abc:123") + # assert expected schema + assert deployment._schema == CreateDeploymentSchemaV2( + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=1), + bento="abc:123", + name="", + cluster="default", + access_type=AccessControl.PUBLIC, + distributed=False, ) -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_api_server( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def 
test_create_deployment_custom_standalone( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - api_server_conf = Resource.for_api_server(resource_instance="t3-micro") - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - api_server_config=api_server_conf, - ) - - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(resource_instance="t3-micro"), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_hpa_conf( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient + mock_get_client.return_value = rest_client + deployment = Deployment.create( + bento="abc:123", + name="custom-name", + scaling_min=2, + scaling_max=4, + access_type="private", + cluster_name="custom-cluster", + envs=[{"key": "env_key", "value": "env_value"}], + strategy="RollingUpdate", + ) + # assert expected schema + assert deployment._schema == CreateDeploymentSchemaV2( + bento="abc:123", + name="custom-name", + cluster="custom-cluster", + access_type=AccessControl.PRIVATE, + scaling=DeploymentTargetHPAConf(min_replicas=2, max_replicas=4), + distributed=False, + deployment_strategy=DeploymentStrategy.RollingUpdate, + envs=[LabelItemSchema(key="env_key", value="env_value")], + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_create_deployment_scailing_only_min( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - hpa_conf=hpa_conf, - 
) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(hpa_conf=hpa_conf), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_runner( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient + mock_get_client.return_value = rest_client + deployment = Deployment.create(bento="abc:123", scaling_min=3) + # assert expected schema + assert deployment._schema == CreateDeploymentSchemaV2( + bento="abc:123", + name="", + cluster="default", + access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=3), + distributed=False, + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_create_deployment_scailing_only_max( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - runners_config={"runner1": runner}, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig(runners={"runner1": runner}), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_runner_hpa_conf( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient + mock_get_client.return_value = rest_client + deployment = Deployment.create(bento="abc:123", scaling_max=3) + # assert expected schema + assert deployment._schema == 
CreateDeploymentSchemaV2( + bento="abc:123", + name="", + cluster="default", + access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=3), + distributed=False, + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_create_deployment_scailing_mismatch_min_max( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10) - runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - runners_config={"runner1": runner}, - hpa_conf=hpa_conf, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - hpa_conf=hpa_conf, - runners={ - "runner1": DeploymentTargetRunnerConfig( - resource_instance="t3-micro", - hpa_conf=hpa_conf, - enable_debug_mode=True, - ) - }, - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_api_server_runner( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient + mock_get_client.return_value = rest_client + deployment = Deployment.create(bento="abc:123", scaling_min=3, scaling_max=2) + # assert expected schema + assert deployment._schema == CreateDeploymentSchemaV2( + bento="abc:123", + name="", + cluster="default", + access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=2, max_replicas=2), + distributed=False, + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_create_deployment_config_dct( + mock_get_client: MagicMock, rest_client: RestApiClient ): - 
mock_create_deployment.side_effect = f_create - api_server = Resource.for_api_server( - resource_instance="t3-micro", enable_stealing_traffic_debug_mode=True - ) - runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - runners_config={"runner1": runner}, - api_server_config=api_server, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - resource_instance="t3-micro", - enable_stealing_traffic_debug_mode=True, - runners={ - "runner1": DeploymentTargetRunnerConfig( - resource_instance="t3-micro", enable_debug_mode=True - ) - }, - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_api_server_hpa_conf( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient + mock_get_client.return_value = rest_client + config_dct = { + "services": { + "irisclassifier": {"scaling": {"max_replicas": 2, "min_replicas": 1}}, + "preprocessing": {"scaling": {"max_replicas": 2}}, + }, + "envs": [{"key": "env_key", "value": "env_value"}], + "bentoml_config_overrides": { + "irisclassifier": { + "resources": { + "cpu": "300m", + "memory": "500m", + }, + } + }, + } + deployment = Deployment.create(bento="abc:123", config_dct=config_dct) + # assert expected schema + assert deployment._schema == CreateDeploymentSchemaV2( + bento="abc:123", + name="", + cluster="default", + access_type=AccessControl.PUBLIC, + distributed=True, + services={ + "irisclassifier": DeploymentServiceConfig( + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=2) + ), + "preprocessing": DeploymentServiceConfig( + scaling=DeploymentTargetHPAConf(min_replicas=1, 
max_replicas=2) + ), + }, + envs=[LabelItemSchema(key="env_key", value="env_value")], + bentoml_config_overrides={ + "irisclassifier": { + "resources": { + "cpu": "300m", + "memory": "500m", + }, + } + }, + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_update_deployment(mock_get_client: MagicMock, rest_client: RestApiClient): + mock_get_client.return_value = rest_client + deployment = Deployment.update( + name="test", + bento="abc:1234", + access_type="private", + envs=[{"key": "env_key2", "value": "env_value2"}], + strategy="Recreate", + ) + # assert expected schema + assert deployment._schema == DummyUpdateSchema( + bento="abc:1234", + access_type=AccessControl.PRIVATE, + scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=5), + deployment_strategy=DeploymentStrategy.Recreate, + envs=[LabelItemSchema(key="env_key2", value="env_value2")], + ) + + +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_update_deployment_scaling_only_min( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - api_server = Resource.for_api_server(resource_instance="t3-micro") - hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - api_server_config=api_server, - hpa_conf=hpa_conf, - ) - assert res == CreateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - resource_instance="t3-micro", hpa_conf=hpa_conf - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", + mock_get_client.return_value = rest_client + deployment = Deployment.update(name="test", scaling_min=1) + # assert expected schema + assert deployment._schema == DummyUpdateSchema( + bento="abc:123", + 
access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=5), + deployment_strategy=DeploymentStrategy.RollingUpdate, + envs=[LabelItemSchema(key="env_key", value="env_value")], ) -@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment") -def test_create_deployment_api_server_runner_hpa_conf( - mock_create_deployment: MagicMock, cloudclient: BentoCloudClient +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_update_deployment_scaling_only_max( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_create_deployment.side_effect = f_create - api_server = Resource.for_api_server(resource_instance="t3-micro") - runner = Resource.for_runner( - resource_instance="t3-small", hpa_conf={"min_replicas": 3} - ) - runner2 = Resource.for_runner( - resource_instance="t3-medium", hpa_conf={"min_replicas": 5} - ) - hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10) - res = cloudclient.deployment.create( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - api_server_config=api_server, - hpa_conf=hpa_conf, - runners_config={"runner1": runner, "runner2": runner2}, - expose_endpoint=True, - labels={"user": "steve"}, - ) - assert res == CreateDeploymentSchema( - labels=[LabelItemSchema("user", "steve")], - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="dqjxjyx2vweogcvj", - config=DeploymentTargetConfig( - resource_instance="t3-micro", - enable_ingress=True, - hpa_conf=hpa_conf, - runners={ - "runner1": DeploymentTargetRunnerConfig( - resource_instance="t3-small", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=3, max_replicas=10 - ), - ), - "runner2": DeploymentTargetRunnerConfig( - resource_instance="t3-medium", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=5, max_replicas=10 - ), - ), - }, - ), - ) - ], - mode=DeploymentMode.Function, - name="test-xxx", - ) - - 
-@pytest.fixture(name="update_schema", scope="function") -def fixture_update_schema() -> UpdateDeploymentSchema: - return UpdateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="12345", - config=DeploymentTargetConfig( - resource_instance="t3-micro", - enable_ingress=True, - hpa_conf=DeploymentTargetHPAConf(min_replicas=2, max_replicas=10), - runners={ - "runner1": DeploymentTargetRunnerConfig( - resource_instance="t3-small", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=3, max_replicas=10 - ), - ), - "runner2": DeploymentTargetRunnerConfig( - resource_instance="t3-medium", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=5, max_replicas=10 - ), - ), - }, - ), - ) - ], - mode=DeploymentMode.Function, - labels=[], + mock_get_client.return_value = rest_client + deployment = Deployment.update(name="test", scaling_max=3) + # assert expected schema + assert deployment._schema == DummyUpdateSchema( + bento="abc:123", + access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=3), + deployment_strategy=DeploymentStrategy.RollingUpdate, + envs=[LabelItemSchema(key="env_key", value="env_value")], ) -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_bento( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - res = cloudclient.deployment.update( - deployment_name="test-xxx", - bento="iris_classifier:dqjxjyx2vweogcvj", - cluster_name="", - kube_namespace="", - ) - update_schema.targets[0].bento = "dqjxjyx2vweogcvj" - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") 
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_runner( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - new_runnner = Resource.for_runner( - resource_instance="new-resource", hpa_conf={"min_replicas": 6} - ) - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - runners_config={"runner1": new_runnner}, - ) - update_schema.targets[0].config.runners["runner1"].hpa_conf.min_replicas = 6 - update_schema.targets[0].config.runners[ - "runner1" - ].resource_instance = "new-resource" - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_runner_hpa_conf( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - hpa_conf = Resource.for_hpa_conf(min_replicas=5) - new_runnner = Resource.for_runner( - resource_instance="new-resource", hpa_conf={"min_replicas": 7} - ) - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - runners_config={"runner1": new_runnner}, - hpa_conf=hpa_conf, - ) - update_schema.targets[0].config.hpa_conf.min_replicas = 5 - for k, v in update_schema.targets[0].config.runners.items(): - if k == "runner1": - v.hpa_conf.min_replicas = 7 - v.resource_instance = "new-resource" - else: - v.hpa_conf.min_replicas = 5 - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") 
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_api_server( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - api_server = Resource.for_api_server( - enable_ingress=False, hpa_conf={"min_replicas": 5} - ) - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - api_server_config=api_server, - ) - update_schema.targets[0].config.hpa_conf.min_replicas = 5 - update_schema.targets[0].config.enable_ingress = False - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_api_server_hpa_conf( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - api_server = Resource.for_api_server(hpa_conf={"min_replicas": 9}) - hpa_conf = Resource.for_hpa_conf(min_replicas=8) - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - api_server_config=api_server, - hpa_conf=hpa_conf, - ) - UpdateDeploymentSchema( - targets=[ - CreateDeploymentTargetSchema( - type=DeploymentTargetType.STABLE, - bento_repository="iris_classifier", - bento="12345", - config=DeploymentTargetConfig( - resource_instance="t3-micro", - enable_ingress=True, - hpa_conf=DeploymentTargetHPAConf(min_replicas=2, max_replicas=10), - runners={ - "runner1": DeploymentTargetRunnerConfig( - resource_instance="t3-small", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=3, max_replicas=10 - ), - ), - "runner2": 
DeploymentTargetRunnerConfig( - resource_instance="t3-medium", - hpa_conf=DeploymentTargetHPAConf( - min_replicas=5, max_replicas=10 - ), - ), - }, - ), - ) - ], - mode=DeploymentMode.Function, - labels=[], - ) - update_schema.targets[0].config.hpa_conf.min_replicas = 9 - for _, v in update_schema.targets[0].config.runners.items(): - v.hpa_conf.min_replicas = 8 - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_resource_instance( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, -): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - resource_instance="test-resource", - ) - update_schema.targets[0].config.resource_instance = "test-resource" - for _, v in update_schema.targets[0].config.runners.items(): - v.resource_instance = "test-resource" - assert res == update_schema - - -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_labels( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_update_deployment_scaling_too_big_min( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - labels={"user": "steve"}, + mock_get_client.return_value = rest_client + deployment = 
Deployment.update(name="test", scaling_min=10) + # assert expected schema + assert deployment._schema == DummyUpdateSchema( + bento="abc:123", + access_type=AccessControl.PUBLIC, + scaling=DeploymentTargetHPAConf(min_replicas=5, max_replicas=5), + deployment_strategy=DeploymentStrategy.RollingUpdate, + envs=[LabelItemSchema(key="env_key", value="env_value")], ) - assert res == attr.evolve(update_schema, labels=[LabelItemSchema("user", "steve")]) -@patch("bentoml._internal.cloud.deployment.Deployment.get") -@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment") -def test_update_deployment_canary_rules( - mock_update_deployment: MagicMock, - mock_get: MagicMock, - update_schema: UpdateDeploymentSchema, - get_schema: DeploymentSchema, - cloudclient: BentoCloudClient, +@patch("bentoml._internal.cloud.deployment.get_rest_api_client") +def test_update_deployment_distributed( + mock_get_client: MagicMock, rest_client: RestApiClient ): - mock_update_deployment.side_effect = f_update - mock_get.return_value = get_schema - rules = [ - DeploymentTargetCanaryRule(DeploymentTargetCanaryRuleType.WEIGHT, 3, "", "", "") - ] - res = cloudclient.deployment.update( - deployment_name="test-xxx", - cluster_name="", - kube_namespace="", - canary_rules=rules, + mock_get_client.return_value = rest_client + config_dct = { + "services": { + "irisclassifier": {"scaling": {"max_replicas": 50}}, + "preprocessing": {"instance_type": "t3-large"}, + } + } + deployment = Deployment.update(name="test-distributed", config_dct=config_dct) + # assert expected schema + assert deployment._schema == DummyUpdateSchema( + bento="abc:123", + access_type=AccessControl.PUBLIC, + envs=[LabelItemSchema(key="env_key", value="env_value")], + services={ + "irisclassifier": DeploymentServiceConfig( + instance_type="t3-small", + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=50), + deployment_strategy=DeploymentStrategy.RollingUpdate, + ), + "preprocessing": DeploymentServiceConfig( 
+ instance_type="t3-large", + scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=1), + deployment_strategy=DeploymentStrategy.RollingUpdate, + ), + }, ) - update_schema.targets[0].canary_rules = rules - assert res == update_schema