diff --git a/src/bentoml/__init__.py b/src/bentoml/__init__.py
index 1cbc610e842..6d6566ee37e 100644
--- a/src/bentoml/__init__.py
+++ b/src/bentoml/__init__.py
@@ -99,6 +99,7 @@
     from . import server  # Server API
     from . import monitoring  # Monitoring API
     from . import cloud  # Cloud API
+    from . import deployment  # Deployment API
 
     # isort: on
     from _bentoml_impl.client import AsyncHTTPClient
@@ -166,7 +167,7 @@
     exceptions = _LazyLoader("bentoml.exceptions", globals(), "bentoml.exceptions")
     monitoring = _LazyLoader("bentoml.monitoring", globals(), "bentoml.monitoring")
     cloud = _LazyLoader("bentoml.cloud", globals(), "bentoml.cloud")
-
+    deployment = _LazyLoader("bentoml.deployment", globals(), "bentoml.deployment")
     del _LazyLoader
 
     _NEW_SDK_ATTRS = [
@@ -258,6 +259,7 @@ def __getattr__(name: str) -> Any:
     # integrations
     "ray",
     "cloud",
+    "deployment",
     "triton",
     "monitor",
     "load_config",
diff --git a/src/bentoml/_internal/cloud/__init__.py b/src/bentoml/_internal/cloud/__init__.py
index 97e73790b1e..f7d45507503 100644
--- a/src/bentoml/_internal/cloud/__init__.py
+++ b/src/bentoml/_internal/cloud/__init__.py
@@ -1,4 +1,3 @@
 from .base import CloudClient as CloudClient
 from .bentocloud import BentoCloudClient as BentoCloudClient
-from .deployment import Resource as Resource
 from .yatai import YataiClient as YataiClient
diff --git a/src/bentoml/_internal/cloud/base.py b/src/bentoml/_internal/cloud/base.py
index d1173f71e31..c8cdd162a56 100644
--- a/src/bentoml/_internal/cloud/base.py
+++ b/src/bentoml/_internal/cloud/base.py
@@ -59,30 +59,33 @@ def write(self, data: bytes) -> t.Any:  # type: ignore  # python buffer types ar
 
 
 class CloudClient(ABC):
-    log_progress = Progress(TextColumn("{task.description}"))
-
-    spinner_progress = Progress(
-        TextColumn("  "),
-        TimeElapsedColumn(),
-        TextColumn("[bold purple]{task.fields[action]}"),
-        SpinnerColumn("simpleDots"),
-    )
-
-    transmission_progress = Progress(
-        TextColumn("[bold blue]{task.description}", justify="right"),
-        BarColumn(bar_width=None),
-        "[progress.percentage]{task.percentage:>3.1f}%",
-        "•",
-        DownloadColumn(),
-        "•",
-        TransferSpeedColumn(),
-        "•",
-        TimeRemainingColumn(),
-    )
-
-    progress_group = Group(
-        Panel(Group(log_progress, spinner_progress)), transmission_progress
-    )
+    # Built in __init__ rather than as class attributes, which kept all the logs when running the SDK.
+    def __init__(self) -> None:
+        self.log_progress = Progress(TextColumn("{task.description}"))
+
+        self.spinner_progress = Progress(
+            TextColumn("  "),
+            TimeElapsedColumn(),
+            TextColumn("[bold purple]{task.fields[action]}"),
+            SpinnerColumn("simpleDots"),
+        )
+
+        self.transmission_progress = Progress(
+            TextColumn("[bold blue]{task.description}", justify="right"),
+            BarColumn(bar_width=None),
+            "[progress.percentage]{task.percentage:>3.1f}%",
+            "•",
+            DownloadColumn(),
+            "•",
+            TransferSpeedColumn(),
+            "•",
+            TimeRemainingColumn(),
+        )
+
+        self.progress_group = Group(
+            Panel(Group(self.log_progress, self.spinner_progress)),
+            self.transmission_progress,
+        )
 
     @contextmanager
     def spin(self, *, text: str):
diff --git a/src/bentoml/_internal/cloud/bentocloud.py b/src/bentoml/_internal/cloud/bentocloud.py
index 0709843864d..861f9f4d1ed 100644
--- a/src/bentoml/_internal/cloud/bentocloud.py
+++ b/src/bentoml/_internal/cloud/bentocloud.py
@@ -10,7 +10,7 @@
 from tempfile import NamedTemporaryFile
 
 import fs
-import requests
+import httpx
 from rich.live import Live
 from simple_di import Provide
 from simple_di import inject
@@ -29,40 +29,36 @@
 from .base import CallbackIOWrapper
 from .base import CloudClient
 from .config import get_rest_api_client
-from .deployment import Deployment
-from .schemas import BentoApiSchema
-from .schemas import BentoManifestSchema
-from .schemas import BentoRunnerResourceSchema
-from .schemas import BentoRunnerSchema
-from .schemas import BentoUploadStatus
-from .schemas import CompleteMultipartUploadSchema
-from .schemas import CompletePartSchema
-from .schemas import CreateBentoRepositorySchema
-from .schemas import CreateBentoSchema
-from .schemas import CreateModelRepositorySchema
-from .schemas import CreateModelSchema
-from .schemas import FinishUploadBentoSchema
-from .schemas import FinishUploadModelSchema
-from .schemas import LabelItemSchema
-from .schemas import ModelManifestSchema
-from .schemas import ModelUploadStatus
-from .schemas import PreSignMultipartUploadUrlSchema
-from .schemas import TransmissionStrategy
-from .schemas import UpdateBentoSchema
+from .schemas.modelschemas import BentoApiSchema
+from .schemas.modelschemas import BentoRunnerResourceSchema
+from .schemas.modelschemas import BentoRunnerSchema
+from .schemas.schemasv1 import BentoManifestSchema
+from .schemas.schemasv1 import BentoUploadStatus
+from .schemas.schemasv1 import CompleteMultipartUploadSchema
+from .schemas.schemasv1 import CompletePartSchema
+from .schemas.schemasv1 import CreateBentoRepositorySchema
+from .schemas.schemasv1 import CreateBentoSchema
+from .schemas.schemasv1 import CreateModelRepositorySchema
+from .schemas.schemasv1 import CreateModelSchema
+from .schemas.schemasv1 import FinishUploadBentoSchema
+from .schemas.schemasv1 import FinishUploadModelSchema
+from .schemas.schemasv1 import LabelItemSchema
+from .schemas.schemasv1 import ModelManifestSchema
+from .schemas.schemasv1 import ModelUploadStatus
+from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema
+from .schemas.schemasv1 import TransmissionStrategy
+from .schemas.schemasv1 import UpdateBentoSchema
 
 if t.TYPE_CHECKING:
     from concurrent.futures import Future
 
     from rich.progress import TaskID
 
-    from .schemas import BentoWithRepositoryListSchema
-    from .schemas import ModelWithRepositoryListSchema
+    from .schemas.schemasv1 import BentoWithRepositoryListSchema
+    from .schemas.schemasv1 import ModelWithRepositoryListSchema
 
 
 class BentoCloudClient(CloudClient):
-    def __init__(self):
-        self.deployment = Deployment()
-
     def push_bento(
         self,
         bento: Bento,
@@ -124,18 +120,18 @@ def push_model(model: Model) -> None:
             futures: t.Iterator[None] = executor.map(push_model, models)
             list(futures)
         with self.spin(text=f'Fetching Bento repository "{name}"'):
-            bento_repository = yatai_rest_client.get_bento_repository(
+            bento_repository = yatai_rest_client.v1.get_bento_repository(
                 bento_repository_name=name
             )
         if not bento_repository:
             with self.spin(text=f'Bento repository "{name}" not found, creating now..'):
-                bento_repository = yatai_rest_client.create_bento_repository(
+                bento_repository = yatai_rest_client.v1.create_bento_repository(
                     req=CreateBentoRepositorySchema(name=name, description="")
                 )
         with self.spin(
             text=f'Try fetching Bento "{bento.tag}" from remote Bento store..'
         ):
-            remote_bento = yatai_rest_client.get_bento(
+            remote_bento = yatai_rest_client.v1.get_bento(
                 bento_repository_name=name, version=version
             )
         if (
@@ -144,7 +140,7 @@ def push_model(model: Model) -> None:
             and remote_bento.upload_status == BentoUploadStatus.SUCCESS
         ):
             self.log_progress.add_task(
-                f'[bold blue]Push failed: Bento "{bento.tag}" already exists in remote Bento store'
+                f'[bold blue]Push skipped: Bento "{bento.tag}" already exists in remote Bento store'
             )
             return
         labels: list[LabelItemSchema] = [
@@ -182,7 +178,7 @@ def push_model(model: Model) -> None:
             with self.spin(
                 text=f'Registering Bento "{bento.tag}" with remote Bento store..'
             ):
-                remote_bento = yatai_rest_client.create_bento(
+                remote_bento = yatai_rest_client.v1.create_bento(
                     bento_repository_name=bento_repository.name,
                     req=CreateBentoSchema(
                         description="",
@@ -194,7 +190,7 @@ def push_model(model: Model) -> None:
                 )
         else:
             with self.spin(text=f'Updating Bento "{bento.tag}"..'):
-                remote_bento = yatai_rest_client.update_bento(
+                remote_bento = yatai_rest_client.v1.update_bento(
                     bento_repository_name=bento_repository.name,
                     version=version,
                     req=UpdateBentoSchema(
@@ -212,7 +208,7 @@ def push_model(model: Model) -> None:
             with self.spin(
                 text=f'Getting a presigned upload url for bento "{bento.tag}" ..'
             ):
-                remote_bento = yatai_rest_client.presign_bento_upload_url(
+                remote_bento = yatai_rest_client.v1.presign_bento_upload_url(
                     bento_repository_name=bento_repository.name, version=version
                 )
                 if remote_bento.presigned_upload_url:
@@ -240,7 +236,7 @@ def filter_(
             tar_io.seek(0, 0)
 
             with self.spin(text=f'Start uploading bento "{bento.tag}"..'):
-                yatai_rest_client.start_upload_bento(
+                yatai_rest_client.v1.start_upload_bento(
                     bento_repository_name=bento_repository.name, version=version
                 )
 
@@ -255,7 +251,7 @@ def filter_(
 
             if transmission_strategy == "proxy":
                 try:
-                    yatai_rest_client.upload_bento(
+                    yatai_rest_client.v1.upload_bento(
                         bento_repository_name=bento_repository.name,
                         version=version,
                         data=tar_io,
@@ -275,7 +271,7 @@ def filter_(
             )
             try:
                 if presigned_upload_url is not None:
-                    resp = requests.put(presigned_upload_url, data=tar_io)
+                    resp = httpx.put(presigned_upload_url, content=tar_io)
                     if resp.status_code != 200:
                         finish_req = FinishUploadBentoSchema(
                             status=BentoUploadStatus.FAILED,
@@ -285,9 +281,11 @@ def filter_(
                     with self.spin(
                         text=f'Start multipart uploading Bento "{bento.tag}"...'
                     ):
-                        remote_bento = yatai_rest_client.start_bento_multipart_upload(
-                            bento_repository_name=bento_repository.name,
-                            version=version,
+                        remote_bento = (
+                            yatai_rest_client.v1.start_bento_multipart_upload(
+                                bento_repository_name=bento_repository.name,
+                                version=version,
+                            )
                         )
                         if not remote_bento.upload_id:
                             raise BentoMLException(
@@ -305,7 +303,7 @@ def chunk_upload(
                             text=f'({chunk_number}/{chunks_count}) Presign multipart upload url of Bento "{bento.tag}"...'
                         ):
                             remote_bento = (
-                                yatai_rest_client.presign_bento_multipart_upload_url(
+                                yatai_rest_client.v1.presign_bento_multipart_upload_url(
                                     bento_repository_name=bento_repository.name,
                                     version=version,
                                     req=PreSignMultipartUploadUrlSchema(
@@ -330,8 +328,8 @@ def chunk_upload(
                             )
 
                             with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io:
-                                resp = requests.put(
-                                    remote_bento.presigned_upload_url, data=chunk_io
+                                resp = httpx.put(
+                                    remote_bento.presigned_upload_url, content=chunk_io
                                 )
                                 if resp.status_code != 200:
                                     return FinishUploadBentoSchema(
@@ -375,7 +373,7 @@ def chunk_upload(
                         text=f'Completing multipart upload of Bento "{bento.tag}"...'
                     ):
                         remote_bento = (
-                            yatai_rest_client.complete_bento_multipart_upload(
+                            yatai_rest_client.v1.complete_bento_multipart_upload(
                                 bento_repository_name=bento_repository.name,
                                 version=version,
                                 req=CompleteMultipartUploadSchema(
@@ -395,7 +393,7 @@ def chunk_upload(
                     f'[bold red]Failed to upload Bento "{bento.tag}"'
                 )
             with self.spin(text="Submitting upload status to remote Bento store"):
-                yatai_rest_client.finish_upload_bento(
+                yatai_rest_client.v1.finish_upload_bento(
                     bento_repository_name=bento_repository.name,
                     version=version,
                     req=finish_req,
@@ -460,7 +458,7 @@ def _do_pull_bento(
         yatai_rest_client = get_rest_api_client(context)
 
         with self.spin(text=f'Fetching bento "{_tag}"'):
-            remote_bento = yatai_rest_client.get_bento(
+            remote_bento = yatai_rest_client.v1.get_bento(
                 bento_repository_name=name, version=version
             )
         if not remote_bento:
@@ -498,7 +496,7 @@ def pull_model(model_tag: Tag):
                 with self.spin(
                     text=f'Getting a presigned download url for bento "{_tag}"'
                 ):
-                    remote_bento = yatai_rest_client.presign_bento_download_url(
+                    remote_bento = yatai_rest_client.v1.presign_bento_download_url(
                         name, version
                     )
                     if remote_bento.presigned_download_url:
@@ -506,7 +504,7 @@ def pull_model(model_tag: Tag):
                         transmission_strategy = "presigned_url"
 
             if transmission_strategy == "proxy":
-                response = yatai_rest_client.download_bento(
+                response = yatai_rest_client.v1.download_bento(
                     bento_repository_name=name,
                     version=version,
                 )
@@ -515,31 +513,32 @@ def pull_model(model_tag: Tag):
                     with self.spin(
                         text=f'Getting a presigned download url for bento "{_tag}"'
                     ):
-                        remote_bento = yatai_rest_client.presign_bento_download_url(
+                        remote_bento = yatai_rest_client.v1.presign_bento_download_url(
                             name, version
                         )
                         presigned_download_url = remote_bento.presigned_download_url
-                response = requests.get(presigned_download_url, stream=True)
 
-            if response.status_code != 200:
-                raise BentoMLException(
-                    f'Failed to download bento "{_tag}": {response.text}'
-                )
-            total_size_in_bytes = int(response.headers.get("content-length", 0))
-            block_size = 1024  # 1 Kibibyte
             with NamedTemporaryFile() as tar_file:
-                self.transmission_progress.update(
-                    download_task_id,
-                    completed=0,
-                    total=total_size_in_bytes,
-                    visible=True,
-                )
-                self.transmission_progress.start_task(download_task_id)
-                for data in response.iter_content(block_size):
+                with httpx.stream("GET", presigned_download_url) as response:
+                    if response.status_code != 200:
+                        response.read()  # load the streamed body so `.text` works
+                        msg = f'Failed to download bento "{_tag}": {response.text}'
+                        raise BentoMLException(msg)
+                    total_size_in_bytes = int(response.headers.get("content-length", 0))
+                    block_size = 1024  # 1 Kibibyte
                     self.transmission_progress.update(
-                        download_task_id, advance=len(data)
+                        download_task_id,
+                        completed=0,
+                        total=total_size_in_bytes,
+                        visible=True,
                     )
-                    tar_file.write(data)
+                    self.transmission_progress.start_task(download_task_id)
+                    for data in response.iter_bytes(block_size):
+                        self.transmission_progress.update(
+                            download_task_id, advance=len(data)
+                        )
+                        tar_file.write(data)
+
                 self.log_progress.add_task(
                     f'[bold green]Finished downloading all bento "{_tag}" files'
                 )
@@ -604,18 +603,18 @@ def _do_push_model(
             raise BentoMLException(f'Model "{model.tag}" version cannot be None')
         info = model.info
         with self.spin(text=f'Fetching model repository "{name}"'):
-            model_repository = yatai_rest_client.get_model_repository(
+            model_repository = yatai_rest_client.v1.get_model_repository(
                 model_repository_name=name
             )
         if not model_repository:
             with self.spin(text=f'Model repository "{name}" not found, creating now..'):
-                model_repository = yatai_rest_client.create_model_repository(
+                model_repository = yatai_rest_client.v1.create_model_repository(
                     req=CreateModelRepositorySchema(name=name, description="")
                 )
         with self.spin(
             text=f'Try fetching model "{model.tag}" from remote model store..'
         ):
-            remote_model = yatai_rest_client.get_model(
+            remote_model = yatai_rest_client.v1.get_model(
                 model_repository_name=name, version=version
             )
         if (
@@ -635,7 +634,7 @@ def _do_push_model(
             with self.spin(
                 text=f'Registering model "{model.tag}" with remote model store..'
             ):
-                remote_model = yatai_rest_client.create_model(
+                remote_model = yatai_rest_client.v1.create_model(
                     model_repository_name=model_repository.name,
                     req=CreateModelSchema(
                         description="",
@@ -663,7 +662,7 @@ def _do_push_model(
             with self.spin(
                 text=f'Getting a presigned upload url for Model "{model.tag}" ..'
             ):
-                remote_model = yatai_rest_client.presign_model_upload_url(
+                remote_model = yatai_rest_client.v1.presign_model_upload_url(
                     model_repository_name=model_repository.name, version=version
                 )
                 if remote_model.presigned_upload_url:
@@ -682,7 +681,7 @@ def io_cb(x: int):
                     tar.add(model.path, arcname="./")
             tar_io.seek(0, 0)
             with self.spin(text=f'Start uploading model "{model.tag}"..'):
-                yatai_rest_client.start_upload_model(
+                yatai_rest_client.v1.start_upload_model(
                     model_repository_name=model_repository.name, version=version
                 )
             file_size = tar_io.getbuffer().nbytes
@@ -696,7 +695,7 @@ def io_cb(x: int):
 
             if transmission_strategy == "proxy":
                 try:
-                    yatai_rest_client.upload_model(
+                    yatai_rest_client.v1.upload_model(
                         model_repository_name=model_repository.name,
                         version=version,
                         data=tar_io,
@@ -716,7 +715,7 @@ def io_cb(x: int):
             )
             try:
                 if presigned_upload_url is not None:
-                    resp = requests.put(presigned_upload_url, data=tar_io)
+                    resp = httpx.put(presigned_upload_url, content=tar_io)
                     if resp.status_code != 200:
                         finish_req = FinishUploadModelSchema(
                             status=ModelUploadStatus.FAILED,
@@ -726,9 +725,11 @@ def io_cb(x: int):
                     with self.spin(
                         text=f'Start multipart uploading Model "{model.tag}"...'
                     ):
-                        remote_model = yatai_rest_client.start_model_multipart_upload(
-                            model_repository_name=model_repository.name,
-                            version=version,
+                        remote_model = (
+                            yatai_rest_client.v1.start_model_multipart_upload(
+                                model_repository_name=model_repository.name,
+                                version=version,
+                            )
                         )
                         if not remote_model.upload_id:
                             raise BentoMLException(
@@ -746,7 +747,7 @@ def chunk_upload(
                             text=f'({chunk_number}/{chunks_count}) Presign multipart upload url of model "{model.tag}"...'
                         ):
                             remote_model = (
-                                yatai_rest_client.presign_model_multipart_upload_url(
+                                yatai_rest_client.v1.presign_model_multipart_upload_url(
                                     model_repository_name=model_repository.name,
                                     version=version,
                                     req=PreSignMultipartUploadUrlSchema(
@@ -772,8 +773,8 @@ def chunk_upload(
                             )
 
                             with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io:
-                                resp = requests.put(
-                                    remote_model.presigned_upload_url, data=chunk_io
+                                resp = httpx.put(
+                                    remote_model.presigned_upload_url, content=chunk_io
                                 )
                                 if resp.status_code != 200:
                                     return FinishUploadModelSchema(
@@ -817,7 +818,7 @@ def chunk_upload(
                         text=f'Completing multipart upload of model "{model.tag}"...'
                     ):
                         remote_model = (
-                            yatai_rest_client.complete_model_multipart_upload(
+                            yatai_rest_client.v1.complete_model_multipart_upload(
                                 model_repository_name=model_repository.name,
                                 version=version,
                                 req=CompleteMultipartUploadSchema(
@@ -837,11 +838,12 @@ def chunk_upload(
                     f'[bold red]Failed to upload model "{model.tag}"'
                 )
             with self.spin(text="Submitting upload status to remote model store"):
-                yatai_rest_client.finish_upload_model(
+                yatai_rest_client.v1.finish_upload_model(
                     model_repository_name=model_repository.name,
                     version=version,
                     req=finish_req,
                 )
+
             if finish_req.status != ModelUploadStatus.SUCCESS:
                 self.log_progress.add_task(
                     f'[bold red]Failed pushing model "{model.tag}" : {finish_req.reason}'
@@ -903,7 +905,7 @@ def _do_pull_model(
         name = _tag.name
         version = _tag.version
         if version in (None, "latest"):
-            latest_model = yatai_rest_client.get_latest_model(name, query=query)
+            latest_model = yatai_rest_client.v1.get_latest_model(name, query=query)
             if latest_model is None:
                 raise BentoMLException(
                     f'Model "{_tag}" not found on remote model store, you may need to specify a version'
@@ -929,7 +931,9 @@ def _do_pull_model(
             )
 
         with self.spin(text=f'Getting a presigned download url for model "{_tag}"..'):
-            remote_model = yatai_rest_client.presign_model_download_url(name, version)
+            remote_model = yatai_rest_client.v1.presign_model_download_url(
+                name, version
+            )
 
         if not remote_model:
             raise BentoMLException(f'Model "{_tag}" not found on remote model store')
@@ -942,7 +946,7 @@ def _do_pull_model(
             transmission_strategy = remote_model.transmission_strategy
         else:
             with self.spin(text=f'Getting a presigned download url for model "{_tag}"'):
-                remote_model = yatai_rest_client.presign_model_download_url(
+                remote_model = yatai_rest_client.v1.presign_model_download_url(
                     name, version
                 )
                 if remote_model.presigned_download_url:
@@ -950,7 +954,7 @@ def _do_pull_model(
                     transmission_strategy = "presigned_url"
 
         if transmission_strategy == "proxy":
-            response = yatai_rest_client.download_model(
+            response = yatai_rest_client.v1.download_model(
                 model_repository_name=name, version=version
             )
         else:
@@ -958,30 +962,33 @@ def _do_pull_model(
                 with self.spin(
                     text=f'Getting a presigned download url for model "{_tag}"'
                 ):
-                    remote_model = yatai_rest_client.presign_model_download_url(
+                    remote_model = yatai_rest_client.v1.presign_model_download_url(
                         name, version
                     )
                     presigned_download_url = remote_model.presigned_download_url
 
-            response = requests.get(presigned_download_url, stream=True)
-            if response.status_code != 200:
-                raise BentoMLException(
-                    f'Failed to download model "{_tag}": {response.text}'
+        with NamedTemporaryFile() as tar_file:
+            with httpx.stream("GET", presigned_download_url) as response:
+                if response.status_code != 200:
+                    response.read()  # load the streamed body so `.text` works
+                    msg = f'Failed to download model "{_tag}": {response.text}'
+                    raise BentoMLException(msg)
+
+                total_size_in_bytes = int(response.headers.get("content-length", 0))
+                block_size = 1024  # 1 Kibibyte
+                self.transmission_progress.update(
+                    download_task_id,
+                    description=f'Downloading model "{_tag}"',
+                    total=total_size_in_bytes,
+                    visible=True,
                 )
+                self.transmission_progress.start_task(download_task_id)
+                for data in response.iter_bytes(block_size):
+                    self.transmission_progress.update(
+                        download_task_id, advance=len(data)
+                    )
+                    tar_file.write(data)
 
-        total_size_in_bytes = int(response.headers.get("content-length", 0))
-        block_size = 1024  # 1 Kibibyte
-        with NamedTemporaryFile() as tar_file:
-            self.transmission_progress.update(
-                download_task_id,
-                description=f'Downloading model "{_tag}"',
-                total=total_size_in_bytes,
-                visible=True,
-            )
-            self.transmission_progress.start_task(download_task_id)
-            for data in response.iter_content(block_size):
-                self.transmission_progress.update(download_task_id, advance=len(data))
-                tar_file.write(data)
             self.log_progress.add_task(
                 f'[bold green]Finished downloading model "{_tag}" files'
             )
@@ -1005,7 +1012,7 @@ def _do_pull_model(
 
     def list_bentos(self, context: str | None = None) -> BentoWithRepositoryListSchema:
         yatai_rest_client = get_rest_api_client(context)
-        res = yatai_rest_client.get_bentos_list()
+        res = yatai_rest_client.v1.get_bentos_list()
         if res is None:
             raise BentoMLException("List bentos request failed")
 
@@ -1017,7 +1024,7 @@ def list_bentos(self, context: str | None = None) -> BentoWithRepositoryListSche
 
     def list_models(self, context: str | None = None) -> ModelWithRepositoryListSchema:
         yatai_rest_client = get_rest_api_client(context)
-        res = yatai_rest_client.get_models_list()
+        res = yatai_rest_client.v1.get_models_list()
         if res is None:
             raise BentoMLException("List models request failed")
 
diff --git a/src/bentoml/_internal/cloud/client.py b/src/bentoml/_internal/cloud/client.py
index 60e1fb49d96..16e62eb6813 100644
--- a/src/bentoml/_internal/cloud/client.py
+++ b/src/bentoml/_internal/cloud/client.py
@@ -4,53 +4,50 @@
 import typing as t
 from urllib.parse import urljoin
 
-import requests
+import httpx
 
 from ...exceptions import CloudRESTApiClientError
 from ..configuration import BENTOML_VERSION
-from .schemas import BentoRepositorySchema
-from .schemas import BentoSchema
-from .schemas import BentoWithRepositoryListSchema
-from .schemas import ClusterFullSchema
-from .schemas import ClusterListSchema
-from .schemas import CompleteMultipartUploadSchema
-from .schemas import CreateBentoRepositorySchema
-from .schemas import CreateBentoSchema
-from .schemas import CreateDeploymentSchema
-from .schemas import CreateModelRepositorySchema
-from .schemas import CreateModelSchema
-from .schemas import DeploymentListSchema
-from .schemas import DeploymentSchema
-from .schemas import FinishUploadBentoSchema
-from .schemas import FinishUploadModelSchema
-from .schemas import ModelRepositorySchema
-from .schemas import ModelSchema
-from .schemas import ModelWithRepositoryListSchema
-from .schemas import OrganizationSchema
-from .schemas import PreSignMultipartUploadUrlSchema
-from .schemas import UpdateBentoSchema
-from .schemas import UpdateDeploymentSchema
-from .schemas import UserSchema
-from .schemas import schema_from_json
-from .schemas import schema_from_object
-from .schemas import schema_to_json
+from .schemas.schemasv1 import BentoRepositorySchema
+from .schemas.schemasv1 import BentoSchema
+from .schemas.schemasv1 import BentoWithRepositoryListSchema
+from .schemas.schemasv1 import ClusterFullSchema
+from .schemas.schemasv1 import ClusterListSchema
+from .schemas.schemasv1 import CompleteMultipartUploadSchema
+from .schemas.schemasv1 import CreateBentoRepositorySchema
+from .schemas.schemasv1 import CreateBentoSchema
+from .schemas.schemasv1 import CreateDeploymentSchema as CreateDeploymentSchemaV1
+from .schemas.schemasv1 import CreateModelRepositorySchema
+from .schemas.schemasv1 import CreateModelSchema
+from .schemas.schemasv1 import DeploymentFullSchema
+from .schemas.schemasv1 import DeploymentListSchema
+from .schemas.schemasv1 import FinishUploadBentoSchema
+from .schemas.schemasv1 import FinishUploadModelSchema
+from .schemas.schemasv1 import ModelRepositorySchema
+from .schemas.schemasv1 import ModelSchema
+from .schemas.schemasv1 import ModelWithRepositoryListSchema
+from .schemas.schemasv1 import OrganizationSchema
+from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema
+from .schemas.schemasv1 import UpdateBentoSchema
+from .schemas.schemasv1 import UpdateDeploymentSchema
+from .schemas.schemasv1 import UserSchema
+from .schemas.schemasv2 import CreateDeploymentSchema as CreateDeploymentSchemaV2
+from .schemas.schemasv2 import DeploymentFullSchema as DeploymentFullSchemaV2
+from .schemas.schemasv2 import DeploymentListSchema as DeploymentListSchemaV2
+from .schemas.schemasv2 import UpdateDeploymentSchema as UpdateDeploymentSchemaV2
+from .schemas.utils import schema_from_json
+from .schemas.utils import schema_from_object
+from .schemas.utils import schema_to_json
 
 logger = logging.getLogger(__name__)
 
 
-class RestApiClient:
-    def __init__(self, endpoint: str, api_token: str) -> None:
+class BaseRestApiClient:
+    def __init__(self, endpoint: str, session: httpx.Client) -> None:
         self.endpoint = endpoint
-        self.session = requests.Session()
-        self.session.headers.update(
-            {
-                "X-YATAI-API-TOKEN": api_token,
-                "Content-Type": "application/json",
-                "X-Bentoml-Version": BENTOML_VERSION,
-            }
-        )
+        self.session = session
 
-    def _is_not_found(self, resp: requests.Response) -> bool:
+    def _is_not_found(self, resp: httpx.Response) -> bool:
         # We used to return 400 for record not found, handle both cases
         return (
             resp.status_code == 404
@@ -58,12 +55,14 @@ def _is_not_found(self, resp: requests.Response) -> bool:
             and "record not found" in resp.text
         )
 
-    def _check_resp(self, resp: requests.Response) -> None:
+    def _check_resp(self, resp: httpx.Response) -> None:
         if resp.status_code != 200:
             raise CloudRESTApiClientError(
                 f"request failed with status code {resp.status_code}: {resp.text}"
             )
 
+
+class RestApiClientV1(BaseRestApiClient):
     def get_current_user(self) -> UserSchema | None:
         url = urljoin(self.endpoint, "/api/v1/auth/current")
         resp = self.session.get(url)
@@ -96,7 +95,7 @@ def create_bento_repository(
         self, req: CreateBentoRepositorySchema
     ) -> BentoRepositorySchema:
         url = urljoin(self.endpoint, "/api/v1/bento_repositories")
-        resp = self.session.post(url, data=schema_to_json(req))
+        resp = self.session.post(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoRepositorySchema)
 
@@ -117,7 +116,7 @@ def create_bento(
         url = urljoin(
             self.endpoint, f"/api/v1/bento_repositories/{bento_repository_name}/bentos"
         )
-        resp = self.session.post(url, data=schema_to_json(req))
+        resp = self.session.post(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoSchema)
 
@@ -128,7 +127,7 @@ def update_bento(
             self.endpoint,
             f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoSchema)
 
@@ -175,7 +174,7 @@ def presign_bento_multipart_upload_url(
             self.endpoint,
             f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/presign_multipart_upload_url",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoSchema)
 
@@ -189,7 +188,7 @@ def complete_bento_multipart_upload(
             self.endpoint,
             f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/complete_multipart_upload",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoSchema)
 
@@ -211,7 +210,7 @@ def finish_upload_bento(
             self.endpoint,
             f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/finish_upload",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, BentoSchema)
 
@@ -224,7 +223,7 @@ def upload_bento(
         )
         resp = self.session.put(
             url,
-            data=data,
+            content=data,
             headers=dict(
                 self.session.headers, **{"Content-Type": "application/octet-stream"}
             ),
@@ -234,14 +233,29 @@ def upload_bento(
 
     def download_bento(
         self, bento_repository_name: str, version: str
-    ) -> requests.Response:
+    ) -> httpx.Response:
+        """Open a streaming download of a bento archive.
+
+        Returns the response with its body still unread. The caller must
+        consume it (e.g. ``iter_bytes()``) and then ``close()`` it.
+
+        Raises:
+            CloudRESTApiClientError: if the status code is not 200.
+        """
         url = urljoin(
             self.endpoint,
             f"/api/v1/bento_repositories/{bento_repository_name}/bentos/{version}/download",
         )
-        resp = self.session.get(url, stream=True)
-        self._check_resp(resp)
-        return resp
+        # Not `with self.session.stream(...)`: leaving that block closes the
+        # response, so the caller would get a stream it can no longer read.
+        resp = self.session.send(self.session.build_request("GET", url), stream=True)
+        if resp.status_code != 200:
+            try:
+                resp.read()  # load the body so _check_resp can use resp.text
+                self._check_resp(resp)
+            finally:
+                resp.close()
+        return resp
 
     def get_model_repository(
         self, model_repository_name: str
@@ -259,7 +258,7 @@ def create_model_repository(
         self, req: CreateModelRepositorySchema
     ) -> ModelRepositorySchema:
         url = urljoin(self.endpoint, "/api/v1/model_repositories")
-        resp = self.session.post(url, data=schema_to_json(req))
+        resp = self.session.post(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelRepositorySchema)
 
@@ -280,7 +279,7 @@ def create_model(
         url = urljoin(
             self.endpoint, f"/api/v1/model_repositories/{model_repository_name}/models"
         )
-        resp = self.session.post(url, data=schema_to_json(req))
+        resp = self.session.post(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelSchema)
 
@@ -327,7 +326,7 @@ def presign_model_multipart_upload_url(
             self.endpoint,
             f"/api/v1/model_repositories/{model_repository_name}/models/{version}/presign_multipart_upload_url",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelSchema)
 
@@ -341,7 +340,7 @@ def complete_model_multipart_upload(
             self.endpoint,
             f"/api/v1/model_repositories/{model_repository_name}/models/{version}/complete_multipart_upload",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelSchema)
 
@@ -363,7 +362,7 @@ def finish_upload_model(
             self.endpoint,
             f"/api/v1/model_repositories/{model_repository_name}/models/{version}/finish_upload",
         )
-        resp = self.session.patch(url, data=schema_to_json(req))
+        resp = self.session.patch(url, content=schema_to_json(req))
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelSchema)
 
@@ -376,7 +375,7 @@ def upload_model(
         )
         resp = self.session.put(
             url,
-            data=data,
+            content=data,
             headers=dict(
                 self.session.headers, **{"Content-Type": "application/octet-stream"}
             ),
@@ -386,14 +385,29 @@ def upload_model(
 
     def download_model(
         self, model_repository_name: str, version: str
-    ) -> requests.Response:
+    ) -> httpx.Response:
+        """Open a streaming download of a model archive.
+
+        Returns the response with its body still unread. The caller must
+        consume it (e.g. ``iter_bytes()``) and then ``close()`` it.
+
+        Raises:
+            CloudRESTApiClientError: if the status code is not 200.
+        """
         url = urljoin(
             self.endpoint,
             f"/api/v1/model_repositories/{model_repository_name}/models/{version}/download",
         )
-        resp = self.session.get(url, stream=True)
-        self._check_resp(resp)
-        return resp
+        # Not `with self.session.stream(...)`: leaving that block closes the
+        # response, so the caller would get a stream it can no longer read.
+        resp = self.session.send(self.session.build_request("GET", url), stream=True)
+        if resp.status_code != 200:
+            try:
+                resp.read()  # load the body so _check_resp can use resp.text
+                self._check_resp(resp)
+            finally:
+                resp.close()
+        return resp
 
     def get_bento_repositories_list(
         self, bento_repository_name: str
@@ -421,7 +420,7 @@ def get_models_list(self) -> ModelWithRepositoryListSchema | None:
         self._check_resp(resp)
         return schema_from_json(resp.text, ModelWithRepositoryListSchema)
 
-    def get_deployment_list(
+    def get_cluster_deployment_list(
         self, cluster_name: str, **params: str | int | None
     ) -> DeploymentListSchema | None:
         url = urljoin(self.endpoint, f"/api/v1/clusters/{cluster_name}/deployments")
@@ -431,17 +430,34 @@ def get_deployment_list(
         self._check_resp(resp)
         return schema_from_json(resp.text, DeploymentListSchema)
 
+    def get_organization_deployment_list(
+        self, **params: str | int | None
+    ) -> DeploymentListSchema | None:
+        """List deployments via ``/api/v1/deployments``; ``None`` if not found.
+
+        Keyword arguments become query parameters; ``None`` values are omitted.
+        """
+        url = urljoin(self.endpoint, "/api/v1/deployments")
+        # httpx would send a None value as an empty string instead of
+        # omitting it (requests dropped it), so filter those out up front.
+        query = {key: val for key, val in params.items() if val is not None}
+        resp = self.session.get(url, params=query)
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentListSchema)
+
     def create_deployment(
-        self, cluster_name: str, create_schema: CreateDeploymentSchema
-    ) -> DeploymentSchema | None:
+        self, cluster_name: str, create_schema: CreateDeploymentSchemaV1
+    ) -> DeploymentFullSchema | None:
         url = urljoin(self.endpoint, f"/api/v1/clusters/{cluster_name}/deployments")
-        resp = self.session.post(url, data=schema_to_json(create_schema))
+        resp = self.session.post(url, content=schema_to_json(create_schema))
         self._check_resp(resp)
-        return schema_from_json(resp.text, DeploymentSchema)
+        return schema_from_json(resp.text, DeploymentFullSchema)
 
     def get_deployment(
         self, cluster_name: str, kube_namespace: str, deployment_name: str
-    ) -> DeploymentSchema | None:
+    ) -> DeploymentFullSchema | None:
         url = urljoin(
             self.endpoint,
             f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}",
@@ -450,7 +459,7 @@ def get_deployment(
         if self._is_not_found(resp):
             return None
         self._check_resp(resp)
-        return schema_from_json(resp.text, DeploymentSchema)
+        return schema_from_json(resp.text, DeploymentFullSchema)
 
     def update_deployment(
         self,
@@ -458,20 +467,20 @@ def update_deployment(
         kube_namespace: str,
         deployment_name: str,
         update_schema: UpdateDeploymentSchema,
-    ) -> DeploymentSchema | None:
+    ) -> DeploymentFullSchema | None:
         url = urljoin(
             self.endpoint,
             f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}",
         )
-        resp = self.session.patch(url, data=schema_to_json(update_schema))
+        resp = self.session.patch(url, content=schema_to_json(update_schema))
         if self._is_not_found(resp):
             return None
         self._check_resp(resp)
-        return schema_from_json(resp.text, DeploymentSchema)
+        return schema_from_json(resp.text, DeploymentFullSchema)
 
     def terminate_deployment(
         self, cluster_name: str, kube_namespace: str, deployment_name: str
-    ) -> DeploymentSchema | None:
+    ) -> DeploymentFullSchema | None:
         url = urljoin(
             self.endpoint,
             f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}/terminate",
@@ -480,11 +489,11 @@ def terminate_deployment(
         if self._is_not_found(resp):
             return None
         self._check_resp(resp)
-        return schema_from_json(resp.text, DeploymentSchema)
+        return schema_from_json(resp.text, DeploymentFullSchema)
 
     def delete_deployment(
         self, cluster_name: str, kube_namespace: str, deployment_name: str
-    ) -> DeploymentSchema | None:
+    ) -> DeploymentFullSchema | None:
         url = urljoin(
             self.endpoint,
             f"/api/v1/clusters/{cluster_name}/namespaces/{kube_namespace}/deployments/{deployment_name}",
@@ -493,7 +502,7 @@ def delete_deployment(
         if self._is_not_found(resp):
             return None
         self._check_resp(resp)
-        return schema_from_json(resp.text, DeploymentSchema)
+        return schema_from_json(resp.text, DeploymentFullSchema)
 
     def get_cluster_list(
         self, params: dict[str, str | int] | None = None
@@ -527,3 +536,112 @@ def get_latest_model(
         self._check_resp(resp)
         models = resp.json()["items"]
         return schema_from_object(models[0], ModelSchema) if models else None
+
+
+class RestApiClientV2(BaseRestApiClient):
+    """Client for the ``/api/v2/deployments`` endpoints.
+
+    The target cluster is passed as the ``cluster`` query parameter.
+    """
+
+    def create_deployment(
+        self, create_schema: CreateDeploymentSchemaV2, cluster_name: str
+    ) -> DeploymentFullSchemaV2:
+        """Create a deployment in ``cluster_name`` from ``create_schema``."""
+        url = urljoin(self.endpoint, "/api/v2/deployments")
+        resp = self.session.post(
+            url, content=schema_to_json(create_schema), params={"cluster": cluster_name}
+        )
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentFullSchemaV2)
+
+    def update_deployment(
+        self,
+        update_schema: UpdateDeploymentSchemaV2,
+        cluster_name: str,
+        deployment_name: str,
+    ) -> DeploymentFullSchemaV2 | None:
+        """PUT ``update_schema`` to a deployment; ``None`` if it is not found."""
+        url = urljoin(self.endpoint, f"/api/v2/deployments/{deployment_name}")
+        data = schema_to_json(update_schema)
+        resp = self.session.put(url, content=data, params={"cluster": cluster_name})
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentFullSchemaV2)
+
+    def get_deployment(
+        self, cluster_name: str, deployment_name: str
+    ) -> DeploymentFullSchemaV2 | None:
+        """Fetch a deployment by name; ``None`` if it is not found."""
+        url = urljoin(self.endpoint, f"/api/v2/deployments/{deployment_name}")
+        resp = self.session.get(url, params={"cluster": cluster_name})
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentFullSchemaV2)
+
+    def list_deployment(
+        self,
+        cluster_name: str | None = None,
+        all: bool | None = None,
+        # if both of the above are None, list the default cluster's deployments
+        count: int | None = None,
+        q: str | None = None,
+        search: str | None = None,
+        start: int | None = None,
+    ) -> DeploymentListSchemaV2 | None:
+        """List deployments; arguments left as ``None`` are not sent."""
+        url = urljoin(self.endpoint, "/api/v2/deployments")
+        filters = {
+            "cluster": cluster_name,
+            "all": all,
+            "count": count,
+            "q": q,
+            "search": search,
+            "start": start,
+        }
+        # httpx sends a None value as an empty string ("cluster=") instead of
+        # omitting it as requests did, so unset filters are dropped here.
+        params = {key: val for key, val in filters.items() if val is not None}
+        resp = self.session.get(url, params=params)
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentListSchemaV2)
+
+    def terminate_deployment(
+        self, cluster_name: str, deployment_name: str
+    ) -> DeploymentFullSchemaV2 | None:
+        """POST a terminate request; ``None`` if the deployment is not found."""
+        url = urljoin(self.endpoint, f"/api/v2/deployments/{deployment_name}/terminate")
+        resp = self.session.post(url, params={"cluster": cluster_name})
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentFullSchemaV2)
+
+    def delete_deployment(
+        self, cluster_name: str, deployment_name: str
+    ) -> DeploymentFullSchemaV2 | None:
+        """Delete a deployment by name; ``None`` if it is not found."""
+        url = urljoin(self.endpoint, f"/api/v2/deployments/{deployment_name}")
+        resp = self.session.delete(url, params={"cluster": cluster_name})
+        if self._is_not_found(resp):
+            return None
+        self._check_resp(resp)
+        return schema_from_json(resp.text, DeploymentFullSchemaV2)
+
+
+class RestApiClient:
+    """Facade bundling the v1 and v2 API clients over one shared httpx session."""
+
+    def __init__(self, endpoint: str, api_token: str) -> None:
+        auth_headers = {
+            "X-YATAI-API-TOKEN": api_token,
+            "Content-Type": "application/json",
+            "X-Bentoml-Version": BENTOML_VERSION,
+        }
+        self.session = httpx.Client(headers=auth_headers)
+        self.v2 = RestApiClientV2(endpoint, self.session)
+        self.v1 = RestApiClientV1(endpoint, self.session)
diff --git a/src/bentoml/_internal/cloud/config.py b/src/bentoml/_internal/cloud/config.py
index 633aed22581..d2c22d67736 100644
--- a/src/bentoml/_internal/cloud/config.py
+++ b/src/bentoml/_internal/cloud/config.py
@@ -34,7 +34,7 @@ def get_rest_api_client(self) -> RestApiClient:
     def get_email(self) -> str:
         if not self.email:
             cli = self.get_rest_api_client()
-            user = cli.get_current_user()
+            user = cli.v1.get_current_user()
             if user is None:
                 raise CloudRESTApiClientError(
                     "Unable to get current user from yatai server"
diff --git a/src/bentoml/_internal/cloud/deployment.py b/src/bentoml/_internal/cloud/deployment.py
index ba043f80843..1f4174ddb44 100644
--- a/src/bentoml/_internal/cloud/deployment.py
+++ b/src/bentoml/_internal/cloud/deployment.py
@@ -1,29 +1,35 @@
 from __future__ import annotations
 
-import json
 import logging
+import time
 import typing as t
 
 import attr
+import yaml
 from deepmerge.merger import Merger
+from simple_di import Provide
+from simple_di import inject
+
+if t.TYPE_CHECKING:
+    from _bentoml_impl.client import AsyncHTTPClient
+    from _bentoml_impl.client import SyncHTTPClient
+    from bentoml._internal.bento.bento import BentoStore
+    from bentoml._internal.cloud.bentocloud import BentoCloudClient
+
 
 from ...exceptions import BentoMLException
+from ...exceptions import NotFound
+from ..configuration.containers import BentoMLContainer
 from ..tag import Tag
 from ..utils import bentoml_cattr
-from ..utils import first_not_none
 from ..utils import resolve_user_filepath
 from .config import get_rest_api_client
-from .schemas import CreateDeploymentSchema
-from .schemas import DeploymentListSchema
-from .schemas import DeploymentMode
-from .schemas import DeploymentSchema
-from .schemas import DeploymentTargetCanaryRule
-from .schemas import DeploymentTargetConfig
-from .schemas import DeploymentTargetHPAConf
-from .schemas import DeploymentTargetRunnerConfig
-from .schemas import DeploymentTargetType
-from .schemas import FullDeploymentSchema
-from .schemas import UpdateDeploymentSchema
+from .schemas.modelschemas import AccessControl
+from .schemas.modelschemas import DeploymentStatus
+from .schemas.modelschemas import DeploymentTargetHPAConf
+from .schemas.schemasv2 import CreateDeploymentSchema as CreateDeploymentSchemaV2
+from .schemas.schemasv2 import DeploymentSchema
+from .schemas.schemasv2 import UpdateDeploymentSchema as UpdateDeploymentSchemaV2
 
 logger = logging.getLogger(__name__)
 
@@ -37,28 +43,138 @@
 )
 
 
-@attr.define
-class Resource:
-    @classmethod
-    def for_hpa_conf(cls, **kwargs: t.Any) -> DeploymentTargetHPAConf:
-        return bentoml_cattr.structure(kwargs, DeploymentTargetHPAConf)
-
-    @classmethod
-    def for_runner(cls, **kwargs: t.Any) -> DeploymentTargetRunnerConfig:
-        exclusive_api_server_key = {
-            v for v in kwargs if v not in attr.fields_dict(DeploymentTargetRunnerConfig)
-        }
-        return bentoml_cattr.structure(
-            {k: v for k, v in kwargs.items() if k not in exclusive_api_server_key},
-            DeploymentTargetRunnerConfig,
+@inject
+def get_real_bento_tag(
+    project_path: str | None = None,
+    bento: str | Tag | None = None,
+    context: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    _cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
+) -> Tag:
+    """Resolve the bento tag to deploy, pushing the bento when it is local.
+
+    ``project_path`` wins: the project is built and pushed. Otherwise ``bento``
+    is pushed if the local store has it, else its parsed tag is returned.
+    """
+    if project_path:
+        from bentoml.bentos import build_bentofile
+
+        built = build_bentofile(build_ctx=project_path, _bento_store=_bento_store)
+        _cloud_client.push_bento(bento=built, context=context)
+        return built.tag
+    if bento:
+        tag = Tag.from_taglike(bento)
+        try:
+            local_bento = _bento_store.get(tag)
+        except NotFound:
+            if tag.version in (None, "latest"):
+                raise  # an unpinned tag has to resolve in the local store
+            return tag  # pinned version: BentoCloud is expected to have it
+        _cloud_client.push_bento(bento=local_bento, context=context)
+        return local_bento.tag
+    else:
+        raise BentoMLException(
+            "Create a deployment needs a target; project path or bento is necessary"
         )
 
-    @classmethod
-    def for_api_server(cls, **kwargs: t.Any) -> DeploymentTargetConfig:
-        return bentoml_cattr.structure(kwargs, DeploymentTargetConfig)
+
+@attr.define
+class DeploymentInfo:  # attrs record describing one deployment; see to_dict()
+    __omit_if_default__ = True  # presumably consumed by bentoml_cattr; TODO confirm
+    name: str
+    created_at: str
+    bento: Tag
+    status: DeploymentStatus
+    admin_console: str
+    endpoint: t.Optional[str]
+    config: dict[str, t.Any]
+
+    def to_dict(self) -> t.Dict[str, t.Any]:  # unstructures self via bentoml_cattr
+        return bentoml_cattr.unstructure(self)
 
 
+@attr.define
 class Deployment:
+    context: t.Optional[str]
+    cluster_name: str
+    name: str
+    _schema: DeploymentSchema = attr.field(alias="_schema", repr=False)
+    _urls: t.Optional[list[str]] = attr.field(alias="_urls", default=None)
+
+    @staticmethod
+    def _fix_scaling(
+        scaling: DeploymentTargetHPAConf | None,
+    ) -> DeploymentTargetHPAConf:
+        """Fill in missing replica bounds (in place) and reject invalid ones."""
+        if scaling is None:
+            return DeploymentTargetHPAConf(1, 1)
+        if scaling.min_replicas is None:
+            scaling.min_replicas = 1
+        if scaling.max_replicas is None:
+            scaling.max_replicas = max(scaling.min_replicas, 1)
+        if scaling.min_replicas > scaling.max_replicas:  # clamp min down, don't fail
+            scaling.min_replicas = scaling.max_replicas
+            logger.warning(
+                "min scaling value is greater than max scaling value, setting min scaling to max scaling value"
+            )
+        if scaling.min_replicas < 0:
+            raise BentoMLException(
+                "min scaling values must be greater than or equal to 0"
+            )
+        if scaling.max_replicas <= 0:
+            raise BentoMLException("max scaling values must be greater than 0")
+        return scaling
+
+    @staticmethod
+    def _validate_input_on_distributed(
+        config_struct: UpdateDeploymentSchemaV2, distributed: bool
+    ) -> None:
+        """Reject config fields that do not apply to the chosen deployment mode.
+
+        Distributed deployments configure each service separately, so the
+        top-level per-service settings must be left unset (a default-valued
+        ``scaling`` counts as unset). Non-distributed deployments must not
+        carry a ``services`` mapping.
+
+        Raises:
+            BentoMLException: on the first offending field.
+        """
+        if not distributed:
+            if config_struct.services != {}:
+                raise BentoMLException(
+                    "The 'services' field is only allowed for distributed deployments."
+                )
+            return
+        scaling = config_struct.scaling
+        per_service_only = {
+            "instance_type": config_struct.instance_type,
+            "scaling": None if scaling == DeploymentTargetHPAConf() else scaling,
+            "deployment_strategy": config_struct.deployment_strategy,
+            "extras": config_struct.extras,
+            "cold_start_timeout": config_struct.cold_start_timeout,
+        }
+        for field, value in per_service_only.items():
+            if value is not None:
+                raise BentoMLException(
+                    f"The '{field}' field is not allowed for distributed deployments. Please specify it per service in the services field."
+                )
+
+    @classmethod
+    def _fix_and_validate_schema(
+        cls, config_struct: UpdateDeploymentSchemaV2, distributed: bool
+    ) -> None:
+        """Validate ``config_struct`` for the mode, then default scaling and access."""
+        cls._validate_input_on_distributed(config_struct, distributed)
+        if not distributed:
+            config_struct.scaling = cls._fix_scaling(config_struct.scaling)
+        elif len(config_struct.services) == 0:
+            raise BentoMLException("The configuration for services is mandatory")
+        else:
+            for svc in config_struct.services.values():
+                svc.scaling = cls._fix_scaling(svc.scaling)
+        if config_struct.access_type is None:
+            config_struct.access_type = AccessControl.PUBLIC
+
     @classmethod
     def _get_default_kube_namespace(
         cls,
@@ -66,7 +182,7 @@ def _get_default_kube_namespace(
         context: str | None = None,
     ) -> str:
         cloud_rest_client = get_rest_api_client(context)
-        res = cloud_rest_client.get_cluster(cluster_name)
+        res = cloud_rest_client.v1.get_cluster(cluster_name)
         if not res:
             raise BentoMLException("Cannot get default kube namespace")
         return res.config.default_deployment_kube_namespace
@@ -74,330 +190,140 @@ def _get_default_kube_namespace(
     @classmethod
     def _get_default_cluster(cls, context: str | None = None) -> str:
         cloud_rest_client = get_rest_api_client(context)
-        res = cloud_rest_client.get_cluster_list(params={"count": 1})
+        res = cloud_rest_client.v1.get_cluster_list(params={"count": 1})
         if not res:
             raise BentoMLException("Failed to get list of clusters.")
         if not res.items:
             raise BentoMLException("Cannot get default clusters.")
         return res.items[0].name
 
-    @classmethod
-    def _create_deployment(
-        cls,
-        create_deployment_schema: CreateDeploymentSchema,
-        context: str | None = None,
-        cluster_name: str | None = None,
-    ) -> DeploymentSchema:
-        cloud_rest_client = get_rest_api_client(context)
-        if cluster_name is None:
-            cluster_name = cls._get_default_cluster(context)
-        if create_deployment_schema.kube_namespace is None:
-            create_deployment_schema.kube_namespace = cls._get_default_kube_namespace(
-                cluster_name, context
-            )
-        for target in create_deployment_schema.targets:
-            if (
-                cloud_rest_client.get_bento(target.bento_repository, target.bento)
-                is None
-            ):
-                raise BentoMLException(
-                    f"Create deployment: {target.bento_repository}:{target.bento} does not exist"
-                )
-        if (
-            cloud_rest_client.get_deployment(
-                cluster_name,
-                create_deployment_schema.kube_namespace,
-                create_deployment_schema.name,
-            )
-            is not None
-        ):
-            raise BentoMLException("Create deployment: Deployment already exists")
-        res = cloud_rest_client.create_deployment(
-            cluster_name, create_deployment_schema
-        )
+    def _refetch(self) -> None:  # reload the cached schema and URLs from the v2 API
+        cloud_rest_client = get_rest_api_client(self.context)
+        res = cloud_rest_client.v2.get_deployment(self.cluster_name, self.name)
         if res is None:
-            raise BentoMLException("Create deployment request failed")
-        logger.debug("Deployment Schema: %s", create_deployment_schema)
-        return res
+            raise NotFound(f"deployment {self.name} is not found")
+        self._schema = res  # replace the cached schema wholesale
+        self._urls = res.urls  # keep the URL list in step with the schema
 
-    @classmethod
-    def _update_deployment(
-        cls,
-        deployment_name: str,
-        update_deployment_schema: UpdateDeploymentSchema,
-        kube_namespace: str | None = None,
-        context: str | None = None,
-        cluster_name: str | None = None,
-    ) -> DeploymentSchema:
-        cloud_rest_client = get_rest_api_client(context)
-        if cluster_name is None:
-            cluster_name = cls._get_default_cluster(context)
-        if kube_namespace is None:
-            kube_namespace = cls._get_default_kube_namespace(cluster_name, context)
-        for target in update_deployment_schema.targets:
-            if (
-                cloud_rest_client.get_bento(target.bento_repository, target.bento)
-                is None
-            ):
-                raise BentoMLException(
-                    f"Update deployment: {target.bento_repository}:{target.bento} does not exist"
-                )
-            cloud_rest_client.get_deployment(
-                cluster_name,
-                kube_namespace,
-                deployment_name,
+    def _conver_schema_to_update_schema(self) -> dict[str, t.Any]:
+        if self._schema.latest_revision is None:
+            raise BentoMLException(
+                f"Deployment {self._schema.name} has no latest revision"
             )
-
-        res = cloud_rest_client.update_deployment(
-            cluster_name, kube_namespace, deployment_name, update_deployment_schema
+        target_schema = next(iter(self._schema.latest_revision.targets or ()), None)
+        if target_schema is None:  # also reached when the target list is empty
+            raise BentoMLException(f"Deployment {self._schema.name} has no target")
+        if target_schema.config is None:
+            raise BentoMLException(f"Deployment {self._schema.name} has no config")
+        if target_schema.bento is None:
+            raise BentoMLException(f"Deployment {self._schema.name} has no bento")
+        update_schema = UpdateDeploymentSchemaV2(
+            services=target_schema.config.services,
+            instance_type=target_schema.config.instance_type,
+            deployment_strategy=target_schema.config.deployment_strategy,
+            scaling=target_schema.config.scaling,
+            envs=target_schema.config.envs,
+            extras=target_schema.config.extras,
+            access_type=target_schema.config.access_type,
+            bentoml_config_overrides=target_schema.config.bentoml_config_overrides,
+            bento=target_schema.bento.repository.name + ":" + target_schema.bento.name,
+            cold_start_timeout=target_schema.config.cold_start_timeout,
         )
-        if res is None:
-            raise BentoMLException("Update deployment request failed")
-        logger.debug("%s is created.", deployment_name)
-        logger.debug("Deployment Schema: %s", update_deployment_schema)
-        return res
-
-    @classmethod
-    def update(
-        cls,
-        deployment_name: str,
-        bento: Tag | str | None = None,
-        description: str | None = None,
-        expose_endpoint: bool | None = None,
-        cluster_name: str | None = None,
-        kube_namespace: str | None = None,
-        resource_instance: str | None = None,
-        hpa_conf: DeploymentTargetHPAConf | None = None,
-        runners_config: dict[str, DeploymentTargetRunnerConfig] | None = None,
-        api_server_config: DeploymentTargetConfig | None = None,
-        mode: DeploymentMode | None = None,
-        type: DeploymentTargetType | None = None,
-        context: str | None = None,
-        labels: dict[str, str] | None = None,
-        canary_rules: t.List[DeploymentTargetCanaryRule] | None = None,
-        latest_bento: bool = False,
-    ) -> DeploymentSchema:
-        from bentoml import get as get_bento
-
-        if mode is None:
-            mode = DeploymentMode.Function
-        if type is None:
-            type = DeploymentTargetType.STABLE
-
-        if cluster_name is None:
-            cluster_name = cls._get_default_cluster(context)
-        if kube_namespace is None:
-            kube_namespace = cls._get_default_kube_namespace(cluster_name, context)
+        return bentoml_cattr.unstructure(update_schema)  # dict form; still holds "bento"
 
-        base_schema = cls.get(deployment_name, context, cluster_name, kube_namespace)
-        # Deployment target always has length of 1
-        if base_schema.latest_revision is None:
+    def _conver_schema_to_bento(self) -> Tag:  # "<repository>:<bento name>" as a Tag
+        if self._schema.latest_revision is None:
             raise BentoMLException(
-                f"Deployment {deployment_name} has no latest revision"
-            )
-        deployment_target = base_schema.latest_revision.targets[0]
-
-        if bento is None:
-            # NOTE: bento.repository.name is the bento.name, and bento.name is the bento.version
-            # from bentocloud to bentoml.Tag concept
-            bento = deployment_target.bento.repository.name
-        bento = Tag.from_taglike(bento)
-        if latest_bento and bento.version is None or bento.version == "latest":
-            bento = get_bento(bento).tag
-        elif bento.version is None:
-            bento.version = deployment_target.bento.name
-
-        updated_config = bentoml_cattr.unstructure(deployment_target.config)
-        if hpa_conf is not None:
-            hpa_conf_dct = bentoml_cattr.unstructure(hpa_conf)
-            if "hpa_conf" in updated_config:
-                if updated_config["hpa_conf"] is None:
-                    updated_config["hpa_conf"] = {}
-                config_merger.merge(updated_config["hpa_conf"], hpa_conf_dct)
-            if "runners" in updated_config and updated_config["runners"] is not None:
-                for _, runner in updated_config["runners"].items():
-                    if runner["hpa_conf"] is None:
-                        runner["hpa_conf"] = {}
-                    config_merger.merge(runner["hpa_conf"], hpa_conf_dct)
-        if resource_instance is not None:
-            updated_config["resource_instance"] = resource_instance
-            if updated_config.get("runners") is not None:
-                for runner in updated_config["runners"].values():
-                    runner["resource_instance"] = resource_instance
-        if expose_endpoint is not None:
-            updated_config["enable_ingress"] = expose_endpoint
-
-        if api_server_config is not None:
-            if runners_config is not None:
-                api_server_config.runners = runners_config
-            config_merger.merge(
-                updated_config, bentoml_cattr.unstructure(api_server_config)
+                f"Deployment {self._schema.name} has no latest revision"
             )
-        elif runners_config is not None:
-            config_merger.merge(
-                updated_config,
-                {
-                    "runners": {
-                        k: bentoml_cattr.unstructure(v)
-                        for k, v in runners_config.items()
-                    }
-                },
-            )
-
-        dct_update: dict[str, t.Any] = {
-            "mode": first_not_none(mode, base_schema.mode),
-            "labels": first_not_none(
-                [{"key": key, "value": value} for key, value in labels.items()]
-                if labels
-                else None,
-                [bentoml_cattr.unstructure(i) for i in base_schema.labels],
-            ),
-            "description": description,
-        }
-        update_target = {
-            "type": first_not_none(type, deployment_target.type),
-            "bento": first_not_none(bento.version, deployment_target.bento.name),
-            "bento_repository": first_not_none(
-                bento.name, deployment_target.bento.repository.name
-            ),
-            "config": updated_config,
-        }
-
-        rules = first_not_none(
-            [bentoml_cattr.unstructure(i) for i in canary_rules]
-            if canary_rules
-            else None,
-            [bentoml_cattr.unstructure(i) for i in deployment_target.canary_rules]
-            if deployment_target.canary_rules
-            else None,
+        target_schema = next(iter(self._schema.latest_revision.targets or ()), None)
+        if target_schema is None:  # also reached when the target list is empty
+            raise BentoMLException(f"Deployment {self._schema.name} has no target")
+        if target_schema.bento is None:
+            raise BentoMLException(f"Deployment {self._schema.name} has no bento")
+        return Tag.from_taglike(
+            target_schema.bento.repository.name + ":" + target_schema.bento.name
         )
-        if rules:
-            update_target["canary_rules"] = rules
-
-        # update the target
-        dct_update["targets"] = [update_target]
 
-        return cls._update_deployment(
-            deployment_name=deployment_name,
-            update_deployment_schema=bentoml_cattr.structure(
-                dct_update, UpdateDeploymentSchema
-            ),
-            context=context,
-            cluster_name=cluster_name,
-            kube_namespace=kube_namespace,
+    @property
+    def info(self) -> DeploymentInfo:  # snapshot built from the cached schema
+        schema = self._conver_schema_to_update_schema()
+        del schema["bento"]  # the bento is reported in its own field below
+        return DeploymentInfo(
+            name=self.name,
+            bento=self._conver_schema_to_bento(),
+            status=self._schema.status,
+            admin_console=self.get_bento_cloud_url(),  # issues a cluster lookup
+            endpoint=self._urls[0] if self._urls else None,  # first URL, if any
+            config=schema,
+            created_at=self._schema.created_at.strftime("%Y-%m-%d %H:%M:%S"),
         )
 
-    @classmethod
-    def create(
-        cls,
-        deployment_name: str,
-        bento: Tag | str,
-        description: str | None = None,
-        expose_endpoint: bool | None = None,
-        cluster_name: str | None = None,
-        kube_namespace: str | None = None,
-        resource_instance: str | None = None,
-        hpa_conf: DeploymentTargetHPAConf | None = None,
-        runners_config: dict[str, DeploymentTargetRunnerConfig] | None = None,
-        api_server_config: DeploymentTargetConfig | None = None,
-        mode: DeploymentMode | None = None,
-        type: DeploymentTargetType | None = None,
-        context: str | None = None,
-        labels: dict[str, str] | None = None,
-        canary_rules: t.List[DeploymentTargetCanaryRule] | None = None,
-    ) -> DeploymentSchema:
-        if mode is None:
-            mode = DeploymentMode.Function
-        if type is None:
-            type = DeploymentTargetType.STABLE
-        bento_tag = Tag.from_taglike(bento)
+    def get_config(self) -> dict[str, t.Any]:
+        """Refetch the deployment and return its config without the bento entry."""
+        self._refetch()
+        config = self._conver_schema_to_update_schema()
+        config.pop("bento")
+        return config
 
-        dct: dict[str, t.Any] = {
-            "name": deployment_name,
-            "kube_namespace": kube_namespace,
-            "mode": mode,
-            "description": description,
-        }
-        if labels:
-            dct["labels"] = [
-                {"key": key, "value": value} for key, value in labels.items()
-            ]
-        if api_server_config is None:
-            _config: dict[str, t.Any] = {
-                "runners": {
-                    k: bentoml_cattr.unstructure(v) for k, v in runners_config.items()
-                }
-                if runners_config
-                else None,
-            }
-        else:
-            api_server_config.runners = runners_config
-            _config = bentoml_cattr.unstructure(api_server_config)
-
-        create_target: dict[str, t.Any] = {
-            "type": type,
-            "bento_repository": bento_tag.name,
-            "bento": bento_tag.version,
-            "config": _config,
-        }
-        if canary_rules:
-            create_target["canary_rules"] = [
-                bentoml_cattr.unstructure(i) for i in canary_rules
-            ]
-
-        # Only change the value by the top-level param if it is not provided already in api_server_config or runner_config
-        if hpa_conf:
-            hpa_conf_dct = bentoml_cattr.unstructure(hpa_conf)
-            _config_hpa_conf = _config.get("hpa_conf", None)
-            if _config_hpa_conf is None:
-                _config_hpa_conf = {}
-            for k, v in hpa_conf_dct.items():
-                if k not in _config_hpa_conf:
-                    _config_hpa_conf[k] = v
-                else:
-                    logger.warning(
-                        "Key %s is already set in API server config and will not be overwritten with hpa_conf.%s",
-                        k,
-                        k,
-                    )
-            _config["hpa_conf"] = _config_hpa_conf
-            if "runners" in _config and _config["runners"] is not None:
-                _runner_config = _config["runners"]
-                for runner in _runner_config.values():
-                    _runner_hpa_conf = runner.get("hpa_conf", None)
-                    if _runner_hpa_conf is None:
-                        _runner_hpa_conf = {}
-                    for k, v in hpa_conf_dct.items():
-                        if k not in _runner_hpa_conf:
-                            _runner_hpa_conf[k] = v
-                        else:
-                            logger.warning(
-                                "Key %s is already set in runner config and will not be overwritten with hpa_conf.%s",
-                                k,
-                                k,
-                            )
-                    runner["hpa_conf"] = _runner_hpa_conf
-
-        if resource_instance:
-            if "resource_instance" not in _config:
-                _config["resource_instance"] = resource_instance
-            if "runners" in _config and _config["runners"] is not None:
-                for runner in _config["runners"].values():
-                    if "resource_instance" not in runner:
-                        runner["resource_instance"] = resource_instance
-
-        if expose_endpoint is not None and _config.get("enable_ingress", None) is None:
-            _config["enable_ingress"] = expose_endpoint
-
-        # setup the create target itself
-        dct["targets"] = [create_target]
-
-        return cls._create_deployment(
-            context=context,
-            cluster_name=cluster_name,
-            create_deployment_schema=bentoml_cattr.structure(
-                dct, CreateDeploymentSchema
-            ),
+    def get_bento(self) -> str:  # bento tag of the deployment, as a string
+        self._refetch()  # read the current state before answering
+        return str(self._conver_schema_to_bento())
+
+    def get_status(self) -> str:
+        self._refetch()  # read the current state before answering
+        return self._schema.status.value  # same form wait_until_ready() compares
+
+    def get_client(
+        self,
+        is_async: bool = False,  # accepted but not read anywhere in this method
+        media_type: str = "application/json",
+        token: str | None = None,
+    ) -> SyncHTTPClient:
+        from _bentoml_impl.client import SyncHTTPClient
+
+        self._refetch()  # status and URLs are checked against fresh data
+        if self._schema.status != DeploymentStatus.Running:
+            raise BentoMLException(f"Deployment status is {self._schema.status}")
+        if self._urls is None or len(self._urls) != 1:  # exactly one URL expected
+            raise BentoMLException("Deployment url is not ready")
+        return SyncHTTPClient(self._urls[0], media_type=media_type, token=token)
+
+    def get_bento_cloud_url(self) -> str:  # page that `info` lists as admin_console
+        client = get_rest_api_client(self.context)  # only client.v1.endpoint is read
+        namespace = self._get_default_kube_namespace(self.cluster_name, self.context)
+        return f"{client.v1.endpoint}/clusters/{self.cluster_name}/namespaces/{namespace}/deployments/{self.name}"
+
+    def get_async_client(
+        self,
+        media_type: str = "application/json",
+        token: str | None = None,
+    ) -> AsyncHTTPClient:
+        from _bentoml_impl.client import AsyncHTTPClient
+
+        self._refetch()  # status and URLs are checked against fresh data
+        if self._schema.status != DeploymentStatus.Running:
+            raise BentoMLException(f"Deployment status is {self._schema.status}")
+        if self._urls is None or len(self._urls) != 1:  # exactly one URL expected
+            raise BentoMLException("Deployment url is not ready")
+        return AsyncHTTPClient(self._urls[0], media_type=media_type, token=token)
+
+    def wait_until_ready(self, timeout: int = 300, check_interval: int = 5) -> None:
+        """Poll until the status is Running; raise TimeoutError after ``timeout`` s."""
+        deadline = time.monotonic() + timeout  # immune to wall-clock changes
+        while time.monotonic() < deadline:
+            status = self.get_status()
+            if status == DeploymentStatus.Running.value:
+                logger.info(
+                    f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Deployment '{self.name}' is ready."
+                )
+                return
+            logger.info(
+                f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Waiting for deployment '{self.name}' to be ready. Current status: '{status}'."
+            )
+            time.sleep(check_interval)
+        raise TimeoutError(
+            f"Timed out waiting for deployment '{self.name}' to be ready."
         )
 
     @classmethod
@@ -405,158 +331,344 @@ def list(
         cls,
         context: str | None = None,
         cluster_name: str | None = None,
-        query: str | None = None,
         search: str | None = None,
-        count: int | None = None,
-        start: int | None = None,
-    ) -> DeploymentListSchema:
+    ) -> list[Deployment]:
         cloud_rest_client = get_rest_api_client(context)
         if cluster_name is None:
-            cluster_name = cls._get_default_cluster(context)
-        if query or start or count or search:
-            params = {"start": start, "count": count, "search": search, "q": query}
-            res = cloud_rest_client.get_deployment_list(cluster_name, **params)
+            res_count = cloud_rest_client.v2.list_deployment(all=True, search=search)
+            if res_count is None:
+                raise BentoMLException("List deployments request failed")
+            if res_count.total == 0:
+                return []
+            res = cloud_rest_client.v2.list_deployment(
+                search=search, count=res_count.total, all=True
+            )
             if res is None:
                 raise BentoMLException("List deployments request failed")
-            return res
         else:
-            all_deployment = cloud_rest_client.get_deployment_list(cluster_name)
-            if all_deployment is None:
+            res_count = cloud_rest_client.v2.list_deployment(
+                cluster_name, search=search
+            )
+            if res_count is None:
+                raise NotFound(f"Cluster {cluster_name} is not found")
+            if res_count.total == 0:
+                return []
+            res = cloud_rest_client.v2.list_deployment(
+                cluster_name, search=search, count=res_count.total
+            )
+            if res is None:
                 raise BentoMLException("List deployments request failed")
-            return all_deployment
+        return [
+            Deployment(
+                name=schema.name,
+                context=context,
+                cluster_name=schema.cluster.name,
+                _schema=schema,
+            )
+            for schema in res.items
+        ]
 
     @classmethod
-    def create_from_file(
+    def create(
         cls,
-        path_or_stream: str | t.TextIO,
+        bento: Tag,
+        access_type: str | None = None,
+        name: str | None = None,
+        cluster_name: str | None = None,
+        scaling_min: int | None = None,
+        scaling_max: int | None = None,
+        instance_type: str | None = None,
+        strategy: str | None = None,
+        envs: t.List[dict[str, t.Any]] | None = None,
+        extras: dict[str, t.Any] | None = None,
+        config_dct: dict[str, t.Any] | None = None,
+        config_file: str | t.TextIO | None = None,
         path_context: str | None = None,
         context: str | None = None,
-    ) -> DeploymentSchema:
-        if isinstance(path_or_stream, str):
-            real_path = resolve_user_filepath(path_or_stream, path_context)
+    ) -> Deployment:
+        """Create a deployment for ``bento`` and return a handle to it.
+
+        Settings come from ``config_dct`` when it is non-empty, else from
+        ``config_file`` (a path or an open YAML stream), else from the keyword
+        arguments. ``cluster_name`` applies whenever the chosen source names no
+        cluster, and the default cluster is looked up only as a last resort.
+        An empty ``name`` leaves naming to the cloud.
+        """
+        cloud_rest_client = get_rest_api_client(context)
+        dct: dict[str, t.Any] = {"bento": str(bento), "name": name or ""}
+
+        if config_dct:
+            merging_dct = config_dct
+        elif isinstance(config_file, str):
+            real_path = resolve_user_filepath(config_file, path_context)
             try:
                 with open(real_path, "r") as file:
-                    data = json.load(file)
+                    merging_dct = yaml.safe_load(file)
             except FileNotFoundError:
                 raise ValueError(f"File not found: {real_path}")
-            except json.JSONDecodeError as e:
-                raise ValueError(f"Error decoding JSON file: {real_path}\n{e}")
+            except yaml.YAMLError as exc:
+                logger.error("Error while parsing YAML file: %s", exc)
+                raise
             except Exception as e:
                 raise ValueError(
                     f"An error occurred while reading the file: {real_path}\n{e}"
                 )
+        elif config_file is not None:
+            try:
+                merging_dct = yaml.safe_load(config_file)
+            except yaml.YAMLError as exc:
+                logger.error("Error while parsing YAML config-file stream: %s", exc)
+                raise
         else:
-            # load the data from trusted stream
-            data = json.load(path_or_stream)
-        deployment_schema = bentoml_cattr.structure(data, FullDeploymentSchema)
-        return cls._create_deployment(
-            create_deployment_schema=deployment_schema,
+            merging_dct = {
+                "scaling": {"min_replicas": scaling_min, "max_replicas": scaling_max},
+                "instance_type": instance_type,
+                "deployment_strategy": strategy,
+                "envs": envs,
+                "extras": extras,
+                "access_type": access_type,
+                "cluster": cluster_name,
+            }
+        # an empty YAML document loads as None; treat it as "no settings"
+        dct.update(merging_dct or {})
+
+        if dct.get("cluster") is None:
+            # honour the explicit argument before asking for the default cluster
+            dct["cluster"] = cluster_name or cls._get_default_cluster(context)
+
+        if "distributed" not in dct:
+            dct["distributed"] = (
+                "services" in dct
+                and dct["services"] is not None
+                and dct["services"] != {}
+            )
+
+        config_struct = bentoml_cattr.structure(dct, CreateDeploymentSchemaV2)
+        cls._fix_and_validate_schema(config_struct, dct["distributed"])
+
+        res = cloud_rest_client.v2.create_deployment(
+            create_schema=config_struct, cluster_name=config_struct.cluster
+        )
+        logger.debug("Deployment Schema: %s", config_struct)
+        return Deployment(
             context=context,
-            cluster_name=deployment_schema.cluster_name,
+            cluster_name=config_struct.cluster,
+            name=res.name,
+            _schema=res,
         )
 
     @classmethod
-    def update_from_file(
+    def update(
         cls,
-        path_or_stream: str | t.TextIO,
+        name: str,
+        bento: Tag | str | None = None,
+        access_type: str | None = None,
+        cluster_name: str | None = None,
+        scaling_min: int | None = None,
+        scaling_max: int | None = None,
+        instance_type: str | None = None,
+        strategy: str | None = None,
+        envs: t.List[dict[str, t.Any]] | None = None,
+        extras: dict[str, t.Any] | None = None,
+        config_dct: dict[str, t.Any] | None = None,
+        config_file: str | t.TextIO | None = None,
         path_context: str | None = None,
         context: str | None = None,
-    ) -> DeploymentSchema:
-        if isinstance(path_or_stream, str):
-            real_path = resolve_user_filepath(path_or_stream, path_context)
+    ) -> Deployment:
+        """Merge new settings into an existing deployment and submit the result.
+
+        The current configuration is read back from the deployment and overlaid
+        with ``config_dct`` when it is non-empty, else ``config_file`` (a path or
+        an open YAML stream), else the keyword arguments that are not ``None``.
+        The merged schema is validated and sent; the same ``Deployment`` handle
+        is returned, and ``NotFound`` is raised when the update yields nothing.
+        """
+        deployment = Deployment.get(
+            name=name, context=context, cluster_name=cluster_name
+        )
+        current = deployment._conver_schema_to_update_schema()
+        distributed = deployment._schema.distributed
+        cloud_rest_client = get_rest_api_client(context)
+        if bento:
+            current["bento"] = str(bento)
+
+        if config_dct:
+            overrides = config_dct
+        elif isinstance(config_file, str):
+            real_path = resolve_user_filepath(config_file, path_context)
             try:
                 with open(real_path, "r") as file:
-                    data = json.load(file)
+                    overrides = yaml.safe_load(file)
             except FileNotFoundError:
                 raise ValueError(f"File not found: {real_path}")
-            except json.JSONDecodeError as e:
-                raise ValueError(f"Error decoding JSON file: {real_path}\n{e}")
+            except yaml.YAMLError as err:
+                logger.error("Error while parsing YAML file: %s", err)
+                raise
             except Exception as e:
                 raise ValueError(
                     f"An error occurred while reading the file: {real_path}\n{e}"
                 )
+        elif config_file is not None:
+            try:
+                overrides = yaml.safe_load(config_file)
+            except yaml.YAMLError as err:
+                logger.error("Error while parsing YAML config-file stream: %s", err)
+                raise
+
         else:
-            data = json.load(path_or_stream)
-        deployment_schema = bentoml_cattr.structure(data, FullDeploymentSchema)
-        return cls._update_deployment(
-            deployment_name=deployment_schema.name,
-            update_deployment_schema=deployment_schema,
-            context=context,
-            cluster_name=deployment_schema.cluster_name,
-            kube_namespace=deployment_schema.kube_namespace,
+            # only arguments that were actually supplied take part in the merge
+            scaling = {"min_replicas": scaling_min, "max_replicas": scaling_max}
+            options = {
+                "instance_type": instance_type,
+                "deployment_strategy": strategy,
+                "envs": envs,
+                "extras": extras,
+                "access_type": access_type,
+            }
+            overrides = {
+                "scaling": {k: v for k, v in scaling.items() if v is not None},
+                **{k: v for k, v in options.items() if v is not None},
+            }
+
+        # overlay the overrides; the merged result is read from `current` below
+        config_merger.merge(current, overrides)
+        config_struct = bentoml_cattr.structure(current, UpdateDeploymentSchemaV2)
+        cls._fix_and_validate_schema(config_struct, distributed)
+
+        res = cloud_rest_client.v2.update_deployment(
+            cluster_name=deployment.cluster_name,
+            deployment_name=name,
+            update_schema=config_struct,
         )
+        if res is None:
+            raise NotFound(f"deployment {name} is not found")
+        logger.debug("Deployment Schema: %s", config_struct)
+        deployment._schema = res
+        deployment._urls = res.urls
+        return deployment
 
     @classmethod
-    def get(
+    def apply(
         cls,
-        deployment_name: str,
-        context: str | None = None,
+        name: str,
+        bento: Tag | None = None,
         cluster_name: str | None = None,
-        kube_namespace: str | None = None,
-    ) -> DeploymentSchema:
+        config_dct: dict[str, t.Any] | None = None,
+        config_file: str | None = None,
+        path_context: str | None = None,
+        context: str | None = None,
+    ) -> Deployment:
+        """Update deployment ``name`` from a config, creating it when it is missing."""
+        try:
+            deployment = Deployment.get(
+                name=name, context=context, cluster_name=cluster_name
+            )
+        except NotFound:
+            # nothing to update: creating is only possible when a bento was given
+            if bento is None:
+                raise
+            return cls.create(
+                bento=bento,
+                name=name,
+                cluster_name=cluster_name,
+                config_dct=config_dct,
+                config_file=config_file,
+                path_context=path_context,
+                context=context,
+            )
         cloud_rest_client = get_rest_api_client(context)
-        if cluster_name is None:
-            cluster_name = cls._get_default_cluster(context)
-        if kube_namespace is None:
-            kube_namespace = cls._get_default_kube_namespace(cluster_name, context)
-        res = cloud_rest_client.get_deployment(
-            cluster_name, kube_namespace, deployment_name
+        if bento is None:
+            bento = deployment._conver_schema_to_bento()
+        payload: dict[str, t.Any] = {"bento": str(bento)}
+        distributed = deployment._schema.distributed
+
+        # a config file, when given, replaces whatever config_dct held
+        if config_file:
+            real_path = resolve_user_filepath(config_file, path_context)
+            try:
+                with open(real_path, "r") as fh:
+                    config_dct = yaml.safe_load(fh)
+            except FileNotFoundError:
+                raise ValueError(f"File not found: {real_path}")
+            except yaml.YAMLError as err:
+                logger.error("Error while parsing YAML file: %s", err)
+                raise
+            except Exception as err:
+                raise ValueError(
+                    f"An error occurred while reading the file: {real_path}\n{err}"
+                )
+        if config_dct is None:
+            raise BentoMLException("Apply a deployment needs a configuration input")
+
+        payload.update(config_dct)
+        config_struct = bentoml_cattr.structure(payload, UpdateDeploymentSchemaV2)
+        cls._fix_and_validate_schema(config_struct, distributed)
+        res = cloud_rest_client.v2.update_deployment(
+            deployment_name=name,
+            update_schema=config_struct,
+            cluster_name=deployment.cluster_name,
         )
         if res is None:
-            raise BentoMLException("Get deployment request failed")
-        return res
+            raise NotFound(f"deployment {name} is not found")
+        logger.debug("Deployment Schema: %s", config_struct)
+        deployment._schema = res
+        deployment._urls = res.urls
+        return deployment
 
     @classmethod
-    def delete(
+    def get(  # look up an existing deployment by name through the v2 REST client
         cls,
-        deployment_name: str,
+        name: str,  # deployment name passed to v2.get_deployment
         context: str | None = None,
         cluster_name: str | None = None,
-        kube_namespace: str | None = None,
-    ) -> DeploymentSchema:
-        cloud_rest_client = get_rest_api_client(context)
+    ) -> Deployment:  # raises NotFound when the lookup result is None
         if cluster_name is None:
             cluster_name = cls._get_default_cluster(context)
-        if kube_namespace is None:
-            kube_namespace = cls._get_default_kube_namespace(cluster_name, context)
-        res = cloud_rest_client.get_deployment(
-            cluster_name,
-            kube_namespace,
-            deployment_name,
-        )
+        cloud_rest_client = get_rest_api_client(context)  # client is bound to the given context
+        res = cloud_rest_client.v2.get_deployment(cluster_name, name)
         if res is None:
-            raise BentoMLException("Delete deployment: Deployment does not exist")
+            raise NotFound(f"deployment {name} is not found")
 
-        res = cloud_rest_client.delete_deployment(
-            cluster_name, kube_namespace, deployment_name
+        deployment = Deployment(  # handle carrying the resolved cluster, the schema and its urls
+            context=context,
+            cluster_name=cluster_name,
+            name=name,
+            _schema=res,
+            _urls=res.urls,
         )
-        if res is None:
-            raise BentoMLException("Delete deployment request failed")
-        return res
+        return deployment
 
     @classmethod
     def terminate(
         cls,
-        deployment_name: str,
+        name: str,  # deployment name passed to v2.terminate_deployment
         context: str | None = None,
         cluster_name: str | None = None,
-        kube_namespace: str | None = None,
-    ) -> DeploymentSchema:
+    ) -> Deployment:  # raises NotFound when the terminate call returns None
         cloud_rest_client = get_rest_api_client(context)
         if cluster_name is None:
             cluster_name = cls._get_default_cluster(context)
-        if kube_namespace is None:
-            kube_namespace = cls._get_default_kube_namespace(cluster_name, context)
-        res = cloud_rest_client.get_deployment(
-            cluster_name,
-            kube_namespace,
-            deployment_name,
-        )
+        res = cloud_rest_client.v2.terminate_deployment(cluster_name, name)  # no prior get call
         if res is None:
-            raise BentoMLException("Teminate deployment: Deployment does not exist")
-        res = cloud_rest_client.terminate_deployment(
-            cluster_name, kube_namespace, deployment_name
+            raise NotFound(f"Deployment {name} is not found")
+        return Deployment(  # new handle built from the terminate call's result
+            name=name,
+            cluster_name=cluster_name,
+            context=context,
+            _schema=res,
+            _urls=res.urls,
         )
+
+    @classmethod
+    def delete(  # delete a deployment by name through the v2 REST client
+        cls,
+        name: str,  # deployment name passed to v2.delete_deployment
+        context: str | None = None,
+        cluster_name: str | None = None,  # None -> cls._get_default_cluster(context)
+    ) -> None:  # returns nothing; a None result raises NotFound
+        cloud_rest_client = get_rest_api_client(context)
+        if cluster_name is None:
+            cluster_name = cls._get_default_cluster(context)
+        res = cloud_rest_client.v2.delete_deployment(cluster_name, name)  # only checked for None
         if res is None:
-            raise BentoMLException("Terminate deployment request failed")
-        return res
+            raise NotFound(f"Deployment {name} is not found")
diff --git a/src/bentoml/_internal/cloud/schemas/__init__.py b/src/bentoml/_internal/cloud/schemas/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/bentoml/_internal/cloud/schemas.py b/src/bentoml/_internal/cloud/schemas/modelschemas.py
similarity index 51%
rename from src/bentoml/_internal/cloud/schemas.py
rename to src/bentoml/_internal/cloud/schemas/modelschemas.py
index f8e7ef90bbc..2a2dfdc05c4 100644
--- a/src/bentoml/_internal/cloud/schemas.py
+++ b/src/bentoml/_internal/cloud/schemas/modelschemas.py
@@ -1,98 +1,20 @@
 from __future__ import annotations
 
-import json
 import typing as t
-from datetime import datetime
 from enum import Enum
 from typing import TYPE_CHECKING
 
 import attr
-import cattr
-from dateutil.parser import parse
 
-from bentoml._internal.tag import Tag
+from bentoml._internal.cloud.schemas.utils import dict_options_converter
 
-from ..bento.bento import BentoServiceInfo
+from ...bento.bento import BentoServiceInfo
 
 time_format = "%Y-%m-%d %H:%M:%S.%f"
 
 T = t.TypeVar("T")
 
 
-def datetime_encoder(time_obj: t.Optional[datetime]) -> t.Optional[str]:
-    if not time_obj:
-        return None
-    return time_obj.strftime(time_format)
-
-
-def datetime_decoder(datetime_str: t.Optional[str], _: t.Any) -> t.Optional[datetime]:
-    if not datetime_str:
-        return None
-    return parse(datetime_str)
-
-
-def tag_encoder(tag_obj: t.Optional[Tag]) -> t.Optional[str]:
-    if not tag_obj:
-        return None
-    return str(tag_obj)
-
-
-def tag_decoder(tag_str: t.Optional[str], _: t.Any) -> t.Optional[Tag]:
-    if not tag_str:
-        return None
-    return Tag.from_str(tag_str)
-
-
-def dict_options_converter(
-    options_type: type[T],
-) -> t.Callable[[T | dict[str, T]], T]:
-    def _converter(value: T | dict[str, T] | None) -> T:
-        if value is None:
-            return options_type()
-        if isinstance(value, dict):
-            return options_type(**value)
-        return value
-
-    return _converter
-
-
-cloud_converter = cattr.Converter()
-
-cloud_converter.register_unstructure_hook(datetime, datetime_encoder)
-cloud_converter.register_structure_hook(datetime, datetime_decoder)
-cloud_converter.register_unstructure_hook(Tag, tag_encoder)
-cloud_converter.register_structure_hook(Tag, tag_decoder)
-
-
-def schema_from_json(json_content: str, cls: t.Type[T]) -> T:
-    dct = json.loads(json_content)
-    return cloud_converter.structure(dct, cls)
-
-
-def schema_to_json(obj: t.Any) -> str:
-    res = cloud_converter.unstructure(obj, obj.__class__)
-    return json.dumps(res)
-
-
-def schema_from_object(obj: t.Any, cls: t.Type[T]) -> T:
-    return cloud_converter.structure(obj, cls)
-
-
-@attr.define
-class BaseSchema:
-    uid: str
-    created_at: datetime
-    updated_at: t.Optional[datetime]
-    deleted_at: t.Optional[datetime]
-
-
-@attr.define
-class BaseListSchema:
-    start: int
-    count: int
-    total: int
-
-
 class ResourceType(Enum):
     USER = "user"
     ORG = "organization"
@@ -112,58 +34,6 @@ class ResourceType(Enum):
     ResourceInstance = "resource_instance"
 
 
-@attr.define
-class ResourceSchema(BaseSchema):
-    name: str
-    resource_type: ResourceType
-    labels: t.List[LabelItemSchema]
-
-
-@attr.define
-class UserSchema:
-    name: str
-    email: str
-    first_name: str
-    last_name: str
-
-    def get_name(self) -> str:
-        if not self.first_name and not self.last_name:
-            return self.name
-        return f"{self.first_name} {self.last_name}".strip()
-
-
-@attr.define
-class OrganizationSchema(ResourceSchema):
-    description: str
-
-
-@attr.define
-class OrganizationListSchema(BaseListSchema):
-    items: t.List[OrganizationSchema]
-
-
-@attr.define
-class ClusterSchema(ResourceSchema):
-    description: str
-    creator: UserSchema
-
-
-@attr.define
-class ClusterConfigSchema:
-    default_deployment_kube_namespace: str
-
-
-@attr.define
-class ClusterListSchema(BaseListSchema):
-    items: t.List[ClusterSchema]
-
-
-@attr.define
-class CreateBentoRepositorySchema:
-    name: str
-    description: str
-
-
 class BentoImageBuildStatus(Enum):
     PENDING = "pending"
     BUILDING = "building"
@@ -203,10 +73,10 @@ class BentoRunnerSchema:
 
 @attr.define
 class BentoManifestSchema:
-    name: str
     service: str
     bentoml_version: str
     size_bytes: int
+    name: t.Optional[str] = attr.field(default=None)
     apis: t.Dict[str, BentoApiSchema] = attr.field(factory=dict)
     models: t.List[str] = attr.field(factory=list)
     runners: t.Optional[t.List[BentoRunnerSchema]] = attr.field(factory=list)
@@ -220,90 +90,6 @@ class BentoManifestSchema:
     TransmissionStrategy = str
 
 
-@attr.define
-class BentoSchema(ResourceSchema):
-    description: str
-    version: str
-    image_build_status: BentoImageBuildStatus
-    upload_status: BentoUploadStatus
-    upload_finished_reason: str
-    presigned_upload_url: str
-    presigned_download_url: str
-    manifest: BentoManifestSchema
-    transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None)
-    upload_id: t.Optional[str] = attr.field(default=None)
-
-    upload_started_at: t.Optional[datetime] = attr.field(default=None)
-    upload_finished_at: t.Optional[datetime] = attr.field(default=None)
-    build_at: datetime = attr.field(factory=datetime.now)
-
-
-@attr.define
-class BentoRepositorySchema(ResourceSchema):
-    description: str
-    latest_bento: t.Optional[BentoSchema]
-
-
-@attr.define
-class BentoWithRepositorySchema(BentoSchema):
-    repository: BentoRepositorySchema = attr.field(default=None)
-
-
-@attr.define
-class BentoWithRepositoryListSchema(BaseListSchema):
-    items: t.List[BentoWithRepositorySchema] = attr.field(factory=list)
-
-
-@attr.define
-class CreateBentoSchema:
-    description: str
-    version: str
-    manifest: BentoManifestSchema
-    build_at: datetime = attr.field(factory=datetime.now)
-    labels: t.List[LabelItemSchema] = attr.field(factory=list)
-
-
-@attr.define
-class UpdateBentoSchema:
-    manifest: t.Optional[BentoManifestSchema] = attr.field(default=None)
-    labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)
-
-
-@attr.define
-class BentoFullSchema(BentoWithRepositorySchema):
-    models: t.List[ModelWithRepositorySchema] = attr.field(factory=list)
-
-
-@attr.define
-class PreSignMultipartUploadUrlSchema:
-    upload_id: str
-    part_number: int
-
-
-@attr.define
-class CompletePartSchema:
-    part_number: int
-    etag: str
-
-
-@attr.define
-class CompleteMultipartUploadSchema:
-    parts: t.List[CompletePartSchema]
-    upload_id: str
-
-
-@attr.define
-class FinishUploadBentoSchema:
-    status: t.Optional[BentoUploadStatus]
-    reason: t.Optional[str]
-
-
-@attr.define
-class CreateModelRepositorySchema:
-    name: str
-    description: str
-
-
 class ModelImageBuildStatus(Enum):
     PENDING = "pending"
     BUILDING = "building"
@@ -329,66 +115,6 @@ class ModelManifestSchema:
     options: t.Dict[str, t.Any] = attr.field(factory=dict)
 
 
-@attr.define
-class ModelSchema(ResourceSchema):
-    description: str
-    version: str
-    image_build_status: ModelImageBuildStatus
-    upload_status: ModelUploadStatus
-    upload_finished_reason: str
-    presigned_upload_url: str
-    presigned_download_url: str
-    manifest: ModelManifestSchema
-
-    transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None)
-    upload_id: t.Optional[str] = attr.field(default=None)
-
-    upload_started_at: t.Optional[datetime] = attr.field(default=None)
-    upload_finished_at: t.Optional[datetime] = attr.field(default=None)
-    build_at: datetime = attr.field(factory=datetime.now)
-
-
-@attr.define
-class ModelRepositorySchema(ResourceSchema):
-    description: str
-    latest_model: t.Optional[ModelSchema]
-
-
-@attr.define
-class ModelWithRepositorySchema(ModelSchema):
-    repository: ModelRepositorySchema = attr.field(default=None)
-
-
-@attr.define
-class ModelWithRepositoryListSchema(BaseListSchema):
-    items: t.List[ModelWithRepositorySchema] = attr.field(factory=list)
-
-
-@attr.define
-class CreateModelSchema:
-    description: str
-    version: str
-    manifest: ModelManifestSchema
-    build_at: datetime = attr.field(factory=datetime.now)
-    labels: t.List[LabelItemSchema] = attr.field(factory=list)
-
-
-@attr.define
-class FinishUploadModelSchema:
-    status: t.Optional[ModelUploadStatus]
-    reason: t.Optional[str]
-
-
-@attr.define
-class BentoRepositoryListSchema(BaseListSchema):
-    items: t.List[BentoRepositorySchema]
-
-
-@attr.define
-class BentoListSchema(BaseListSchema):
-    items: t.List[BentoSchema]
-
-
 class DeploymentTargetCanaryRuleType(Enum):
     WEIGHT = "weight"
     HEADER = "header"
@@ -415,6 +141,25 @@ class ApiServerBentoDeploymentOverrides:
     extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)
 
 
+@attr.define
+class ApiServerBentoFunctionOverrides:  # field type of bento_function_overrides on DeploymentTargetConfig
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    annotations: t.Optional[t.Dict[str, str]] = attr.field(default=None)  # str -> str mapping
+    monitorExporter: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)
+    extraPodMetadata: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)
+    extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)  # every field defaults to None
+
+
+@attr.define
+class RunnerBentoFunctionOverrides:  # field type of bento_function_overrides on DeploymentTargetRunnerConfig
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    annotations: t.Optional[t.Dict[str, str]] = attr.field(default=None)  # str -> str mapping
+    extraPodMetadata: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)
+    extraPodSpec: t.Optional[t.Dict[str, t.Any]] = attr.field(default=None)  # every field defaults to None
+
+
 @attr.define
 class RunnerBentoDeploymentOverrides:
     __omit_if_default__ = True
@@ -484,10 +229,6 @@ class HPAPolicy:
 class DeploymentTargetHPAConf:
     __omit_if_default__ = True
     __forbid_extra_keys__ = True
-    cpu: t.Optional[int] = attr.field(default=None)
-    gpu: t.Optional[int] = attr.field(default=None)
-    memory: t.Optional[str] = attr.field(default=None)
-    qps: t.Optional[int] = attr.field(default=None)
     min_replicas: t.Optional[int] = attr.field(default=None)
     max_replicas: t.Optional[int] = attr.field(default=None)
     policy: t.Optional[HPAPolicy] = attr.field(default=None)
@@ -534,6 +275,12 @@ class DeploymentStrategy(Enum):
     BestEffortControlledRollout = "BestEffortControlledRollout"
 
 
+class AccessControl(Enum):  # replaces the plain str type of DeploymentTargetConfig.access_control
+    PUBLIC = "public"
+    PROTECTED = "protected"
+    PRIVATE = "private"  # values are lowercase strings
+
+
 @attr.define
 class DeploymentTargetRunnerConfig:
     __omit_if_default__ = True
@@ -541,7 +288,7 @@ class DeploymentTargetRunnerConfig:
     resource_instance: t.Optional[str] = attr.field(default=None)
     resources: t.Optional[DeploymentTargetResources] = attr.field(default=None)
     hpa_conf: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None)
-    envs: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)
+    envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None)
     enable_stealing_traffic_debug_mode: t.Optional[bool] = attr.field(default=None)
     enable_debug_mode: t.Optional[bool] = attr.field(default=None)
     enable_debug_pod_receive_production_traffic: t.Optional[bool] = attr.field(
@@ -551,6 +298,9 @@ class DeploymentTargetRunnerConfig:
     bento_deployment_overrides: t.Optional[RunnerBentoDeploymentOverrides] = attr.field(
         default=None
     )
+    bento_function_overrides: t.Optional[RunnerBentoFunctionOverrides] = attr.field(
+        default=None
+    )
     traffic_control: t.Optional[TrafficControlConfig] = attr.field(default=None)
     deployment_cold_start_wait_timeout: t.Optional[int] = attr.field(default=None)
 
@@ -564,18 +314,18 @@ class DeploymentTargetType(Enum):
 class DeploymentTargetConfig:
     __omit_if_default__ = True
     __forbid_extra_keys__ = True
-    resources: DeploymentTargetResources = attr.field(
+    resources: t.Optional[DeploymentTargetResources] = attr.field(
         default=None, converter=dict_options_converter(DeploymentTargetResources)
     )
     kubeResourceUid: str = attr.field(default="")  # empty str
     kubeResourceVersion: str = attr.field(default="")
     resource_instance: t.Optional[str] = attr.field(default=None)
     hpa_conf: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None)
-    envs: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)
+    envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None)
     runners: t.Optional[t.Dict[str, DeploymentTargetRunnerConfig]] = attr.field(
         default=None
     )
-    access_control: t.Optional[str] = attr.field(default=None)
+    access_control: t.Optional[AccessControl] = attr.field(default=None)
     enable_ingress: t.Optional[bool] = attr.field(default=None)  # false for enables
     enable_stealing_traffic_debug_mode: t.Optional[bool] = attr.field(default=None)
     enable_debug_mode: t.Optional[bool] = attr.field(default=None)
@@ -591,23 +341,38 @@ class DeploymentTargetConfig:
     bento_request_overrides: t.Optional[BentoRequestOverrides] = attr.field(
         default=None
     )  # Put into image builder
+    bento_function_overrides: t.Optional[ApiServerBentoFunctionOverrides] = attr.field(
+        default=None
+    )
     traffic_control: t.Optional[TrafficControlConfig] = attr.field(default=None)
     deployment_cold_start_wait_timeout: t.Optional[int] = attr.field(default=None)
+    bentoml_config_overrides: t.Optional[dict[str, t.Any]] = attr.field(default=None)
 
 
 @attr.define
-class CreateDeploymentTargetSchema:
+class ExtraDeploymentOverrides:  # type of DeploymentServiceConfig.extras; both fields default to None
     __omit_if_default__ = True
     __forbid_extra_keys__ = True
-    type: DeploymentTargetType  # stable by default
-    bento_repository: str
-    bento: str
-    config: DeploymentTargetConfig
-    canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field(
+    bento_function_overrides: t.Optional[ApiServerBentoFunctionOverrides] = attr.field(
+        default=None
+    )
+    bento_request_overrides: t.Optional[BentoRequestOverrides] = attr.field(
         default=None
     )
 
 
+@attr.define
+class DeploymentServiceConfig:  # base class of the v2 DeploymentTargetConfig and UpdateDeploymentSchema
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    instance_type: t.Optional[str] = attr.field(default=None)
+    scaling: t.Optional[DeploymentTargetHPAConf] = attr.field(default=None)  # reuses the HPA conf type
+    envs: t.Optional[t.List[t.Optional[LabelItemSchema]]] = attr.field(default=None)  # list items may be None
+    deployment_strategy: t.Optional[DeploymentStrategy] = attr.field(default=None)
+    extras: t.Optional[ExtraDeploymentOverrides] = attr.field(default=None)
+    cold_start_timeout: t.Optional[int] = attr.field(default=None)  # unit not visible here - TODO confirm
+
+
 class DeploymentStatus(Enum):
     Unknown = "unknown"
     NonDeployed = "non-deployed"
@@ -622,45 +387,9 @@ class DeploymentStatus(Enum):
     ImageBuildSucceeded = "image-build-succeeded"
 
 
-@attr.define
-class DeploymentSchema(ResourceSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    creator: UserSchema
-    cluster: ClusterSchema
-    status: DeploymentStatus
-    kube_namespace: str
-    latest_revision: t.Optional[DeploymentRevisionSchema] = attr.field(
-        default=None
-    )  # Delete returns no latest revision
-    mode: t.Optional[DeploymentMode] = attr.field(default=None)
-
-
-@attr.define
-class DeploymentTargetSchema(ResourceSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    creator: UserSchema
-    type: DeploymentTargetType
-    bento: BentoFullSchema
-    config: DeploymentTargetConfig
-    canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field(
-        default=None
-    )
-
-
-class DeploymentRevisionStatus(Enum):
-    ACTIVE = "active"
-    INACTIVE = "inactive"
-
-
-@attr.define
-class DeploymentRevisionSchema(ResourceSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    creator: UserSchema
-    status: DeploymentRevisionStatus
-    targets: t.List[DeploymentTargetSchema]
+class DeploymentMode(Enum):  # type of the optional ``mode`` field on the v1 deployment schemas
+    Deployment = "deployment"
+    Function = "function"  # moved up within this file; members unchanged
 
 
 @attr.define
@@ -671,57 +400,6 @@ class ResourceInstanceConfigSchema:
     node_selectors: t.Optional[t.Dict[str, str]] = attr.field(factory=dict)
 
 
-@attr.define
-class ResourceInstanceSchema(ResourceSchema):
-    display_name: str
-    description: str
-    config: ResourceInstanceConfigSchema
-
-
-@attr.define
-class ClusterFullSchema(ClusterSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    organization: OrganizationSchema
-    kube_config: str
-    config: ClusterConfigSchema
-    grafana_root_path: str
-    resource_instances: t.List[ResourceInstanceSchema]
-
-
-@attr.define
-class DeploymentListSchema(BaseListSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    items: t.List[DeploymentSchema]
-
-
-class DeploymentMode(Enum):
-    Deployment = "deployment"
-    Function = "function"
-
-
-@attr.define
-class UpdateDeploymentSchema:
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    targets: t.List[CreateDeploymentTargetSchema]
-    mode: t.Optional[DeploymentMode] = attr.field(default=None)
-    labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)
-    description: t.Optional[str] = attr.field(default=None)
-    do_not_deploy: t.Optional[bool] = attr.field(default=None)
-
-
-@attr.define
-class CreateDeploymentSchema(UpdateDeploymentSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    name: str = attr.field(default=None)
-    kube_namespace: t.Optional[str] = attr.field(default=None)
-
-
-@attr.define
-class FullDeploymentSchema(CreateDeploymentSchema):
-    __omit_if_default__ = True
-    __forbid_extra_keys__ = True
-    cluster_name: t.Optional[str] = attr.field(default=None)
+class DeploymentRevisionStatus(Enum):  # type of ``status`` on the v1 and v2 DeploymentRevisionSchema
+    ACTIVE = "active"
+    INACTIVE = "inactive"  # moved to the end of this file; members unchanged
diff --git a/src/bentoml/_internal/cloud/schemas/schemasv1.py b/src/bentoml/_internal/cloud/schemas/schemasv1.py
new file mode 100644
index 00000000000..79c4d2ec554
--- /dev/null
+++ b/src/bentoml/_internal/cloud/schemas/schemasv1.py
@@ -0,0 +1,337 @@
+from __future__ import annotations
+
+import typing as t
+from datetime import datetime
+
+import attr
+
+from bentoml._internal.cloud.schemas.modelschemas import BentoImageBuildStatus
+from bentoml._internal.cloud.schemas.modelschemas import BentoManifestSchema
+from bentoml._internal.cloud.schemas.modelschemas import BentoUploadStatus
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentMode
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentRevisionStatus
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentStatus
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetCanaryRule
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetConfig
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetType
+from bentoml._internal.cloud.schemas.modelschemas import LabelItemSchema
+from bentoml._internal.cloud.schemas.modelschemas import ModelImageBuildStatus
+from bentoml._internal.cloud.schemas.modelschemas import ModelManifestSchema
+from bentoml._internal.cloud.schemas.modelschemas import ModelUploadStatus
+from bentoml._internal.cloud.schemas.modelschemas import ResourceInstanceConfigSchema
+from bentoml._internal.cloud.schemas.modelschemas import ResourceType
+from bentoml._internal.cloud.schemas.modelschemas import TransmissionStrategy
+
+
+@attr.define
+class BaseSchema:  # uid plus timestamps; base class of ResourceSchema
+    uid: str
+    created_at: datetime
+    updated_at: t.Optional[datetime]  # Optional type but no default: still a required init argument
+    deleted_at: t.Optional[datetime]  # Optional type but no default: still a required init argument
+
+
+@attr.define
+class BaseListSchema:  # counters inherited by every *ListSchema in this module (presumably paging info)
+    start: int
+    count: int
+    total: int  # all three are required ints
+
+
+@attr.define
+class ResourceSchema(BaseSchema):  # BaseSchema plus name, resource type and labels
+    name: str
+    resource_type: ResourceType  # enum imported from modelschemas
+    labels: t.List[LabelItemSchema]  # required; no default is declared
+
+
+@attr.define
+class UserSchema:  # user identity fields; all four are required strings
+    name: str
+    email: str
+    first_name: str
+    last_name: str
+
+    def get_name(self) -> str:
+        # "first last" (stripped) when either part is set; otherwise fall back to ``name``.
+        full_name = f"{self.first_name} {self.last_name}".strip()
+        return full_name if self.first_name or self.last_name else self.name
+
+
+@attr.define
+class OrganizationSchema(ResourceSchema):  # ResourceSchema plus a required description
+    description: str
+
+
+@attr.define
+class OrganizationListSchema(BaseListSchema):  # start/count/total plus the organization items
+    items: t.List[OrganizationSchema]  # required; no default
+
+
+@attr.define
+class ClusterSchema(ResourceSchema):  # ResourceSchema plus description and creator; base of ClusterFullSchema
+    description: str
+    creator: UserSchema  # required nested user record
+
+
+@attr.define
+class ClusterConfigSchema:  # type of ClusterFullSchema.config
+    default_deployment_kube_namespace: str  # single required field
+
+
+@attr.define
+class ClusterListSchema(BaseListSchema):  # start/count/total plus the cluster items
+    items: t.List[ClusterSchema]  # required; no default
+
+
+@attr.define
+class CreateBentoRepositorySchema:  # two required strings; does not inherit ResourceSchema
+    name: str
+    description: str
+
+
+@attr.define
+class BentoSchema(ResourceSchema):  # ResourceSchema plus bento version, build/upload status and URLs
+    description: str
+    version: str
+    image_build_status: BentoImageBuildStatus
+    upload_status: BentoUploadStatus
+    upload_finished_reason: str
+    presigned_upload_url: str
+    presigned_download_url: str
+    manifest: t.Optional[BentoManifestSchema] = attr.field(default=None)  # was required in the removed schemas.py
+    transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None)
+    upload_id: t.Optional[str] = attr.field(default=None)
+
+    upload_started_at: t.Optional[datetime] = attr.field(default=None)
+    upload_finished_at: t.Optional[datetime] = attr.field(default=None)
+    build_at: datetime = attr.field(factory=datetime.now)  # default: naive local time at construction
+
+
+@attr.define
+class BentoRepositorySchema(ResourceSchema):  # ResourceSchema plus description and latest bento
+    description: str
+    latest_bento: t.Optional[BentoSchema]  # may be None but has no default: still a required argument
+
+
+@attr.define
+class BentoWithRepositorySchema(BentoSchema):  # BentoSchema plus an optional repository reference
+    repository: t.Optional[BentoRepositorySchema] = attr.field(default=None)  # Optional: the default is None
+
+
+@attr.define
+class BentoWithRepositoryListSchema(BaseListSchema):  # start/count/total plus bento-with-repository items
+    items: t.List[BentoWithRepositorySchema] = attr.field(factory=list)  # fresh empty list per instance
+
+
+@attr.define
+class CreateBentoSchema:  # description and version are required; the rest have defaults
+    description: str
+    version: str
+    manifest: t.Optional[BentoManifestSchema] = attr.field(default=None)  # was required in the removed schemas.py
+    build_at: datetime = attr.field(factory=datetime.now)  # default: naive local time at construction
+    labels: t.List[LabelItemSchema] = attr.field(factory=list)  # fresh empty list per instance
+
+
+@attr.define
+class UpdateBentoSchema:  # both fields are optional and default to None
+    manifest: t.Optional[BentoManifestSchema] = attr.field(default=None)
+    labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)  # None, not an empty list
+
+
+@attr.define
+class BentoFullSchema(BentoWithRepositorySchema):  # adds models; used as DeploymentTargetSchema.bento
+    models: t.List[ModelWithRepositorySchema] = attr.field(factory=list)  # forward ref: class is defined below
+
+
+@attr.define
+class PreSignMultipartUploadUrlSchema:  # two required fields, no defaults
+    upload_id: str
+    part_number: int
+
+
+@attr.define
+class CompletePartSchema:  # element type of CompleteMultipartUploadSchema.parts
+    part_number: int
+    etag: str
+
+
+@attr.define
+class CompleteMultipartUploadSchema:  # an upload_id together with its CompletePartSchema entries
+    parts: t.List[CompletePartSchema]  # required; no default
+    upload_id: str
+
+
+@attr.define
+class FinishUploadBentoSchema:  # both fields accept None but neither has a default
+    status: t.Optional[BentoUploadStatus]
+    reason: t.Optional[str]
+
+
+@attr.define
+class CreateModelRepositorySchema:  # two required strings; same fields as CreateBentoRepositorySchema
+    name: str
+    description: str
+
+
+@attr.define
+class ModelSchema(ResourceSchema):  # ResourceSchema plus model version, build/upload status and URLs
+    description: str
+    version: str
+    image_build_status: ModelImageBuildStatus
+    upload_status: ModelUploadStatus
+    upload_finished_reason: str
+    presigned_upload_url: str
+    presigned_download_url: str
+    manifest: ModelManifestSchema  # still required here, unlike BentoSchema.manifest
+
+    transmission_strategy: t.Optional[TransmissionStrategy] = attr.field(default=None)
+    upload_id: t.Optional[str] = attr.field(default=None)
+
+    upload_started_at: t.Optional[datetime] = attr.field(default=None)
+    upload_finished_at: t.Optional[datetime] = attr.field(default=None)
+    build_at: datetime = attr.field(factory=datetime.now)  # default: naive local time at construction
+
+
+@attr.define
+class ModelRepositorySchema(ResourceSchema):  # ResourceSchema plus description and latest model
+    description: str
+    latest_model: t.Optional[ModelSchema]  # may be None but has no default: still a required argument
+
+
+@attr.define
+class ModelWithRepositorySchema(ModelSchema):  # ModelSchema plus an optional repository reference
+    repository: t.Optional[ModelRepositorySchema] = attr.field(default=None)  # Optional: the default is None
+
+
+@attr.define
+class ModelWithRepositoryListSchema(BaseListSchema):  # start/count/total plus model-with-repository items
+    items: t.List[ModelWithRepositorySchema] = attr.field(factory=list)  # fresh empty list per instance
+
+
+@attr.define
+class CreateModelSchema:  # description, version and manifest are required
+    description: str
+    version: str
+    manifest: ModelManifestSchema  # required, unlike CreateBentoSchema.manifest
+    build_at: datetime = attr.field(factory=datetime.now)  # default: naive local time at construction
+    labels: t.List[LabelItemSchema] = attr.field(factory=list)  # fresh empty list per instance
+
+
+@attr.define
+class FinishUploadModelSchema:  # both fields accept None but neither has a default
+    status: t.Optional[ModelUploadStatus]
+    reason: t.Optional[str]
+
+
+@attr.define
+class BentoRepositoryListSchema(BaseListSchema):  # start/count/total plus bento repository items
+    items: t.List[BentoRepositorySchema]  # required; no default
+
+
+@attr.define
+class BentoListSchema(BaseListSchema):  # start/count/total plus bento items
+    items: t.List[BentoSchema]  # required; no default
+
+
+@attr.define
+class CreateDeploymentTargetSchema:  # element type of UpdateDeploymentSchema.targets; moved here from modelschemas
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    type: DeploymentTargetType  # stable by default
+    bento_repository: str
+    bento: str
+    config: DeploymentTargetConfig  # the modelschemas class, not the v2 one of the same name
+    canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field(
+        default=None
+    )
+
+
+@attr.define
+class DeploymentSchema(ResourceSchema):  # v1 deployment record; base of DeploymentFullSchema
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    creator: UserSchema
+    cluster: ClusterSchema
+    status: DeploymentStatus
+    kube_namespace: str
+    distributed: bool = attr.field(default=False)  # new field: absent from the removed schemas.py version
+    latest_revision: t.Optional[DeploymentRevisionSchema] = attr.field(
+        default=None
+    )  # Delete returns no latest revision
+    mode: t.Optional[DeploymentMode] = attr.field(default=None)
+
+
+@attr.define
+class DeploymentTargetSchema(ResourceSchema):  # element type of the v1 DeploymentRevisionSchema.targets
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    creator: UserSchema
+    type: DeploymentTargetType
+    bento: BentoFullSchema  # required; includes repository and models
+    config: DeploymentTargetConfig  # the modelschemas class, not the v2 one of the same name
+    canary_rules: t.Optional[t.List[DeploymentTargetCanaryRule]] = attr.field(
+        default=None
+    )
+
+
+@attr.define
+class DeploymentRevisionSchema(ResourceSchema):  # type of the v1 DeploymentSchema.latest_revision
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    creator: UserSchema
+    status: DeploymentRevisionStatus
+    targets: t.List[DeploymentTargetSchema]  # required; no default
+
+
+@attr.define
+class ResourceInstanceSchema(ResourceSchema):  # element type of ClusterFullSchema.resource_instances
+    display_name: str
+    description: str
+    config: ResourceInstanceConfigSchema  # imported from modelschemas
+
+
+@attr.define
+class ClusterFullSchema(ClusterSchema):  # ClusterSchema plus organization, kube config and instances
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    organization: OrganizationSchema
+    kube_config: str
+    config: ClusterConfigSchema
+    grafana_root_path: str
+    resource_instances: t.List[ResourceInstanceSchema]  # all five added fields are required
+
+
+@attr.define
+class DeploymentListSchema(BaseListSchema):  # start/count/total plus v1 deployment items
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    items: t.List[DeploymentSchema]  # required; no default
+
+
+@attr.define
+class UpdateDeploymentSchema:  # v1 update payload; base class of CreateDeploymentSchema
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    targets: t.List[CreateDeploymentTargetSchema]  # the only required field
+    mode: t.Optional[DeploymentMode] = attr.field(default=None)
+    labels: t.Optional[t.List[LabelItemSchema]] = attr.field(default=None)
+    description: t.Optional[str] = attr.field(default=None)
+    do_not_deploy: t.Optional[bool] = attr.field(default=None)
+
+
+@attr.define(kw_only=True)  # kw_only lets the required fields below follow the parent's defaulted ones
+class CreateDeploymentSchema(UpdateDeploymentSchema):
+    __omit_if_default__ = True
+    __forbid_extra_keys__ = True
+    name: str  # now required; the removed schemas.py version defaulted it to None
+    kube_namespace: str  # now required; previously Optional with default None
+    # cluster: str  -- NOTE(review): commented-out field; restore or drop it
+    distributed: t.Optional[bool] = attr.field(default=False)  # defaults to False, not None
+
+
+@attr.define(kw_only=True)  # kw_only lets the required ``urls`` follow DeploymentSchema's defaulted fields
+class DeploymentFullSchema(DeploymentSchema):  # DeploymentSchema plus a required list of URL strings
+    __omit_if_default__ = True  # marker read outside this module (presumably by the cattrs converter) - TODO confirm
+    __forbid_extra_keys__ = True  # marker read outside this module - TODO confirm semantics
+    urls: t.List[str]  # t.List like the rest of this module; list[str] cannot be evaluated on Python 3.8
diff --git a/src/bentoml/_internal/cloud/schemas/schemasv2.py b/src/bentoml/_internal/cloud/schemas/schemasv2.py
new file mode 100644
index 00000000000..d81a9e21e9f
--- /dev/null
+++ b/src/bentoml/_internal/cloud/schemas/schemasv2.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+import typing as t
+
+import attr
+
+from bentoml._internal.cloud.schemas.modelschemas import AccessControl
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentMode
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentRevisionStatus
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentServiceConfig
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentStatus
+from bentoml._internal.cloud.schemas.schemasv1 import BaseListSchema
+from bentoml._internal.cloud.schemas.schemasv1 import BentoWithRepositorySchema
+from bentoml._internal.cloud.schemas.schemasv1 import ClusterSchema
+from bentoml._internal.cloud.schemas.schemasv1 import ResourceSchema
+from bentoml._internal.cloud.schemas.schemasv1 import UserSchema
+
+
+@attr.define
+class DeploymentTargetSchema(ResourceSchema):  # a target: creator, config and bento
+    creator: t.Optional[UserSchema]
+    config: t.Optional[DeploymentTargetConfig]  # defined later; annotations are lazy in this module
+    bento: t.Optional[BentoWithRepositorySchema]
+
+
+@attr.define
+class DeploymentTargetConfig(DeploymentServiceConfig):  # service config plus target-level fields
+    kube_resource_uid: t.Optional[str] = attr.field(default=None)
+    kube_resource_version: t.Optional[str] = attr.field(default=None)
+    services: t.Dict[str, DeploymentServiceConfig] = attr.field(factory=dict)  # presumably keyed by service name -- confirm
+    access_type: t.Optional[AccessControl] = attr.field(default=None)
+    bentoml_config_overrides: t.Dict[str, t.Optional[t.Any]] = attr.field(factory=dict)
+
+
+@attr.define
+class DeploymentTargetListSchema(BaseListSchema):  # list envelope of (optional) targets
+    items: t.List[t.Optional[DeploymentTargetSchema]]
+
+
+@attr.define
+class DeploymentRevisionSchema(ResourceSchema):  # a revision: creator, status, targets
+    creator: t.Optional[UserSchema]
+    status: DeploymentRevisionStatus
+    targets: t.List[t.Optional[DeploymentTargetSchema]]
+
+
+@attr.define
+class DeploymentRevisionListSchema(BaseListSchema):  # list envelope of (optional) revisions
+    items: t.List[t.Optional[DeploymentRevisionSchema]]
+
+
+@attr.define(kw_only=True)
+class UpdateDeploymentSchema(DeploymentServiceConfig):  # update payload; only ``bento`` is required here
+    __omit_if_default__ = True
+    __forbid_extra_keys__ = False  # distributed, cluster and name need to be ignored
+    bento: str
+    access_type: t.Optional[AccessControl] = attr.field(default=None)
+    description: t.Optional[str] = attr.field(default=None)
+    services: t.Dict[str, DeploymentServiceConfig] = attr.field(factory=dict)
+    bentoml_config_overrides: t.Dict[str, t.Any] = attr.field(factory=dict)
+
+
+@attr.define(kw_only=True)
+class CreateDeploymentSchema(UpdateDeploymentSchema):  # update payload plus name, cluster, distributed
+    __omit_if_default__ = True
+    __forbid_extra_keys__ = True  # overrides the False set on UpdateDeploymentSchema
+    name: str
+    cluster: str
+    distributed: bool
+
+
+@attr.define
+class DeploymentSchema(ResourceSchema):  # a deployment: status, namespace, owner, cluster, revision
+    __omit_if_default__ = True
+    __forbid_extra_keys__ = True
+    status: DeploymentStatus
+    kube_namespace: str
+    creator: t.Optional[UserSchema]
+    cluster: t.Optional[ClusterSchema]
+    latest_revision: t.Optional[DeploymentRevisionSchema]
+    mode: t.Optional[DeploymentMode] = attr.field(default=None)
+    distributed: bool = attr.field(default=False)
+
+
+@attr.define
+class DeploymentFullSchema(DeploymentSchema):  # DeploymentSchema plus ``urls`` (empty list by default)
+    urls: t.List[str] = attr.field(factory=list)
+
+
+@attr.define
+class DeploymentListSchema(BaseListSchema):  # list envelope whose items are DeploymentSchema
+    __omit_if_default__ = True
+    __forbid_extra_keys__ = True
+    items: t.List[DeploymentSchema]
diff --git a/src/bentoml/_internal/cloud/schemas/utils.py b/src/bentoml/_internal/cloud/schemas/utils.py
new file mode 100644
index 00000000000..37b54e6e9ec
--- /dev/null
+++ b/src/bentoml/_internal/cloud/schemas/utils.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+import json
+import typing as t
+from datetime import datetime
+
+import cattr
+from dateutil.parser import parse
+
+from bentoml._internal.tag import Tag
+
+time_format = "%Y-%m-%d %H:%M:%S.%f"  # strftime pattern used by datetime_encoder
+T = t.TypeVar("T")  # generic schema/option type used by the helpers in this module
+
+
+def datetime_encoder(time_obj: t.Optional[datetime]) -> t.Optional[str]:
+    """Format ``time_obj`` with ``time_format``; a falsy value yields ``None``."""
+    # Unstructure hook for datetime values (registered on cloud_converter).
+    return time_obj.strftime(time_format) if time_obj else None
+
+
+def datetime_decoder(datetime_str: t.Optional[str], _: t.Any) -> t.Optional[datetime]:
+    """Parse ``datetime_str`` with dateutil; empty or ``None`` input yields ``None``."""
+    # The second argument (the target type handed to a structure hook) is unused.
+    return parse(datetime_str) if datetime_str else None
+
+
+def tag_encoder(tag_obj: t.Optional[Tag]) -> t.Optional[str]:
+    """Render ``tag_obj`` with ``str()``; a falsy value yields ``None``."""
+    # Unstructure hook for Tag values (registered on cloud_converter).
+    return str(tag_obj) if tag_obj else None
+
+
+def tag_decoder(tag_str: t.Optional[str], _: t.Any) -> t.Optional[Tag]:
+    """Build a ``Tag`` via ``Tag.from_str``; empty or ``None`` input yields ``None``."""
+    # The second argument (the target type handed to a structure hook) is unused.
+    return Tag.from_str(tag_str) if tag_str else None
+
+
+def dict_options_converter(
+    options_type: type[T],
+) -> t.Callable[[T | dict[str, T]], T]:
+    """Return a converter turning ``None`` or a dict into an ``options_type`` instance."""
+
+    def _converter(value: T | dict[str, T] | None) -> T:
+        if isinstance(value, dict):
+            return options_type(**value)  # dict entries become keyword arguments
+        return options_type() if value is None else value
+
+    return _converter
+
+
+cloud_converter = cattr.Converter()
+# Register the datetime and Tag encode/decode helpers as (un)structure hooks.
+cloud_converter.register_unstructure_hook(datetime, datetime_encoder)
+cloud_converter.register_structure_hook(datetime, datetime_decoder)
+cloud_converter.register_unstructure_hook(Tag, tag_encoder)
+cloud_converter.register_structure_hook(Tag, tag_decoder)
+
+
+def schema_from_json(json_content: str, cls: t.Type[T]) -> T:
+    """Parse a JSON string and structure it as ``cls`` with ``cloud_converter``."""
+    return cloud_converter.structure(json.loads(json_content), cls)
+
+
+def schema_to_json(obj: t.Any) -> str:
+    """Unstructure ``obj`` as its own class and dump the result as a JSON string."""
+    return json.dumps(cloud_converter.unstructure(obj, obj.__class__))
+
+
+def schema_from_object(obj: t.Any, cls: t.Type[T]) -> T:  # structure an already-decoded object
+    return cloud_converter.structure(obj, cls)
diff --git a/src/bentoml/_internal/cloud/yatai.py b/src/bentoml/_internal/cloud/yatai.py
index eb9ecd0c055..7ce202cf390 100644
--- a/src/bentoml/_internal/cloud/yatai.py
+++ b/src/bentoml/_internal/cloud/yatai.py
@@ -10,7 +10,7 @@
 from tempfile import NamedTemporaryFile
 
 import fs
-import requests
+import httpx
 from rich.live import Live
 from simple_di import Provide
 from simple_di import inject
@@ -29,25 +29,25 @@
 from .base import CallbackIOWrapper
 from .base import CloudClient
 from .config import get_rest_api_client
-from .schemas import BentoApiSchema
-from .schemas import BentoManifestSchema
-from .schemas import BentoRunnerResourceSchema
-from .schemas import BentoRunnerSchema
-from .schemas import BentoUploadStatus
-from .schemas import CompleteMultipartUploadSchema
-from .schemas import CompletePartSchema
-from .schemas import CreateBentoRepositorySchema
-from .schemas import CreateBentoSchema
-from .schemas import CreateModelRepositorySchema
-from .schemas import CreateModelSchema
-from .schemas import FinishUploadBentoSchema
-from .schemas import FinishUploadModelSchema
-from .schemas import LabelItemSchema
-from .schemas import ModelManifestSchema
-from .schemas import ModelUploadStatus
-from .schemas import PreSignMultipartUploadUrlSchema
-from .schemas import TransmissionStrategy
-from .schemas import UpdateBentoSchema
+from .schemas.modelschemas import BentoApiSchema
+from .schemas.modelschemas import BentoRunnerResourceSchema
+from .schemas.modelschemas import BentoRunnerSchema
+from .schemas.schemasv1 import BentoManifestSchema
+from .schemas.schemasv1 import BentoUploadStatus
+from .schemas.schemasv1 import CompleteMultipartUploadSchema
+from .schemas.schemasv1 import CompletePartSchema
+from .schemas.schemasv1 import CreateBentoRepositorySchema
+from .schemas.schemasv1 import CreateBentoSchema
+from .schemas.schemasv1 import CreateModelRepositorySchema
+from .schemas.schemasv1 import CreateModelSchema
+from .schemas.schemasv1 import FinishUploadBentoSchema
+from .schemas.schemasv1 import FinishUploadModelSchema
+from .schemas.schemasv1 import LabelItemSchema
+from .schemas.schemasv1 import ModelManifestSchema
+from .schemas.schemasv1 import ModelUploadStatus
+from .schemas.schemasv1 import PreSignMultipartUploadUrlSchema
+from .schemas.schemasv1 import TransmissionStrategy
+from .schemas.schemasv1 import UpdateBentoSchema
 
 if t.TYPE_CHECKING:
     from concurrent.futures import Future
@@ -152,12 +152,14 @@ def push_model(model: Model) -> None:
             for r in info.runners
         ]
         manifest = BentoManifestSchema(
+            name=info.name,
             service=info.service,
             bentoml_version=info.bentoml_version,
             apis=apis,
             models=models,
             runners=runners,
             size_bytes=bento.total_size(),
+            config=info.config,
         )
         if not remote_bento:
             with self.spin(text=f'Registering Bento "{bento.tag}" with Yatai..'):
@@ -254,7 +256,7 @@ def filter_(
             )
             try:
                 if presigned_upload_url is not None:
-                    resp = requests.put(presigned_upload_url, data=tar_io)
+                    resp = httpx.put(presigned_upload_url, content=tar_io)
                     if resp.status_code != 200:
                         finish_req = FinishUploadBentoSchema(
                             status=BentoUploadStatus.FAILED,
@@ -309,7 +311,7 @@ def chunk_upload(
                             )
 
                             with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io:
-                                resp = requests.put(
-                                    remote_bento.presigned_upload_url, data=chunk_io
+                                resp = httpx.put(
+                                    remote_bento.presigned_upload_url, content=chunk_io
                                 )
                                 if resp.status_code != 200:
@@ -498,27 +500,27 @@ def pull_model(model_tag: Tag):
                             name, version
                         )
                         presigned_download_url = remote_bento.presigned_download_url
-                response = requests.get(presigned_download_url, stream=True)
 
-            if response.status_code != 200:
-                raise BentoMLException(
-                    f'Failed to download bento "{_tag}": {response.text}'
-                )
-            total_size_in_bytes = int(response.headers.get("content-length", 0))
-            block_size = 1024  # 1 Kibibyte
             with NamedTemporaryFile() as tar_file:
-                self.transmission_progress.update(
-                    download_task_id,
-                    completed=0,
-                    total=total_size_in_bytes,
-                    visible=True,
-                )
-                self.transmission_progress.start_task(download_task_id)
-                for data in response.iter_content(block_size):
+                with httpx.stream("GET", presigned_download_url) as response:
+                    if response.status_code != 200:
+                        response.read()  # streamed body must be loaded before .text
+                        detail = f'Failed to download bento "{_tag}": {response.text}'
+                        raise BentoMLException(detail)
+                    total_size_in_bytes = int(response.headers.get("content-length", 0))
+                    block_size = 1024  # 1 Kibibyte
                     self.transmission_progress.update(
-                        download_task_id, advance=len(data)
+                        download_task_id,
+                        completed=0,
+                        total=total_size_in_bytes,
+                        visible=True,
                     )
-                    tar_file.write(data)
+                    self.transmission_progress.start_task(download_task_id)
+                    for data in response.iter_bytes(block_size):
+                        self.transmission_progress.update(
+                            download_task_id, advance=len(data)
+                        )
+                        tar_file.write(data)
                 self.log_progress.add_task(
                     f'[bold green]Finished downloading all bento "{_tag}" files'
                 )
@@ -690,7 +692,7 @@ def io_cb(x: int):
             )
             try:
                 if presigned_upload_url is not None:
-                    resp = requests.put(presigned_upload_url, data=tar_io)
+                    resp = httpx.put(presigned_upload_url, content=tar_io)
                     if resp.status_code != 200:
                         finish_req = FinishUploadModelSchema(
                             status=ModelUploadStatus.FAILED,
@@ -746,8 +748,8 @@ def chunk_upload(
                             )
 
                             with CallbackIOWrapper(chunk, read_cb=io_cb) as chunk_io:
-                                resp = requests.put(
-                                    remote_model.presigned_upload_url, data=chunk_io
+                                resp = httpx.put(
+                                    remote_model.presigned_upload_url, content=chunk_io
                                 )
                                 if resp.status_code != 200:
                                     return FinishUploadModelSchema(
@@ -937,25 +939,26 @@ def _do_pull_model(
                     )
                     presigned_download_url = remote_model.presigned_download_url
 
-            response = requests.get(presigned_download_url, stream=True)
-            if response.status_code != 200:
-                raise BentoMLException(
-                    f'Failed to download model "{_tag}": {response.text}'
-                )
-
-        total_size_in_bytes = int(response.headers.get("content-length", 0))
-        block_size = 1024  # 1 Kibibyte
         with NamedTemporaryFile() as tar_file:
-            self.transmission_progress.update(
-                download_task_id,
-                description=f'Downloading model "{_tag}"',
-                total=total_size_in_bytes,
-                visible=True,
-            )
-            self.transmission_progress.start_task(download_task_id)
-            for data in response.iter_content(block_size):
-                self.transmission_progress.update(download_task_id, advance=len(data))
-                tar_file.write(data)
+            with httpx.stream("GET", presigned_download_url) as response:
+                if response.status_code != 200:
+                    response.read()  # streamed body must be loaded before .text
+                    detail = f'Failed to download model "{_tag}": {response.text}'
+                    raise BentoMLException(detail)
+                total_size_in_bytes = int(response.headers.get("content-length", 0))
+                block_size = 1024  # 1 Kibibyte
+                self.transmission_progress.update(
+                    download_task_id,
+                    description=f'Downloading model "{_tag}"',
+                    total=total_size_in_bytes,
+                    visible=True,
+                )
+                self.transmission_progress.start_task(download_task_id)
+                for data in response.iter_bytes(block_size):
+                    self.transmission_progress.update(
+                        download_task_id, advance=len(data)
+                    )
+                    tar_file.write(data)
             self.log_progress.add_task(
                 f'[bold green]Finished downloading model "{_tag}" files'
             )
diff --git a/src/bentoml/cloud.py b/src/bentoml/cloud.py
index 66e9c1bca1c..283a76c1f10 100644
--- a/src/bentoml/cloud.py
+++ b/src/bentoml/cloud.py
@@ -1,3 +1,12 @@
 from ._internal.cloud import BentoCloudClient as BentoCloudClient
-from ._internal.cloud import Resource as Resource
 from ._internal.cloud import YataiClient as YataiClient
+
+deprecated_names = ["Resource"]
+
+
+def __getattr__(name: str):
+    """Module attribute hook: raise AttributeError for removed or unknown names."""
+    if name in deprecated_names:
+        message = f"{name} is deprecated, please use bentoml.deployment instead"
+        raise AttributeError(message)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/src/bentoml/deployment.py b/src/bentoml/deployment.py
new file mode 100644
index 00000000000..22fd456f541
--- /dev/null
+++ b/src/bentoml/deployment.py
@@ -0,0 +1,492 @@
+"""
+User facing python APIs for deployment
+"""
+
+from __future__ import annotations
+
+import typing as t
+
+from simple_di import Provide
+from simple_di import inject
+
+from bentoml._internal.cloud.deployment import Deployment
+from bentoml._internal.cloud.deployment import get_real_bento_tag
+from bentoml._internal.tag import Tag
+from bentoml.cloud import BentoCloudClient
+from bentoml.exceptions import BentoMLException
+
+from ._internal.configuration.containers import BentoMLContainer
+
+if t.TYPE_CHECKING:
+    from ._internal.bento import BentoStore
+
+
+@t.overload  # variant: project_path + individual parameters
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    cluster_name: str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + individual parameters
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    cluster_name: str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_file
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: project_path + config_file
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_dct
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: project_path + config_dct
+def create(
+    name: str | None = ...,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@inject
+def create(
+    name: str | None = None,
+    path_context: str | None = None,
+    context: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    cluster_name: str | None = None,
+    access_type: str | None = None,
+    scaling_min: int | None = None,
+    scaling_max: int | None = None,
+    instance_type: str | None = None,
+    strategy: str | None = None,
+    envs: t.List[dict[str, t.Any]] | None = None,
+    extras: dict[str, t.Any] | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Create a deployment from a bento or from a project directory.
+
+    The configuration may come from ``config_dct``, from ``config_file``, or
+    from the individual keyword parameters, but from only one of the three.
+    Exactly one of ``bento`` and ``project_path`` has to be supplied.
+    The resolved bento and the remaining arguments go to ``Deployment.create``.
+
+    Raises:
+        BentoMLException: if configuration sources are mixed, or if ``bento``
+            and ``project_path`` are both given or both missing.
+    """
+    # The scaling bounds are compared against None so that an explicit 0 still
+    # counts as "configured by parameter"; the other options keep truthiness.
+    deploy_by_param = (scaling_min is not None or scaling_max is not None) or bool(
+        access_type or cluster_name or instance_type or strategy or envs or extras
+    )
+    # Reject the call as soon as two of the three configuration sources are used.
+    if sum((bool(config_dct), bool(config_file), deploy_by_param)) > 1:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file, or the other parameters"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+    if bento is None and project_path is None:
+        raise BentoMLException("Either bento or project_path must be provided")
+    bento = get_real_bento_tag(
+        project_path=project_path,
+        bento=bento,
+        context=context,
+        _cloud_client=BentoCloudClient(),
+    )
+
+    return Deployment.create(
+        bento=bento,
+        access_type=access_type,
+        name=name,
+        cluster_name=cluster_name,
+        scaling_min=scaling_min,
+        scaling_max=scaling_max,
+        instance_type=instance_type,
+        strategy=strategy,
+        envs=envs,
+        extras=extras,
+        config_dct=config_dct,
+        config_file=config_file,
+        path_context=path_context,
+        context=context,
+    )
+
+
+@t.overload  # variant: project_path + individual parameters
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + individual parameters
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    access_type: str | None = ...,
+    scaling_min: int | None = ...,
+    scaling_max: int | None = ...,
+    instance_type: str | None = ...,
+    strategy: str | None = ...,
+    envs: t.List[dict[str, t.Any]] | None = ...,
+    extras: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: project_path + config_file
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_file
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_file: str | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: project_path + config_dct
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_dct
+def update(
+    name: str,
+    path_context: str | None = ...,
+    context: str | None = ...,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: Tag | str | None = ...,
+    config_dct: dict[str, t.Any] | None = ...,
+) -> Deployment:
+    ...
+
+
+@inject
+def update(
+    name: str,
+    path_context: str | None = None,
+    context: str | None = None,
+    cluster_name: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    access_type: str | None = None,
+    scaling_min: int | None = None,
+    scaling_max: int | None = None,
+    instance_type: str | None = None,
+    strategy: str | None = None,
+    envs: t.List[dict[str, t.Any]] | None = None,
+    extras: dict[str, t.Any] | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Update the deployment called ``name``.
+
+    The new configuration may come from ``config_dct``, from ``config_file``,
+    or from the individual keyword parameters, but from only one of the three.
+    ``bento`` and ``project_path`` are both optional; when neither is given no
+    bento is resolved and ``bento=None`` is passed to ``Deployment.update``.
+
+    Raises:
+        BentoMLException: if configuration sources are mixed, or if ``bento``
+            and ``project_path`` are both given.
+    """
+    # The scaling bounds are compared against None so that an explicit 0 still
+    # counts as "configured by parameter"; the other options keep truthiness.
+    deploy_by_param = (scaling_min is not None or scaling_max is not None) or bool(
+        access_type or instance_type or strategy or envs or extras
+    )
+    # Reject the call as soon as two of the three configuration sources are used.
+    if sum((bool(config_dct), bool(config_file), deploy_by_param)) > 1:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file, or the other parameters"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+    # Resolve a bento only when the caller named one or pointed at a project.
+    if bento is not None or project_path is not None:
+        bento = get_real_bento_tag(
+            project_path=project_path,
+            bento=bento,
+            context=context,
+            _cloud_client=BentoCloudClient(),
+        )
+
+    return Deployment.update(
+        bento=bento,
+        access_type=access_type,
+        name=name,
+        cluster_name=cluster_name,
+        scaling_min=scaling_min,
+        scaling_max=scaling_max,
+        instance_type=instance_type,
+        strategy=strategy,
+        envs=envs,
+        extras=extras,
+        config_dct=config_dct,
+        config_file=config_file,
+        path_context=path_context,
+        context=context,
+    )
+
+
+@t.overload  # variant: project_path + config_dct
+def apply(
+    name: str,
+    cluster_name: t.Optional[str] = ...,
+    path_context: t.Optional[str] = ...,
+    context: t.Optional[str] = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: t.Optional[str] = ...,
+    config_dct: t.Optional[dict[str, t.Any]] = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_dct
+def apply(
+    name: str,
+    cluster_name: t.Optional[str] = ...,
+    path_context: t.Optional[str] = ...,
+    context: t.Optional[str] = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: t.Optional[t.Union[Tag, str]] = ...,
+    config_dct: t.Optional[dict[str, t.Any]] = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: project_path + config_file
+def apply(
+    name: str,
+    cluster_name: t.Optional[str] = ...,
+    path_context: t.Optional[str] = ...,
+    context: t.Optional[str] = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: t.Optional[str] = ...,
+    config_file: t.Optional[str] = ...,
+) -> Deployment:
+    ...
+
+
+@t.overload  # variant: bento + config_file
+def apply(
+    name: str,
+    cluster_name: t.Optional[str] = ...,
+    path_context: t.Optional[str] = ...,
+    context: t.Optional[str] = ...,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    bento: t.Optional[t.Union[Tag, str]] = ...,
+    config_file: t.Optional[str] = ...,
+) -> Deployment:
+    ...
+
+
+@inject
+def apply(
+    name: str,
+    cluster_name: str | None = None,
+    path_context: str | None = None,
+    context: str | None = None,
+    _bento_store: BentoStore = Provide[BentoMLContainer.bento_store],
+    *,
+    project_path: str | None = None,
+    bento: Tag | str | None = None,
+    config_dct: dict[str, t.Any] | None = None,
+    config_file: str | None = None,
+) -> Deployment:
+    """Apply a deployment configuration to ``name`` through ``Deployment.apply``."""
+    if config_dct and config_file:
+        raise BentoMLException(
+            "Configure a deployment can only use one of the following: config_dct, config_file"
+        )
+    if bento and project_path:
+        raise BentoMLException("Only one of bento or project_path can be provided")
+    # With neither bento nor project_path, bento stays None and nothing is resolved.
+    if bento is not None or project_path is not None:
+        bento = get_real_bento_tag(
+            project_path=project_path,
+            bento=bento,
+            context=context,
+            _cloud_client=BentoCloudClient(),
+        )
+
+    return Deployment.apply(
+        name=name,
+        bento=bento,
+        cluster_name=cluster_name,
+        context=context,
+        path_context=path_context,
+        config_dct=config_dct,
+        config_file=config_file,
+    )
+
+
+def get(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> Deployment:
+    """Look up the deployment called ``name``.
+
+    Delegates to ``Deployment.get`` with the same ``context`` and ``cluster_name``.
+    """
+    return Deployment.get(name=name, context=context, cluster_name=cluster_name)
+
+
+def terminate(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> Deployment:
+    """Terminate the deployment called ``name``.
+
+    Delegates to ``Deployment.terminate`` and returns its result.
+    """
+    return Deployment.terminate(name=name, context=context, cluster_name=cluster_name)
+
+
+def delete(
+    name: str,
+    context: str | None = None,
+    cluster_name: str | None = None,
+) -> None:
+    """Delete the deployment called ``name``.
+
+    Delegates to ``Deployment.delete``; nothing is returned.
+    """
+    Deployment.delete(name=name, context=context, cluster_name=cluster_name)
+
+
+def list(  # NOTE: this public name shadows the builtin ``list`` inside this module
+    context: str | None = None,
+    cluster_name: str | None = None,
+    search: str | None = None,
+) -> t.List[Deployment]:
+    return Deployment.list(context=context, cluster_name=cluster_name, search=search)  # thin wrapper
+
+
+__all__ = ["create", "get", "update", "apply", "terminate", "delete", "list"]
diff --git a/src/bentoml_cli/cloud.py b/src/bentoml_cli/cloud.py
index 2155efcbf6f..8f11b030e3b 100644
--- a/src/bentoml_cli/cloud.py
+++ b/src/bentoml_cli/cloud.py
@@ -47,12 +47,12 @@ def cloud():
     def login(shared_options: SharedOptions, endpoint: str, api_token: str) -> None:  # type: ignore (not accessed)
         """Login to BentoCloud or Yatai server."""
         cloud_rest_client = RestApiClient(endpoint, api_token)
-        user = cloud_rest_client.get_current_user()
+        user = cloud_rest_client.v1.get_current_user()
 
         if user is None:
             raise CLIException("current user is not found")
 
-        org = cloud_rest_client.get_current_organization()
+        org = cloud_rest_client.v1.get_current_organization()
 
         if org is None:
             raise CLIException("current organization is not found")
diff --git a/src/bentoml_cli/deployment.py b/src/bentoml_cli/deployment.py
index d2363edf2ac..226cc7c0b89 100644
--- a/src/bentoml_cli/deployment.py
+++ b/src/bentoml_cli/deployment.py
@@ -3,11 +3,14 @@
 import typing as t
 
 import click
+import yaml
+from rich.syntax import Syntax
+
+from bentoml._internal.cloud.schemas.modelschemas import AccessControl
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentStrategy
 
 if t.TYPE_CHECKING:
     TupleStrAny = tuple[str, ...]
-    from bentoml._internal.cloud.schemas import DeploymentListSchema
-    from bentoml._internal.cloud.schemas import DeploymentSchema
 
     from .utils import SharedOptions
 else:
@@ -20,17 +23,145 @@ def add_deployment_command(cli: click.Group) -> None:
     import click_option_group as cog
     from rich.table import Table
 
-    from bentoml._internal.configuration.containers import BentoMLContainer
-    from bentoml._internal.utils import bentoml_cattr
+    from bentoml._internal.cloud.deployment import Deployment
+    from bentoml._internal.cloud.deployment import get_real_bento_tag
     from bentoml._internal.utils import rich_console as console
     from bentoml_cli.utils import BentoMLCommandGroup
 
-    client = BentoMLContainer.bentocloud_client.get()
+    @cli.command()
+    @click.argument(
+        "target",
+        type=click.STRING,
+        required=True,
+    )
+    @click.option("-n", "--name", type=click.STRING, help="Deployment name")
+    @click.option("--cluster", type=click.STRING, help="Name of the cluster")
+    @click.option(
+        "--access-type",
+        type=click.Choice(
+            [access_ctrl_type.value for access_ctrl_type in AccessControl]
+        ),
+        help="Type of access",
+    )
+    @click.option("--scaling-min", type=click.INT, help="Minimum scaling value")
+    @click.option("--scaling-max", type=click.INT, help="Maximum scaling value")
+    @click.option("--instance-type", type=click.STRING, help="Type of instance")
+    @click.option(
+        "--strategy",
+        type=click.Choice(
+            [deployment_strategy.value for deployment_strategy in DeploymentStrategy]
+        ),
+        help="Deployment strategy",
+    )
+    @click.option(
+        "--env",
+        type=click.STRING,
+        help="List of environment variables pass by --env key=value, --env ...",
+        multiple=True,
+    )
+    @click.option(
+        "--config-file",
+        type=click.File(),
+        help="Configuration file path",
+        default=None,
+    )
+    @click.option(
+        "--wait/--no-wait",
+        type=click.BOOL,
+        is_flag=True,
+        help="Wait for the deployment to be ready before returning (default: wait)",
+        default=True,
+    )
+    @click.option(
+        "--timeout",
+        type=click.INT,
+        default=300,
+        help="Timeout for deployment to be ready in seconds",
+    )
+    @click.pass_obj
+    def deploy(
+        shared_options: SharedOptions,
+        target: str,
+        name: str | None,
+        cluster: str | None,
+        access_type: str | None,
+        scaling_min: int | None,
+        scaling_max: int | None,
+        instance_type: str | None,
+        strategy: str | None,
+        env: tuple[str] | None,
+        config_file: t.TextIO | None,
+        wait: bool,
+        timeout: int,
+    ) -> None:
+        """Create a deployment on BentoCloud.
+
+        \b
+        Create a deployment using parameters (standalone mode only), or using config yaml file.
+
+        \b
+        TARGET is treated as a project path when it exists on disk, otherwise
+        as the name of an existing bento. Each --env must be KEY=VALUE; only the
+        first "=" separates the key from the value.
+        """
+        from os import path
+
+        # Decide whether TARGET is a path or a bento name, and resolve it in the
+        # same cloud context that the deployment itself is created in.
+        if path.exists(target):
+            click.echo(f"building bento from {target} ...")
+            bento_tag = get_real_bento_tag(
+                project_path=target, context=shared_options.cloud_context
+            )
+        else:
+            click.echo(f"using bento {target}...")
+            bento_tag = get_real_bento_tag(
+                bento=target, context=shared_options.cloud_context
+            )
+
+        # `envs` stays None only when `env` is None; an empty tuple gives [].
+        envs = None
+        if env is not None:
+            envs = []
+            for item in env:
+                # Split on the first "=" only so values may contain "=" themselves.
+                key, sep, value = item.partition("=")
+                # A missing "=" or an empty key is reported as a usage error.
+                if not sep or not key:
+                    raise click.BadParameter(
+                        f"expected KEY=VALUE, got '{item}'", param_hint="--env"
+                    )
+                envs.append({"key": key, "value": value})
+
+        deployment = Deployment.create(
+            bento=bento_tag,
+            name=name,
+            cluster_name=cluster,
+            access_type=access_type,
+            scaling_min=scaling_min,
+            scaling_max=scaling_max,
+            instance_type=instance_type,
+            strategy=strategy,
+            envs=envs,
+            config_file=config_file,
+            context=shared_options.cloud_context,
+        )
+
+        # With --no-wait, skip the readiness wait and only print the deployment URL.
+        if wait:
+            deployment.wait_until_ready(timeout=timeout)
+            click.echo(
+                f"Deployment '{deployment.name}' created successfully in cluster '{deployment.cluster_name}'."
+            )
+        click.echo(
+            f"To check the deployment, go to: {deployment.get_bento_cloud_url()}."
+        )
+
     output_option = click.option(
         "-o",
         "--output",
-        type=click.Choice(["json", "default"]),
-        default="default",
+        type=click.Choice(["yaml", "json"]),
+        default="yaml",
         help="Display the output of this command.",
     )
 
@@ -42,26 +173,16 @@ def shared_decorator(
         def decorate(f: t.Callable[..., t.Any]) -> t.Callable[..., t.Any]:
             options = [
                 click.argument(
-                    "deployment-name",
+                    "name",
                     type=click.STRING,
                     required=required_deployment_name,
                 ),
-                cog.optgroup.group(
-                    cls=cog.AllOptionGroup, name="cluster and kube namespace options"
-                ),
-                cog.optgroup.option(
-                    "--cluster-name",
+                click.option(
+                    "--cluster",
                     type=click.STRING,
                     default=None,
                     help="Name of the cluster.",
                 ),
-                cog.optgroup.option(
-                    "--kube-namespace",
-                    type=click.STRING,
-                    default=None,
-                    help="Kubernetes namespace.",
-                ),
-                output_option,
             ]
             for opt in reversed(options):
                 f = opt(f)
@@ -77,238 +198,201 @@ def deployment_cli():
         """Deployment Subcommands Groups"""
 
     @deployment_cli.command()
+    @shared_decorator(required_deployment_name=True)
+    @cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name="target options")
+    @cog.optgroup.option("--bento", type=click.STRING, help="Bento name")
+    @cog.optgroup.option(
+        "--project-path",
+        type=click.Path(exists=True),
+        help="Path to the project",
+    )
     @click.option(
-        "-f",
-        "--file",
-        type=click.File(),
-        help="JSON file path for the deployment configuration",
+        "--access-type",
+        type=click.Choice(
+            [access_ctrl_type.value for access_ctrl_type in AccessControl]
+        ),
+        help="Type of access",
     )
-    @output_option
-    @click.pass_obj
-    def create(  # type: ignore
-        shared_options: SharedOptions,
-        file: str,
-        output: t.Literal["json", "default"],
-    ) -> DeploymentSchema:
-        """Create a deployment on BentoCloud.
-
-        \b
-        A deployment can be created using a json file with configurations.
-        The json file has the exact format as the one on BentoCloud Deployment UI.
-        """
-        res = client.deployment.create_from_file(
-            path_or_stream=file, context=shared_options.cloud_context
-        )
-        if output == "default":
-            console.print(res)
-        elif output == "json":
-            click.echo(json.dumps(bentoml_cattr.unstructure(res), indent=2))
-        return res
-
-    @deployment_cli.command()
-    @shared_decorator(required_deployment_name=False)
     @click.option(
-        "-f",
-        "--file",
-        type=click.File(),
-        help="JSON file path for the deployment configuration",
+        "--scaling-min",
+        type=click.INT,
+        help="Minimum scaling value",
+    )
+    @click.option("--scaling-max", type=click.INT, help="Maximum scaling value")
+    @click.option("--instance-type", type=click.STRING, help="Type of instance")
+    @click.option(
+        "--strategy",
+        type=click.Choice(
+            [deployment_strategy.value for deployment_strategy in DeploymentStrategy]
+        ),
+        help="Deployment strategy",
     )
     @click.option(
-        "-n", "--name", type=click.STRING, help="Deployment name (deprecated)"
+        "--env",
+        type=click.STRING,
+        help="List of environment variables pass by --env key=value, --env ...",
+        multiple=True,
+    )
+    @click.option(
+        "--config-file",
+        type=click.File(),
+        help="Configuration file path, mutually exclusive with other config options",
+        default=None,
     )
-    @click.option("--bento", type=click.STRING, help="Bento tag")
     @click.pass_obj
     def update(  # type: ignore
         shared_options: SharedOptions,
-        deployment_name: str | None,
-        file: str | None,
-        name: str | None,
+        name: str,
+        cluster: str | None,
+        project_path: str | None,
         bento: str | None,
-        cluster_name: str | None,
-        kube_namespace: str | None,
-        output: t.Literal["json", "default"],
-    ) -> DeploymentSchema:
+        access_type: str | None,
+        scaling_min: int | None,
+        scaling_max: int | None,
+        instance_type: str | None,
+        strategy: str | None,
+        env: tuple[str] | None,
+        config_file: t.TextIO | None,
+    ) -> None:
         """Update a deployment on BentoCloud.
 
         \b
-        A deployment can be updated using a json file with needed configurations.
-        The json file has the exact format as the one on BentoCloud Deployment UI.
+        A deployment can be updated using parameters (standalone mode only), or using config yaml file.
+        You can also update bento by providing a project path or existing bento.
+        Each --env must be KEY=VALUE; only the first "=" separates key and value.
         """
-        if name is not None:
-            click.echo(
-                "--name is deprecated, pass DEPLOYMENT_NAME as an argument instead, e.g., bentoml update deploy-name"
-            )
-        if file is not None:
-            if name is not None:
-                click.echo("Reading from file, ignoring --name", err=True)
-            elif deployment_name is not None:
-                click.echo(
-                    "Reading from file, ignoring argument DEPLOYMENT_NAME", err=True
-                )
-            res = client.deployment.update_from_file(
-                path_or_stream=file, context=shared_options.cloud_context
-            )
-        elif name is not None:
-            res = client.deployment.update(
-                name,
-                bento=bento,
-                context=shared_options.cloud_context,
-                latest_bento=True,
-                cluster_name=cluster_name,
-                kube_namespace=kube_namespace,
-            )
-        elif deployment_name is not None:
-            res = client.deployment.update(
-                deployment_name,
+        # Without --bento/--project-path, bento=None is passed to Deployment.update.
+        if bento is None and project_path is None:
+            target = None
+        else:
+            target = get_real_bento_tag(
+                project_path=project_path,
                 bento=bento,
                 context=shared_options.cloud_context,
-                latest_bento=True,
-                cluster_name=cluster_name,
-                kube_namespace=kube_namespace,
             )
-        else:
-            raise click.BadArgumentUsage(
-                "Either --file or argument DEPLOYMENT_NAME is required for update command"
-            )
-        if output == "default":
-            console.print(res)
-        elif output == "json":
-            unstructured = bentoml_cattr.unstructure(res)
-            click.echo(json.dumps(unstructured, indent=2))
-        return res
+
+        # `envs` stays None only when `env` is None; an empty tuple gives [].
+        envs = None
+        if env is not None:
+            envs = []
+            for item in env:
+                # Split on the first "=" only so values may contain "=" themselves.
+                key, sep, value = item.partition("=")
+                # A missing "=" or an empty key is reported as a usage error.
+                if not sep or not key:
+                    raise click.BadParameter(
+                        f"expected KEY=VALUE, got '{item}'", param_hint="--env"
+                    )
+                envs.append({"key": key, "value": value})
+
+        Deployment.update(
+            bento=target,
+            access_type=access_type,
+            name=name,
+            cluster_name=cluster,
+            scaling_min=scaling_min,
+            scaling_max=scaling_max,
+            instance_type=instance_type,
+            strategy=strategy,
+            envs=envs,
+            config_file=config_file,
+            context=shared_options.cloud_context,
+        )
+
+        click.echo(f"Deployment '{name}' updated successfully.")
 
     @deployment_cli.command()
     @shared_decorator
+    @output_option
     @click.pass_obj
     def get(  # type: ignore
         shared_options: SharedOptions,
-        deployment_name: str,
-        cluster_name: str | None,
-        kube_namespace: str | None,
+        name: str,
+        cluster: str | None,
         output: t.Literal["json", "default"],
-    ) -> DeploymentSchema:
+    ) -> None:
         """Get a deployment on BentoCloud."""
-        res = client.deployment.get(
-            deployment_name=deployment_name,
-            context=shared_options.cloud_context,
-            cluster_name=cluster_name,
-            kube_namespace=kube_namespace,
+        d = Deployment.get(
+            name, context=shared_options.cloud_context, cluster_name=cluster
         )
-        if output == "default":
-            console.print(res)
-        elif output == "json":
-            unstructured = bentoml_cattr.unstructure(res)
-            click.echo(json.dumps(unstructured, indent=2))
-        return res
+        if output == "json":
+            info = json.dumps(d.info.to_dict(), indent=2, default=str)
+            console.print_json(info)
+        else:
+            info = yaml.dump(d.info.to_dict(), indent=2, sort_keys=False)
+            console.print(Syntax(info, "yaml", background_color="default"))
 
     @deployment_cli.command()
     @shared_decorator
     @click.pass_obj
     def terminate(  # type: ignore
         shared_options: SharedOptions,
-        deployment_name: str,
-        cluster_name: str | None,
-        kube_namespace: str | None,
-        output: t.Literal["json", "default"],
-    ) -> DeploymentSchema:
+        name: str,
+        cluster: str | None,
+    ) -> None:
         """Terminate a deployment on BentoCloud."""
-        res = client.deployment.terminate(
-            deployment_name=deployment_name,
-            context=shared_options.cloud_context,
-            cluster_name=cluster_name,
-            kube_namespace=kube_namespace,
+        Deployment.terminate(
+            name, context=shared_options.cloud_context, cluster_name=cluster
         )
-        if output == "default":
-            console.print(res)
-        elif output == "json":
-            unstructured = bentoml_cattr.unstructure(res)
-            click.echo(json.dumps(unstructured, indent=2))
-        return res
+        click.echo(f"Deployment '{name}' terminated successfully.")
 
     @deployment_cli.command()
     @shared_decorator
     @click.pass_obj
     def delete(  # type: ignore
         shared_options: SharedOptions,
-        deployment_name: str,
-        cluster_name: str | None,
-        kube_namespace: str | None,
-        output: t.Literal["json", "default"],
-    ) -> DeploymentSchema:
+        name: str,
+        cluster: str | None,
+    ) -> None:
         """Delete a deployment on BentoCloud."""
-        res = client.deployment.delete(
-            deployment_name=deployment_name,
-            context=shared_options.cloud_context,
-            cluster_name=cluster_name,
-            kube_namespace=kube_namespace,
+        Deployment.delete(
+            name, context=shared_options.cloud_context, cluster_name=cluster
         )
-        if output == "default":
-            console.print(res)
-        elif output == "json":
-            unstructured = bentoml_cattr.unstructure(res)
-            click.echo(json.dumps(unstructured, indent=2))
-        return res
+        click.echo(f"Deployment '{name}' deleted successfully.")
 
     @deployment_cli.command()
     @click.option(
-        "--cluster-name", type=click.STRING, default=None, help="Name of the cluster."
-    )
-    @click.option(
-        "--query", type=click.STRING, default=None, help="Query for list request."
+        "--cluster", type=click.STRING, default=None, help="Name of the cluster."
     )
     @click.option(
         "--search", type=click.STRING, default=None, help="Search for list request."
     )
-    @click.option(
-        "--start", type=click.STRING, default=None, help="Start for list request."
-    )
-    @click.option(
-        "--count", type=click.STRING, default=None, help="Count for list request."
-    )
     @click.option(
         "-o",
         "--output",
         help="Display the output of this command.",
-        type=click.Choice(["json", "default", "table"]),
+        type=click.Choice(["json", "yaml", "table"]),
         default="table",
     )
     @click.pass_obj
     def list(  # type: ignore
         shared_options: SharedOptions,
-        cluster_name: str | None,
-        query: str | None,
+        cluster: str | None,
         search: str | None,
-        count: int | None,
-        start: int | None,
-        output: t.Literal["json", "default", "table"],
-    ) -> DeploymentListSchema:
+        output: t.Literal["json", "yaml", "table"],
+    ) -> None:
         """List existing deployments on BentoCloud."""
-        res = client.deployment.list(
-            context=shared_options.cloud_context,
-            cluster_name=cluster_name,
-            query=query,
-            search=search,
-            count=count,
-            start=start,
+        d_list = Deployment.list(
+            context=shared_options.cloud_context, cluster_name=cluster, search=search
         )
+        res: list[dict[str, t.Any]] = [d.info.to_dict() for d in d_list]
         if output == "table":
             table = Table(box=None)
             table.add_column("Deployment")
+            table.add_column("created_at")
             table.add_column("Bento")
             table.add_column("Status")
-            table.add_column("Created At")
-            for deployment in res.items:
-                target = deployment.latest_revision.targets[0]
+            for info in res:
                 table.add_row(
-                    deployment.name,
-                    f"{target.bento.repository.name}:{target.bento.name}",
-                    deployment.status.value,
-                    deployment.created_at.astimezone().strftime("%Y-%m-%d %H:%M:%S"),
+                    info["name"],
+                    info["created_at"],
+                    info["bento"],
+                    info["status"],
                 )
             console.print(table)
-        elif output == "default":
-            console.print(res)
         elif output == "json":
-            unstructured = bentoml_cattr.unstructure(res)
-            click.echo(json.dumps(unstructured, indent=2))
-        return res
+            info = json.dumps(res, indent=2, default=str)
+            console.print_json(info)
+        else:
+            info = yaml.dump(res, indent=2, sort_keys=False)
+            console.print(Syntax(info, "yaml", background_color="default"))
diff --git a/tests/unit/_internal/cloud/test_deployment.py b/tests/unit/_internal/cloud/test_deployment.py
index 2fb4ebd3c5d..4bd5fd96259 100644
--- a/tests/unit/_internal/cloud/test_deployment.py
+++ b/tests/unit/_internal/cloud/test_deployment.py
@@ -7,825 +7,534 @@
 import attr
 import pytest
 
-from bentoml._internal.cloud.schemas import BentoFullSchema
-from bentoml._internal.cloud.schemas import BentoImageBuildStatus
-from bentoml._internal.cloud.schemas import BentoManifestSchema
-from bentoml._internal.cloud.schemas import BentoRepositorySchema
-from bentoml._internal.cloud.schemas import BentoUploadStatus
-from bentoml._internal.cloud.schemas import ClusterSchema
-from bentoml._internal.cloud.schemas import CreateDeploymentSchema
-from bentoml._internal.cloud.schemas import CreateDeploymentTargetSchema
-from bentoml._internal.cloud.schemas import DeploymentMode
-from bentoml._internal.cloud.schemas import DeploymentRevisionSchema
-from bentoml._internal.cloud.schemas import DeploymentRevisionStatus
-from bentoml._internal.cloud.schemas import DeploymentSchema
-from bentoml._internal.cloud.schemas import DeploymentStatus
-from bentoml._internal.cloud.schemas import DeploymentTargetCanaryRule
-from bentoml._internal.cloud.schemas import DeploymentTargetCanaryRuleType
-from bentoml._internal.cloud.schemas import DeploymentTargetConfig
-from bentoml._internal.cloud.schemas import DeploymentTargetHPAConf
-from bentoml._internal.cloud.schemas import DeploymentTargetRunnerConfig
-from bentoml._internal.cloud.schemas import DeploymentTargetSchema
-from bentoml._internal.cloud.schemas import DeploymentTargetType
-from bentoml._internal.cloud.schemas import LabelItemSchema
-from bentoml._internal.cloud.schemas import ResourceType
-from bentoml._internal.cloud.schemas import UpdateDeploymentSchema
-from bentoml._internal.cloud.schemas import UserSchema
-from bentoml.cloud import BentoCloudClient
-from bentoml.cloud import Resource
+from bentoml._internal.cloud.client import RestApiClient
+from bentoml._internal.cloud.deployment import Deployment
+from bentoml._internal.cloud.schemas.modelschemas import AccessControl
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentServiceConfig
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentStrategy
+from bentoml._internal.cloud.schemas.modelschemas import DeploymentTargetHPAConf
+from bentoml._internal.cloud.schemas.schemasv1 import BentoFullSchema
+from bentoml._internal.cloud.schemas.schemasv1 import BentoImageBuildStatus
+from bentoml._internal.cloud.schemas.schemasv1 import BentoManifestSchema
+from bentoml._internal.cloud.schemas.schemasv1 import BentoRepositorySchema
+from bentoml._internal.cloud.schemas.schemasv1 import BentoUploadStatus
+from bentoml._internal.cloud.schemas.schemasv1 import ClusterListSchema
+from bentoml._internal.cloud.schemas.schemasv1 import ClusterSchema
+from bentoml._internal.cloud.schemas.schemasv1 import DeploymentRevisionStatus
+from bentoml._internal.cloud.schemas.schemasv1 import DeploymentStatus
+from bentoml._internal.cloud.schemas.schemasv1 import LabelItemSchema
+from bentoml._internal.cloud.schemas.schemasv1 import ResourceType
+from bentoml._internal.cloud.schemas.schemasv1 import UserSchema
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    CreateDeploymentSchema as CreateDeploymentSchemaV2,
+)
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    DeploymentFullSchema as DeploymentFullSchemaV2,
+)
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    DeploymentRevisionSchema as DeploymentRevisionSchemaV2,
+)
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    DeploymentTargetConfig as DeploymentTargetConfigV2,
+)
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    DeploymentTargetSchema as DeploymentTargetSchemaV2,
+)
+from bentoml._internal.cloud.schemas.schemasv2 import (
+    UpdateDeploymentSchema as UpdateDeploymentSchemaV2,
+)
 
 if t.TYPE_CHECKING:
     from unittest.mock import MagicMock
 
 
-def f_create(
-    create_deployment_schema: CreateDeploymentSchema,
-    context: str | None = None,
-    cluster_name: str | None = None,
-):
-    return create_deployment_schema
-
-
-def f_update(
-    deployment_name: str,
-    update_deployment_schema: UpdateDeploymentSchema,
-    kube_namespace: str | None = None,
-    context: str | None = None,
-    cluster_name: str | None = None,
-):
-    return update_deployment_schema
-
-
-@pytest.fixture(name="get_schema", scope="function")
-def fixture_get_schema() -> DeploymentSchema:
-    user = UserSchema(name="", email="", first_name="", last_name="")
-    return DeploymentSchema(
-        latest_revision=DeploymentRevisionSchema(
-            targets=[
-                DeploymentTargetSchema(
-                    type=DeploymentTargetType.STABLE,
-                    bento=BentoFullSchema(
-                        uid="",
-                        created_at=datetime(2023, 5, 25),
-                        updated_at=None,
-                        deleted_at=None,
-                        name="12345",
-                        resource_type=ResourceType.BENTO,
-                        labels=[],
-                        description="",
-                        version="",
-                        image_build_status=BentoImageBuildStatus.PENDING,
-                        upload_status=BentoUploadStatus.SUCCESS,
-                        upload_finished_reason="",
-                        presigned_upload_url="",
-                        presigned_download_url="",
-                        manifest=BentoManifestSchema(
-                            name="",
-                            service="",
-                            bentoml_version="",
-                            size_bytes=0,
-                            apis={},
-                            models=["iris_clf:ddaex6h2vw6kwcvj"],
-                        ),
-                        build_at=datetime(2023, 5, 25),
-                        repository=BentoRepositorySchema(
+@attr.define
+class DummyUpdateSchema(UpdateDeploymentSchemaV2):
+    """Update schema with an extra ``urls`` field, built by ``dummy_update_deployment``."""
+    # placeholder for the urls that are assigned to deployment._urls
+    urls: t.List[str] = attr.Factory(list)
+
+
+@pytest.fixture(name="rest_client", scope="function")
+def fixture_rest_client() -> RestApiClient:
+    def dummy_create_deployment(
+        create_schema: CreateDeploymentSchemaV2, cluster_name: str
+    ):
+        return create_schema
+
+    def dummy_update_deployment(
+        update_schema: UpdateDeploymentSchemaV2, cluster_name: str, deployment_name: str
+    ):
+        from bentoml._internal.utils import bentoml_cattr
+
+        return bentoml_cattr.structure(attr.asdict(update_schema), DummyUpdateSchema)
+
+    def dummy_get_deployment(cluster_name: str, deployment_name: str):
+        if deployment_name == "test-distributed":
+            return DeploymentFullSchemaV2(
+                distributed=True,
+                latest_revision=DeploymentRevisionSchemaV2(
+                    targets=[
+                        DeploymentTargetSchemaV2(
+                            bento=BentoFullSchema(
+                                uid="",
+                                created_at=datetime(2023, 5, 25),
+                                updated_at=None,
+                                deleted_at=None,
+                                name="123",
+                                resource_type=ResourceType.BENTO,
+                                labels=[],
+                                description="",
+                                version="",
+                                image_build_status=BentoImageBuildStatus.PENDING,
+                                upload_status=BentoUploadStatus.SUCCESS,
+                                upload_finished_reason="",
+                                presigned_upload_url="",
+                                presigned_download_url="",
+                                manifest=BentoManifestSchema(
+                                    service="",
+                                    bentoml_version="",
+                                    size_bytes=0,
+                                    apis={},
+                                    models=["iris_clf:ddaex6h2vw6kwcvj"],
+                                ),
+                                build_at=datetime(2023, 5, 25),
+                                repository=BentoRepositorySchema(
+                                    uid="",
+                                    created_at="",
+                                    updated_at=None,
+                                    deleted_at=None,
+                                    name="abc",
+                                    resource_type=ResourceType.BENTO_REPOSITORY,
+                                    labels=[],
+                                    description="",
+                                    latest_bento="",
+                                ),
+                            ),
+                            config=DeploymentTargetConfigV2(
+                                access_type=AccessControl.PUBLIC,
+                                envs=[
+                                    LabelItemSchema(key="env_key", value="env_value")
+                                ],
+                                services={
+                                    "irisclassifier": DeploymentServiceConfig(
+                                        instance_type="t3-small",
+                                        scaling=DeploymentTargetHPAConf(
+                                            min_replicas=1, max_replicas=1
+                                        ),
+                                        deployment_strategy=DeploymentStrategy.RollingUpdate,
+                                    ),
+                                    "preprocessing": DeploymentServiceConfig(
+                                        instance_type="t3-small",
+                                        scaling=DeploymentTargetHPAConf(
+                                            min_replicas=1, max_replicas=1
+                                        ),
+                                        deployment_strategy=DeploymentStrategy.RollingUpdate,
+                                    ),
+                                },
+                            ),
                             uid="",
-                            created_at="",
+                            created_at=datetime(2023, 5, 1),
                             updated_at=None,
                             deleted_at=None,
-                            name="iris_classifier",
-                            resource_type=ResourceType.BENTO_REPOSITORY,
+                            name="",
+                            resource_type=ResourceType.DEPLOYMENT_REVISION,
                             labels=[],
-                            description="",
-                            latest_bento="",
-                        ),
-                    ),
-                    config=DeploymentTargetConfig(
-                        resource_instance="t3-micro",
-                        enable_ingress=True,
-                        hpa_conf=DeploymentTargetHPAConf(
-                            min_replicas=2, max_replicas=10
-                        ),
-                        runners={
-                            "runner1": DeploymentTargetRunnerConfig(
-                                resource_instance="t3-small",
-                                hpa_conf=DeploymentTargetHPAConf(
-                                    min_replicas=3, max_replicas=10
+                            creator=user,
+                        )
+                    ],
+                    uid="",
+                    created_at=datetime(2023, 5, 1),
+                    updated_at=None,
+                    deleted_at=None,
+                    name="test=xxx",
+                    resource_type=ResourceType.DEPLOYMENT_REVISION,
+                    labels=[],
+                    creator=user,
+                    status=DeploymentRevisionStatus.ACTIVE,
+                ),
+                uid="",
+                created_at=datetime(2023, 5, 1),
+                updated_at=None,
+                deleted_at=None,
+                name="test=xxx",
+                resource_type=ResourceType.DEPLOYMENT_REVISION,
+                labels=[],
+                creator=user,
+                status=DeploymentStatus.Running,
+                cluster=ClusterSchema(
+                    uid="",
+                    name="default",
+                    resource_type=ResourceType.CLUSTER,
+                    labels=[],
+                    description="",
+                    creator=user,
+                    created_at=datetime(2023, 5, 1),
+                    updated_at=None,
+                    deleted_at=None,
+                ),
+                kube_namespace="",
+            )
+
+        else:
+            return DeploymentFullSchemaV2(
+                distributed=False,
+                latest_revision=DeploymentRevisionSchemaV2(
+                    targets=[
+                        DeploymentTargetSchemaV2(
+                            bento=BentoFullSchema(
+                                uid="",
+                                created_at=datetime(2023, 5, 25),
+                                updated_at=None,
+                                deleted_at=None,
+                                name="123",
+                                resource_type=ResourceType.BENTO,
+                                labels=[],
+                                description="",
+                                version="",
+                                image_build_status=BentoImageBuildStatus.PENDING,
+                                upload_status=BentoUploadStatus.SUCCESS,
+                                upload_finished_reason="",
+                                presigned_upload_url="",
+                                presigned_download_url="",
+                                manifest=BentoManifestSchema(
+                                    service="",
+                                    bentoml_version="",
+                                    size_bytes=0,
+                                    apis={},
+                                    models=["iris_clf:ddaex6h2vw6kwcvj"],
+                                ),
+                                build_at=datetime(2023, 5, 25),
+                                repository=BentoRepositorySchema(
+                                    uid="",
+                                    created_at="",
+                                    updated_at=None,
+                                    deleted_at=None,
+                                    name="abc",
+                                    resource_type=ResourceType.BENTO_REPOSITORY,
+                                    labels=[],
+                                    description="",
+                                    latest_bento="",
                                 ),
                             ),
-                            "runner2": DeploymentTargetRunnerConfig(
-                                resource_instance="t3-medium",
-                                hpa_conf=DeploymentTargetHPAConf(
-                                    min_replicas=5, max_replicas=10
+                            config=DeploymentTargetConfigV2(
+                                access_type=AccessControl.PUBLIC,
+                                scaling=DeploymentTargetHPAConf(
+                                    min_replicas=3, max_replicas=5
                                 ),
+                                deployment_strategy=DeploymentStrategy.RollingUpdate,
+                                envs=[
+                                    LabelItemSchema(key="env_key", value="env_value")
+                                ],
                             ),
-                        },
-                    ),
-                    canary_rules=[],
+                            uid="",
+                            created_at=datetime(2023, 5, 1),
+                            updated_at=None,
+                            deleted_at=None,
+                            name="",
+                            resource_type=ResourceType.DEPLOYMENT_REVISION,
+                            labels=[],
+                            creator=user,
+                        )
+                    ],
                     uid="",
                     created_at=datetime(2023, 5, 1),
                     updated_at=None,
                     deleted_at=None,
-                    name="",
+                    name="test=xxx",
                     resource_type=ResourceType.DEPLOYMENT_REVISION,
                     labels=[],
                     creator=user,
-                )
-            ],
-            uid="",
-            created_at=datetime(2023, 5, 1),
-            updated_at=None,
-            deleted_at=None,
-            name="test=xxx",
-            resource_type=ResourceType.DEPLOYMENT_REVISION,
-            labels=[],
-            creator=user,
-            status=DeploymentRevisionStatus.ACTIVE,
-        ),
-        uid="",
-        created_at=datetime(2023, 5, 1),
-        updated_at=None,
-        deleted_at=None,
-        name="test=xxx",
-        resource_type=ResourceType.DEPLOYMENT_REVISION,
-        labels=[],
-        creator=user,
-        status=DeploymentStatus.Running,
-        cluster=ClusterSchema(
-            uid="",
-            name="default",
-            resource_type=ResourceType.CLUSTER,
-            labels=[],
-            description="",
-            creator=user,
-            created_at=datetime(2023, 5, 1),
-            updated_at=None,
-            deleted_at=None,
-        ),
-        kube_namespace="",
-    )
-
-
-@pytest.fixture(scope="function", name="cloudclient")
-def fixture_cloudclient() -> BentoCloudClient:
-    return BentoCloudClient()
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx", bento="iris_classifier:dqjxjyx2vweogcvj"
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_canary_rules(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-    rules = [
-        DeploymentTargetCanaryRule(DeploymentTargetCanaryRuleType.WEIGHT, 3, "", "", "")
-    ]
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        canary_rules=rules,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(),
-                canary_rules=rules,
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_labels(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        labels={"user": "steve"},
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(),
+                    status=DeploymentRevisionStatus.ACTIVE,
+                ),
+                uid="",
+                created_at=datetime(2023, 5, 1),
+                updated_at=None,
+                deleted_at=None,
+                name="test=xxx",
+                resource_type=ResourceType.DEPLOYMENT_REVISION,
+                labels=[],
+                creator=user,
+                status=DeploymentStatus.Running,
+                cluster=ClusterSchema(
+                    uid="",
+                    name="default",
+                    resource_type=ResourceType.CLUSTER,
+                    labels=[],
+                    description="",
+                    creator=user,
+                    created_at=datetime(2023, 5, 1),
+                    updated_at=None,
+                    deleted_at=None,
+                ),
+                kube_namespace="",
             )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-        labels=[LabelItemSchema("user", "steve")],
-    )
 
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_resource_instance(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        resource_instance="test-instance",
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(resource_instance="test-instance"),
+    client = RestApiClient("", "")
+    user = UserSchema(name="", email="", first_name="", last_name="")
+    client.v2.create_deployment = dummy_create_deployment  # type: ignore
+    client.v2.update_deployment = dummy_update_deployment  # type: ignore
+    client.v1.get_cluster_list = lambda params: ClusterListSchema(
+        start=0,
+        count=0,
+        total=0,
+        items=[
+            ClusterSchema(
+                uid="",
+                name="default",
+                resource_type=ResourceType.CLUSTER,
+                labels=[],
+                description="",
+                creator=user,
+                created_at=datetime(2023, 5, 1),
+                updated_at=None,
+                deleted_at=None,
             )
         ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
+    )  # type: ignore
 
+    client.v2.get_deployment = dummy_get_deployment
 
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_resource_instance_runner(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-    runner = Resource.for_runner(enable_debug_mode=True)
-
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        resource_instance="test-instance",
-        runners_config={"runner": runner},
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    resource_instance="test-instance",
-                    runners={
-                        "runner": DeploymentTargetRunnerConfig(
-                            resource_instance="test-instance", enable_debug_mode=True
-                        )
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
+    return client
 
 
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_resource_instance_api_server(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
-):
-    mock_create_deployment.side_effect = f_create
-    api_server = Resource.for_api_server(enable_ingress=True)
-
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        resource_instance="test-resource",
-        api_server_config=api_server,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    resource_instance="test-resource", enable_ingress=True
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment(mock_get_client: MagicMock, rest_client: RestApiClient):
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.create(bento="abc:123")
+    # Only the bento is given: expect 1/1 scaling, public access, cluster "default".
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=1),
+        bento="abc:123",
+        name="",
+        cluster="default",
+        access_type=AccessControl.PUBLIC,
+        distributed=False,
     )
 
 
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_api_server(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment_custom_standalone(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    api_server_conf = Resource.for_api_server(resource_instance="t3-micro")
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        api_server_config=api_server_conf,
-    )
-
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(resource_instance="t3-micro"),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_hpa_conf(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.create(
+        bento="abc:123",
+        name="custom-name",
+        scaling_min=2,
+        scaling_max=4,
+        access_type="private",
+        cluster_name="custom-cluster",
+        envs=[{"key": "env_key", "value": "env_value"}],
+        strategy="RollingUpdate",
+    )
+    # Explicit arguments should appear in the schema, with strings mapped to enums.
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        bento="abc:123",
+        name="custom-name",
+        cluster="custom-cluster",
+        access_type=AccessControl.PRIVATE,
+        scaling=DeploymentTargetHPAConf(min_replicas=2, max_replicas=4),
+        distributed=False,
+        deployment_strategy=DeploymentStrategy.RollingUpdate,
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment_scailing_only_min(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        hpa_conf=hpa_conf,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(hpa_conf=hpa_conf),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_runner(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.create(bento="abc:123", scaling_min=3)
+    # Only scaling_min=3 is given: max_replicas is expected to match it (3/3).
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        bento="abc:123",
+        name="",
+        cluster="default",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=3),
+        distributed=False,
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment_scailing_only_max(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        runners_config={"runner1": runner},
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(runners={"runner1": runner}),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_runner_hpa_conf(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.create(bento="abc:123", scaling_max=3)
+    # Only scaling_max=3 is given: min_replicas is expected to be 1 (1/3).
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        bento="abc:123",
+        name="",
+        cluster="default",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=3),
+        distributed=False,
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment_scailing_mismatch_min_max(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10)
-    runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        runners_config={"runner1": runner},
-        hpa_conf=hpa_conf,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    hpa_conf=hpa_conf,
-                    runners={
-                        "runner1": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-micro",
-                            hpa_conf=hpa_conf,
-                            enable_debug_mode=True,
-                        )
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_api_server_runner(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.create(bento="abc:123", scaling_min=3, scaling_max=2)
+    # scaling_min=3 exceeds scaling_max=2: both replicas are expected to be 2.
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        bento="abc:123",
+        name="",
+        cluster="default",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=2, max_replicas=2),
+        distributed=False,
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_create_deployment_config_dct(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    api_server = Resource.for_api_server(
-        resource_instance="t3-micro", enable_stealing_traffic_debug_mode=True
-    )
-    runner = Resource.for_runner(resource_instance="t3-micro", enable_debug_mode=True)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        runners_config={"runner1": runner},
-        api_server_config=api_server,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    resource_instance="t3-micro",
-                    enable_stealing_traffic_debug_mode=True,
-                    runners={
-                        "runner1": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-micro", enable_debug_mode=True
-                        )
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_api_server_hpa_conf(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+    mock_get_client.return_value = rest_client
+    config_dct = {
+        "services": {
+            "irisclassifier": {"scaling": {"max_replicas": 2, "min_replicas": 1}},
+            "preprocessing": {"scaling": {"max_replicas": 2}},
+        },
+        "envs": [{"key": "env_key", "value": "env_value"}],
+        "bentoml_config_overrides": {
+            "irisclassifier": {
+                "resources": {
+                    "cpu": "300m",
+                    "memory": "500m",
+                },
+            }
+        },
+    }
+    deployment = Deployment.create(bento="abc:123", config_dct=config_dct)
+    # Expect distributed=True with per-service scaling; the omitted min_replicas is 1.
+    assert deployment._schema == CreateDeploymentSchemaV2(
+        bento="abc:123",
+        name="",
+        cluster="default",
+        access_type=AccessControl.PUBLIC,
+        distributed=True,
+        services={
+            "irisclassifier": DeploymentServiceConfig(
+                scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=2)
+            ),
+            "preprocessing": DeploymentServiceConfig(
+                scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=2)
+            ),
+        },
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
+        bentoml_config_overrides={
+            "irisclassifier": {
+                "resources": {
+                    "cpu": "300m",
+                    "memory": "500m",
+                },
+            }
+        },
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_update_deployment(mock_get_client: MagicMock, rest_client: RestApiClient):
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.update(
+        name="test",
+        bento="abc:1234",
+        access_type="private",
+        envs=[{"key": "env_key2", "value": "env_value2"}],
+        strategy="Recreate",
+    )
+    # Passed fields should take the new values; scaling is expected to remain 3/5.
+    assert deployment._schema == DummyUpdateSchema(
+        bento="abc:1234",
+        access_type=AccessControl.PRIVATE,
+        scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=5),
+        deployment_strategy=DeploymentStrategy.Recreate,
+        envs=[LabelItemSchema(key="env_key2", value="env_value2")],
+    )
+
+
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_update_deployment_scaling_only_min(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    api_server = Resource.for_api_server(resource_instance="t3-micro")
-    hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        api_server_config=api_server,
-        hpa_conf=hpa_conf,
-    )
-    assert res == CreateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    resource_instance="t3-micro", hpa_conf=hpa_conf
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.update(name="test", scaling_min=1)
+    # Only scaling_min=1 is passed: expect scaling 1/5 and all other fields as before.
+    assert deployment._schema == DummyUpdateSchema(
+        bento="abc:123",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=5),
+        deployment_strategy=DeploymentStrategy.RollingUpdate,
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
     )
 
 
-@patch("bentoml._internal.cloud.deployment.Deployment._create_deployment")
-def test_create_deployment_api_server_runner_hpa_conf(
-    mock_create_deployment: MagicMock, cloudclient: BentoCloudClient
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_update_deployment_scaling_only_max(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_create_deployment.side_effect = f_create
-    api_server = Resource.for_api_server(resource_instance="t3-micro")
-    runner = Resource.for_runner(
-        resource_instance="t3-small", hpa_conf={"min_replicas": 3}
-    )
-    runner2 = Resource.for_runner(
-        resource_instance="t3-medium", hpa_conf={"min_replicas": 5}
-    )
-    hpa_conf = Resource.for_hpa_conf(min_replicas=2, max_replicas=10)
-    res = cloudclient.deployment.create(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        api_server_config=api_server,
-        hpa_conf=hpa_conf,
-        runners_config={"runner1": runner, "runner2": runner2},
-        expose_endpoint=True,
-        labels={"user": "steve"},
-    )
-    assert res == CreateDeploymentSchema(
-        labels=[LabelItemSchema("user", "steve")],
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="dqjxjyx2vweogcvj",
-                config=DeploymentTargetConfig(
-                    resource_instance="t3-micro",
-                    enable_ingress=True,
-                    hpa_conf=hpa_conf,
-                    runners={
-                        "runner1": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-small",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=3, max_replicas=10
-                            ),
-                        ),
-                        "runner2": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-medium",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=5, max_replicas=10
-                            ),
-                        ),
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        name="test-xxx",
-    )
-
-
-@pytest.fixture(name="update_schema", scope="function")
-def fixture_update_schema() -> UpdateDeploymentSchema:
-    return UpdateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="12345",
-                config=DeploymentTargetConfig(
-                    resource_instance="t3-micro",
-                    enable_ingress=True,
-                    hpa_conf=DeploymentTargetHPAConf(min_replicas=2, max_replicas=10),
-                    runners={
-                        "runner1": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-small",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=3, max_replicas=10
-                            ),
-                        ),
-                        "runner2": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-medium",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=5, max_replicas=10
-                            ),
-                        ),
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        labels=[],
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.update(name="test", scaling_max=3)
+    # Only scaling_max=3 is passed: expect scaling 3/3 and all other fields as before.
+    assert deployment._schema == DummyUpdateSchema(
+        bento="abc:123",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=3, max_replicas=3),
+        deployment_strategy=DeploymentStrategy.RollingUpdate,
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
     )
 
 
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_bento(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        bento="iris_classifier:dqjxjyx2vweogcvj",
-        cluster_name="",
-        kube_namespace="",
-    )
-    update_schema.targets[0].bento = "dqjxjyx2vweogcvj"
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_runner(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    new_runnner = Resource.for_runner(
-        resource_instance="new-resource", hpa_conf={"min_replicas": 6}
-    )
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        runners_config={"runner1": new_runnner},
-    )
-    update_schema.targets[0].config.runners["runner1"].hpa_conf.min_replicas = 6
-    update_schema.targets[0].config.runners[
-        "runner1"
-    ].resource_instance = "new-resource"
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_runner_hpa_conf(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    hpa_conf = Resource.for_hpa_conf(min_replicas=5)
-    new_runnner = Resource.for_runner(
-        resource_instance="new-resource", hpa_conf={"min_replicas": 7}
-    )
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        runners_config={"runner1": new_runnner},
-        hpa_conf=hpa_conf,
-    )
-    update_schema.targets[0].config.hpa_conf.min_replicas = 5
-    for k, v in update_schema.targets[0].config.runners.items():
-        if k == "runner1":
-            v.hpa_conf.min_replicas = 7
-            v.resource_instance = "new-resource"
-        else:
-            v.hpa_conf.min_replicas = 5
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_api_server(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    api_server = Resource.for_api_server(
-        enable_ingress=False, hpa_conf={"min_replicas": 5}
-    )
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        api_server_config=api_server,
-    )
-    update_schema.targets[0].config.hpa_conf.min_replicas = 5
-    update_schema.targets[0].config.enable_ingress = False
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_api_server_hpa_conf(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    api_server = Resource.for_api_server(hpa_conf={"min_replicas": 9})
-    hpa_conf = Resource.for_hpa_conf(min_replicas=8)
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        api_server_config=api_server,
-        hpa_conf=hpa_conf,
-    )
-    UpdateDeploymentSchema(
-        targets=[
-            CreateDeploymentTargetSchema(
-                type=DeploymentTargetType.STABLE,
-                bento_repository="iris_classifier",
-                bento="12345",
-                config=DeploymentTargetConfig(
-                    resource_instance="t3-micro",
-                    enable_ingress=True,
-                    hpa_conf=DeploymentTargetHPAConf(min_replicas=2, max_replicas=10),
-                    runners={
-                        "runner1": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-small",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=3, max_replicas=10
-                            ),
-                        ),
-                        "runner2": DeploymentTargetRunnerConfig(
-                            resource_instance="t3-medium",
-                            hpa_conf=DeploymentTargetHPAConf(
-                                min_replicas=5, max_replicas=10
-                            ),
-                        ),
-                    },
-                ),
-            )
-        ],
-        mode=DeploymentMode.Function,
-        labels=[],
-    )
-    update_schema.targets[0].config.hpa_conf.min_replicas = 9
-    for _, v in update_schema.targets[0].config.runners.items():
-        v.hpa_conf.min_replicas = 8
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_resource_instance(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
-):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        resource_instance="test-resource",
-    )
-    update_schema.targets[0].config.resource_instance = "test-resource"
-    for _, v in update_schema.targets[0].config.runners.items():
-        v.resource_instance = "test-resource"
-    assert res == update_schema
-
-
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_labels(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_update_deployment_scaling_too_big_min(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        labels={"user": "steve"},
+    mock_get_client.return_value = rest_client
+    deployment = Deployment.update(name="test", scaling_min=10)
+    # Requested scaling_min=10, but the expected min_replicas is 5 (same as max_replicas)
+    assert deployment._schema == DummyUpdateSchema(
+        bento="abc:123",
+        access_type=AccessControl.PUBLIC,
+        scaling=DeploymentTargetHPAConf(min_replicas=5, max_replicas=5),
+        deployment_strategy=DeploymentStrategy.RollingUpdate,
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
     )
-    assert res == attr.evolve(update_schema, labels=[LabelItemSchema("user", "steve")])
 
 
-@patch("bentoml._internal.cloud.deployment.Deployment.get")
-@patch("bentoml._internal.cloud.deployment.Deployment._update_deployment")
-def test_update_deployment_canary_rules(
-    mock_update_deployment: MagicMock,
-    mock_get: MagicMock,
-    update_schema: UpdateDeploymentSchema,
-    get_schema: DeploymentSchema,
-    cloudclient: BentoCloudClient,
+@patch("bentoml._internal.cloud.deployment.get_rest_api_client")
+def test_update_deployment_distributed(
+    mock_get_client: MagicMock, rest_client: RestApiClient
 ):
-    mock_update_deployment.side_effect = f_update
-    mock_get.return_value = get_schema
-    rules = [
-        DeploymentTargetCanaryRule(DeploymentTargetCanaryRuleType.WEIGHT, 3, "", "", "")
-    ]
-    res = cloudclient.deployment.update(
-        deployment_name="test-xxx",
-        cluster_name="",
-        kube_namespace="",
-        canary_rules=rules,
+    mock_get_client.return_value = rest_client
+    config_dct = {
+        "services": {
+            "irisclassifier": {"scaling": {"max_replicas": 50}},
+            "preprocessing": {"instance_type": "t3-large"},
+        }
+    }
+    deployment = Deployment.update(name="test-distributed", config_dct=config_dct)
+    # Expected schema carries the overrides: irisclassifier max_replicas=50, preprocessing t3-large
+    assert deployment._schema == DummyUpdateSchema(
+        bento="abc:123",
+        access_type=AccessControl.PUBLIC,
+        envs=[LabelItemSchema(key="env_key", value="env_value")],
+        services={
+            "irisclassifier": DeploymentServiceConfig(
+                instance_type="t3-small",
+                scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=50),
+                deployment_strategy=DeploymentStrategy.RollingUpdate,
+            ),
+            "preprocessing": DeploymentServiceConfig(
+                instance_type="t3-large",
+                scaling=DeploymentTargetHPAConf(min_replicas=1, max_replicas=1),
+                deployment_strategy=DeploymentStrategy.RollingUpdate,
+            ),
+        },
     )
-    update_schema.targets[0].canary_rules = rules
-    assert res == update_schema