diff --git a/cli/dstack/_internal/core/error.py b/cli/dstack/_internal/core/error.py index 23c5a456e..aeeec244b 100644 --- a/cli/dstack/_internal/core/error.py +++ b/cli/dstack/_internal/core/error.py @@ -12,12 +12,14 @@ class BackendError(DstackError): class BackendAuthError(BackendError): code = "invalid_backend_credentials" - message = "Backend credentials are invalid" + + +class BackendNotAvailableError(BackendError): + code = "backend_not_available" class NoMatchingInstanceError(BackendError): code = "no_matching_instance" - message = "No instance type matching requirements" class RepoNotInitializedError(DstackError): diff --git a/cli/dstack/_internal/hub/background/tasks/resubmit_jobs.py b/cli/dstack/_internal/hub/background/tasks/resubmit_jobs.py index 88a45e72a..5e0a9b205 100644 --- a/cli/dstack/_internal/hub/background/tasks/resubmit_jobs.py +++ b/cli/dstack/_internal/hub/background/tasks/resubmit_jobs.py @@ -31,8 +31,10 @@ async def _resubmit_projects_jobs(projects: List[Project]): "Credentials for %s project are invalid. Skipping job resubmission.", project.name ) continue - configurator = get_configurator(backend.name) - if configurator is None: + if backend is None or get_configurator(backend.name) is None: + logger.warning( + "Missing dependencies for %s. Skipping job resubmission.", project.backend + ) continue await run_async(_resubmit_backend_jobs, backend) logger.info("Finished resubmitting jobs for %s project", project.name) diff --git a/cli/dstack/_internal/hub/routers/projects.py b/cli/dstack/_internal/hub/routers/projects.py index b7eade317..d324021c5 100644 --- a/cli/dstack/_internal/hub/routers/projects.py +++ b/cli/dstack/_internal/hub/routers/projects.py @@ -114,9 +114,9 @@ async def update_project( configurator = get_backend_configurator(project_info.backend.__root__.type) try: await run_async(configurator.configure_project, project_info.backend.__root__) + await ProjectManager.update_project_from_info(project_info) except BackendConfigError as e: _error_response_on_config_error(e, path_to_config=["backend"]) - await ProjectManager.update_project_from_info(project_info) clear_backend_cache(project_info.project_name) return project_info diff --git a/cli/dstack/_internal/hub/routers/runners.py b/cli/dstack/_internal/hub/routers/runners.py index eb7c4fb7b..3b3932eee 100644 --- a/cli/dstack/_internal/hub/routers/runners.py +++ b/cli/dstack/_internal/hub/routers/runners.py @@ -26,7 +26,7 @@ async def run_runners(project_name: str, job: Job): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=error_detail( - NoMatchingInstanceError.message, code=NoMatchingInstanceError.code + "No instance type matching requirements", code=NoMatchingInstanceError.code ), ) except BuildNotFoundError as e: diff --git a/cli/dstack/_internal/hub/routers/util.py b/cli/dstack/_internal/hub/routers/util.py index 909e2e4e2..1058e6de7 100644 --- a/cli/dstack/_internal/hub/routers/util.py +++ b/cli/dstack/_internal/hub/routers/util.py @@ -3,7 +3,7 @@ from fastapi import HTTPException, status from dstack._internal.backend.base import Backend -from dstack._internal.core.error import BackendAuthError +from dstack._internal.core.error import BackendAuthError, BackendNotAvailableError from dstack._internal.hub.models import Project from dstack._internal.hub.repository.projects import ProjectManager from dstack._internal.hub.services.backends import cache as backends_cache @@ -28,7 +28,7 @@ async def get_backend(project: Project) -> Optional[Backend]: except BackendAuthError: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=error_detail(BackendAuthError.message, code=BackendAuthError.code), + detail=error_detail("Backend credentials are invalid", code=BackendAuthError.code), ) @@ -37,7 +37,10 @@ def get_backend_configurator(backend_type: str) -> Configurator: if configurator is None: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=error_detail(f"Backend {backend_type} not available"), + detail=error_detail( + f"Backend {backend_type} not available. Ensure the dependencies for {backend_type} are installed.", + code=BackendNotAvailableError.code, + ), ) return configurator diff --git a/cli/dstack/_internal/hub/services/backends/aws/configurator.py b/cli/dstack/_internal/hub/services/backends/aws/configurator.py index 7118c4508..51a21dc46 100644 --- a/cli/dstack/_internal/hub/services/backends/aws/configurator.py +++ b/cli/dstack/_internal/hub/services/backends/aws/configurator.py @@ -174,11 +174,15 @@ def _get_hub_extra_regions_element( def _get_hub_buckets_element( self, session: Session, region: str, selected: Optional[str] ) -> AWSBucketProjectElement: - if selected is not None: + if selected: self._validate_hub_bucket(session=session, region=region, bucket_name=selected) element = AWSBucketProjectElement(selected=selected) s3_client = session.client("s3") - response = s3_client.list_buckets() + try: + response = s3_client.list_buckets() + except botocore.exceptions.ClientError: + # We'll suggest no buckets if the user has no permission to list them + return element for bucket in response["Buckets"]: element.values.append( AWSBucketProjectElementValue( diff --git a/cli/dstack/_internal/hub/services/backends/lambdalabs/configurator.py b/cli/dstack/_internal/hub/services/backends/lambdalabs/configurator.py index 5d3841e3a..9c0fcf59b 100644 --- a/cli/dstack/_internal/hub/services/backends/lambdalabs/configurator.py +++ b/cli/dstack/_internal/hub/services/backends/lambdalabs/configurator.py @@ -2,6 +2,7 @@ from typing import Dict, List, Optional, Tuple, Union import botocore +import botocore.exceptions from boto3.session import Session from requests import HTTPError @@ -192,7 +193,11 @@ def _get_aws_bucket_element( ) -> ProjectElement: element = ProjectElement(selected=selected) s3_client = session.client("s3") - response = s3_client.list_buckets() + try: + response = s3_client.list_buckets() + except botocore.exceptions.ClientError: + # We'll suggest no buckets if the user has no permission to list them + return element bucket_names = [] for bucket in response["Buckets"]: bucket_names.append(bucket["Name"]) @@ -226,6 +231,16 @@ def _get_aws_storage_backend_config_data( def _get_aws_bucket_region(self, session: Session, bucket: str) -> str: s3_client = session.client("s3") - response = s3_client.head_bucket(Bucket=bucket) + try: + response = s3_client.head_bucket(Bucket=bucket) + except botocore.exceptions.ClientError: + raise BackendConfigError( + "Permissions for getting bucket region are required", + code="permissions_error", + fields=[ + ["storage_backend", "credentials", "access_key"], + ["storage_backend", "credentials", "secret_key"], + ], + ) region = response["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"] return region diff --git a/cli/dstack/api/hub/_api_client.py b/cli/dstack/api/hub/_api_client.py index 07f18c188..271473d7b 100644 --- a/cli/dstack/api/hub/_api_client.py +++ b/cli/dstack/api/hub/_api_client.py @@ -6,7 +6,7 @@ from dstack._internal.core.artifact import Artifact from dstack._internal.core.build import BuildNotFoundError -from dstack._internal.core.error import NoMatchingInstanceError +from dstack._internal.core.error import BackendNotAvailableError, NoMatchingInstanceError from dstack._internal.core.job import Job, JobHead from dstack._internal.core.log_event import LogEvent from dstack._internal.core.plan import RunPlan @@ -675,6 +675,12 @@ def _make_hub_request(request_func, host, *args, **kwargs) -> requests.Response: raise HubClientError( f"Got 500 Server Error from hub: {url}. Check server logs for details." ) + elif resp.status_code == 400: + body = resp.json() + detail = body.get("detail") + if detail is not None: + if detail.get("code") == BackendNotAvailableError.code: + raise HubClientError(detail["msg"]) return resp except requests.ConnectionError: raise HubClientError(f"Cannot connect to hub at {host}") diff --git a/docs/docs/reference/backends/aws.md b/docs/docs/reference/backends/aws.md index d90e8bd8a..eafe478be 100644 --- a/docs/docs/reference/backends/aws.md +++ b/docs/docs/reference/backends/aws.md @@ -38,16 +38,26 @@ services. { "Effect": "Allow", "Action": [ - "s3:PutObject", - "s3:GetObject", - "s3:DeleteObject", - "s3:ListBucket", - "s3:GetLifecycleConfiguration", - "s3:PutLifecycleConfiguration", - "s3:PutObjectTagging", - "s3:GetObjectTagging", - "s3:DeleteObjectTagging", - "s3:GetBucketAcl" + "s3:ListAllMyBuckets", + "s3:GetBucketLocation" + ], + "Resource": [ + "arn:aws:s3:::*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:GetLifecycleConfiguration", + "s3:PutLifecycleConfiguration", + "s3:PutObjectTagging", + "s3:GetObjectTagging", + "s3:DeleteObjectTagging", + "s3:GetBucketAcl" ], "Resource": [ "arn:aws:s3:::{bucket_name}",