Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cli/dstack/_internal/backend/aws/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional

import botocore.exceptions
from boto3 import Session

from dstack._internal.backend.aws import utils as aws_utils
Expand All @@ -10,6 +11,7 @@
from dstack._internal.backend.aws.storage import AWSStorage
from dstack._internal.backend.base import ComponentBasedBackend
from dstack._internal.backend.base import runs as base_runs
from dstack._internal.core.error import BackendAuthError


class AwsBackend(ComponentBasedBackend):
Expand Down Expand Up @@ -46,6 +48,7 @@ def __init__(
logs_client=aws_utils.get_logs_client(self._session),
bucket_name=self.backend_config.bucket_name,
)
self._check_credentials()

@classmethod
def load(cls) -> Optional["AwsBackend"]:
Expand Down Expand Up @@ -73,3 +76,9 @@ def create_run(self, repo_id: str) -> str:
aws_utils.get_logs_client(self._session), self.backend_config.bucket_name, repo_id
)
return base_runs.create_run(self._storage)

def _check_credentials(self):
try:
self.list_repo_heads()
except (botocore.exceptions.ClientError, botocore.exceptions.NoCredentialsError):
raise BackendAuthError()
41 changes: 23 additions & 18 deletions cli/dstack/_internal/backend/azure/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional

from azure.core.credentials import TokenCredential
from azure.core.exceptions import ClientAuthenticationError
from azure.identity import ClientSecretCredential, DefaultAzureCredential

from dstack._internal.backend.azure.compute import AzureCompute
Expand All @@ -9,6 +10,7 @@
from dstack._internal.backend.azure.secrets import AzureSecretsManager
from dstack._internal.backend.azure.storage import AzureStorage
from dstack._internal.backend.base import ComponentBasedBackend
from dstack._internal.core.error import BackendAuthError


class AzureBackend(ComponentBasedBackend):
Expand All @@ -25,24 +27,27 @@ def __init__(self, backend_config: AzureConfig, credential: Optional[TokenCreden
)
else:
credential = DefaultAzureCredential()
self._secrets_manager = AzureSecretsManager(
credential=credential,
vault_url=self.backend_config.vault_url,
)
self._storage = AzureStorage(
credential=credential,
storage_account=self.backend_config.storage_account,
)
self._compute = AzureCompute(
credential=credential,
azure_config=self.backend_config,
)
self._logging = AzureLogging(
credential=credential,
subscription_id=self.backend_config.subscription_id,
resource_group=self.backend_config.resource_group,
storage_account=self.backend_config.storage_account,
)
try:
self._secrets_manager = AzureSecretsManager(
credential=credential,
vault_url=self.backend_config.vault_url,
)
self._storage = AzureStorage(
credential=credential,
storage_account=self.backend_config.storage_account,
)
self._compute = AzureCompute(
credential=credential,
azure_config=self.backend_config,
)
self._logging = AzureLogging(
credential=credential,
subscription_id=self.backend_config.subscription_id,
resource_group=self.backend_config.resource_group,
storage_account=self.backend_config.storage_account,
)
except ClientAuthenticationError:
raise BackendAuthError()

@classmethod
def load(cls) -> Optional["AzureBackend"]:
Expand Down
8 changes: 6 additions & 2 deletions cli/dstack/_internal/backend/gcp/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import google.auth
import googleapiclient.discovery
import googleapiclient.errors
from google.auth.exceptions import DefaultCredentialsError
from google.oauth2 import service_account

from dstack._internal.backend.gcp import utils as gcp_utils
from dstack._internal.backend.gcp.config import GCPConfig
from dstack._internal.core.error import BackendError
from dstack._internal.core.error import BackendAuthError, BackendError


class NotEnoughPermissionError(BackendError):
Expand All @@ -21,7 +22,10 @@ def authenticate(backend_config: GCPConfig):
return service_account.Credentials.from_service_account_info(
json.loads(backend_config.credentials["data"])
)
default_credentials, _ = google.auth.default()
try:
default_credentials, _ = google.auth.default()
except DefaultCredentialsError:
raise BackendAuthError()
service_account_email = backend_config.credentials["service_account_email"]
iam_service = googleapiclient.discovery.build("iam", "v1", credentials=default_credentials)

Expand Down
8 changes: 6 additions & 2 deletions cli/dstack/_internal/core/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ def __init__(self, message: Optional[str] = None):


class BackendError(DstackError):
    """Base class for errors raised by backends.

    Deliberately defines no constructor of its own: construction is inherited
    from ``DstackError``. An ``__init__`` here that assigned ``self.message``
    would shadow the class-level ``message`` that subclasses such as
    ``BackendAuthError`` declare.

    NOTE(review): assumes ``DstackError.__init__`` accepts the same optional
    ``message`` argument -- confirm against its definition.
    """


class BackendAuthError(BackendError):
    """Signals that a backend could not authenticate with its configured credentials."""

    # Human-readable description; read from the class when building HTTP error details.
    message = "Backend credentials are invalid"
    # Machine-readable error identifier; passed as the ``code`` of HTTP error details.
    code = "invalid_backend_credentials"


class NoMatchingInstanceError(BackendError):
Expand Down
19 changes: 16 additions & 3 deletions cli/dstack/_internal/hub/background/tasks/resubmit_jobs.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from typing import List

from dstack._internal.backend.base import Backend
from dstack._internal.core.error import BackendAuthError
from dstack._internal.core.job import JobStatus
from dstack._internal.hub.db.models import Project
from dstack._internal.hub.repository.projects import ProjectManager
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.services.backends import get_configurator
from dstack._internal.hub.services.backends.cache import get_backend
from dstack._internal.hub.utils.common import run_async
from dstack._internal.utils.common import get_milliseconds_since_epoch
from dstack._internal.utils.logging import get_logger

logger = get_logger(__name__)

RESUBMISSION_INTERVAL = 60

Expand All @@ -19,11 +23,19 @@ async def resubmit_jobs():

async def _resubmit_projects_jobs(projects: List[Project]):
    """Resubmit jobs for every project in ``projects``, one project at a time.

    A project is skipped when its backend cannot be obtained because of
    invalid credentials, or when no configurator exists for its backend.
    """
    for project in projects:
        logger.info("Resubmitting jobs for %s project", project.name)
        try:
            # The lookup must happen only inside this guard: an unguarded call
            # would let BackendAuthError escape and abort the remaining projects.
            backend = await get_backend(project)
        except BackendAuthError:
            logger.warning(
                "Credentials for %s project are invalid. Skipping job resubmission.", project.name
            )
            continue
        configurator = get_configurator(backend.name)
        if configurator is None:
            continue
        # _resubmit_backend_jobs is a plain (non-async) function, so it is
        # dispatched through run_async rather than awaited directly.
        await run_async(_resubmit_backend_jobs, backend)
        logger.info("Finished resubmitting jobs for %s project", project.name)


def _resubmit_backend_jobs(backend: Backend):
Expand Down Expand Up @@ -59,3 +71,4 @@ def _resubmit_backend_jobs(backend: Backend):
job=job,
failed_to_start_job_new_status=JobStatus.FAILED,
)
logger.info("Resubmitted job %s", job.job_id)
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

from dstack._internal.core.artifact import Artifact
from dstack._internal.hub.models import ArtifactsList
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import get_project
from dstack._internal.hub.routers.util import get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

from dstack._internal.core.repo import RepoRef
from dstack._internal.hub.db.models import User
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import get_project
from dstack._internal.hub.routers.util import get_backend, get_project
from dstack._internal.hub.security.permissions import Authenticated, ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from dstack._internal.core.job import Job, JobHead
from dstack._internal.hub.db.models import User
from dstack._internal.hub.models import JobHeadList, JobsGet, JobsList
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import get_project
from dstack._internal.hub.routers.util import get_backend, get_project
from dstack._internal.hub.security.permissions import Authenticated, ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

from dstack._internal.backend.local import LocalBackend
from dstack._internal.hub.models import StorageLink
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import get_project
from dstack._internal.hub.routers.util import get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@

from dstack._internal.core.log_event import LogEvent
from dstack._internal.hub.models import PollLogs
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import get_project
from dstack._internal.hub.routers.util import get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async
from dstack._internal.utils.common import get_current_datetime
Expand Down
2 changes: 1 addition & 1 deletion cli/dstack/_internal/hub/routers/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
ProjectValues,
)
from dstack._internal.hub.repository.projects import ProjectManager
from dstack._internal.hub.routers.cache import clear_backend_cache
from dstack._internal.hub.routers.util import error_detail, get_backend_configurator, get_project
from dstack._internal.hub.security.permissions import (
Authenticated,
Expand All @@ -21,6 +20,7 @@
ensure_user_project_admin,
)
from dstack._internal.hub.services.backends.base import BackendConfigError
from dstack._internal.hub.services.backends.cache import clear_backend_cache
from dstack._internal.hub.utils.common import run_async

router = APIRouter(prefix="/api/projects", tags=["project"])
Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

from dstack._internal.core.repo import RemoteRepoCredentials, RepoHead, RepoRef
from dstack._internal.hub.models import RepoHeadGet, ReposDelete, ReposUpdate, SaveRepoCredentials
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from dstack._internal.core.error import NoMatchingInstanceError
from dstack._internal.core.job import Job, JobStatus
from dstack._internal.hub.models import StopRunners
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from dstack._internal.core.run import RunHead
from dstack._internal.hub.db.models import User
from dstack._internal.hub.models import RunsDelete, RunsGetPlan, RunsList, RunsStop
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import Authenticated, ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from dstack._internal.core.repo import RepoRef
from dstack._internal.core.secret import Secret
from dstack._internal.hub.models import SecretAddUpdate
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
from dstack._internal.backend.base import Backend
from dstack._internal.backend.local import LocalBackend
from dstack._internal.hub.models import FileObject
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember


Expand Down
3 changes: 1 addition & 2 deletions cli/dstack/_internal/hub/routers/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from dstack._internal.core.repo import RepoRef
from dstack._internal.core.tag import TagHead
from dstack._internal.hub.models import AddTagPath, AddTagRun
from dstack._internal.hub.routers.cache import get_backend
from dstack._internal.hub.routers.util import error_detail, get_project
from dstack._internal.hub.routers.util import error_detail, get_backend, get_project
from dstack._internal.hub.security.permissions import ProjectMember
from dstack._internal.hub.utils.common import run_async

Expand Down
13 changes: 13 additions & 0 deletions cli/dstack/_internal/hub/routers/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from fastapi import HTTPException, status

from dstack._internal.backend.base import Backend
from dstack._internal.core.error import BackendAuthError
from dstack._internal.hub.models import Project
from dstack._internal.hub.repository.projects import ProjectManager
from dstack._internal.hub.services.backends import cache as backends_cache
from dstack._internal.hub.services.backends import get_configurator
from dstack._internal.hub.services.backends.base import Configurator

Expand All @@ -19,6 +22,16 @@ async def get_project(project_name: str) -> Project:
return project


async def get_backend(project: Project) -> Optional[Backend]:
    """Fetch the backend for ``project`` via ``backends_cache.get_backend``.

    Translates a backend authentication failure into an HTTP error so that
    route handlers do not have to handle ``BackendAuthError`` themselves.

    Raises:
        HTTPException: 400 Bad Request whose detail carries
            ``BackendAuthError.message`` and ``BackendAuthError.code`` when the
            lookup raises ``BackendAuthError``.
    """
    try:
        return await backends_cache.get_backend(project)
    except BackendAuthError as e:
        # Chain explicitly so the original auth failure stays attached as the cause.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=error_detail(BackendAuthError.message, code=BackendAuthError.code),
        ) from e


def get_backend_configurator(backend_type: str) -> Configurator:
configurator = get_configurator(backend_type)
if configurator is None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
from typing import Optional

from dstack._internal.backend.base import Backend
Expand Down