Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9b13e28
Fix method return type hints
Egor-S Jul 5, 2023
5a32b99
Implement configuration pydantic models
Egor-S Jul 5, 2023
7bb4864
Outline configurator interface
Egor-S Jul 6, 2023
6f174e2
Implement configurators
Egor-S Jul 6, 2023
28c2845
Implement run command with configurators
Egor-S Jul 7, 2023
d6c8abd
Drop legacy configurations loader
Egor-S Jul 7, 2023
cbaf84f
Print relevant help for dstack run
Egor-S Jul 7, 2023
139e0ea
Handle pydantic validation errors
Egor-S Jul 7, 2023
a08aa68
Allow configuring entrypoint and home dir. Fail if sshd is missing in…
Egor-S Jul 10, 2023
87b8730
Reimplement vscode extension
Egor-S Jul 10, 2023
70175ea
Add ssh support to tasks configuration
Egor-S Jul 10, 2023
8bc8141
Replace literals with enums
Egor-S Jul 10, 2023
e2ea26f
Pin pydantic v1
Egor-S Jul 10, 2023
da99c0a
Make pydantic version flexible
Egor-S Jul 10, 2023
e9e0dfb
Fix entrypoint tests
Egor-S Jul 10, 2023
6286009
Move ports.py
Egor-S Jul 10, 2023
b55066d
Test configurators instead of providers
Egor-S Jul 10, 2023
237d4af
Drop providers
Egor-S Jul 10, 2023
0fb3907
Drop json schemas
Egor-S Jul 11, 2023
9383415
Clean up utils.common
Egor-S Jul 11, 2023
9ff4103
Annotate configuration models
Egor-S Jul 11, 2023
3d7cd16
Generate json schema on build
Egor-S Jul 11, 2023
204606a
Generate json schema on release
Egor-S Jul 11, 2023
4a64574
Update run & configuration docs
Egor-S Jul 11, 2023
32b9125
Support env variables as a list
Egor-S Jul 11, 2023
2ee8638
Explicit build policy args
Egor-S Jul 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,26 @@ jobs:
aws s3 cp dstack-runner-${{ matrix.goos }}-${{ matrix.goarch }} "s3://dstack-runner-downloads-stgn/$VERSION/binaries/dstack-runner-${{ matrix.goos }}-${{ matrix.platform }}${{ matrix.extension }}" --acl public-read
aws s3 cp dstack-runner-${{ matrix.goos }}-${{ matrix.goarch }} "s3://dstack-runner-downloads-stgn/latest/binaries/dstack-runner-${{ matrix.goos }}-${{ matrix.platform }}${{ matrix.extension }}" --acl public-read

generate-json-schema:
needs: [ cli-test-master ]
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install AWS
run: pip install awscli
- name: Install dstack
run: pip install .
- name: Generate json schema
run: python -c "from dstack._internal.core.configuration import DstackConfiguration; print(DstackConfiguration.schema_json(indent=2))" > configuration.json
- name: Upload json schema to S3
run: |
VERSION=$((${{ github.run_number }} + 150))
aws s3 cp configuration.json "s3://dstack-runner-downloads-stgn/$VERSION/schemas/configuration.json" --acl public-read
aws s3 cp configuration.json "s3://dstack-runner-downloads-stgn/latest/schemas/configuration.json" --acl public-read

# cli-integration-tests:
# needs: [ runner-upload-master ]
# runs-on: ${{ matrix.os }}
Expand Down
22 changes: 21 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,24 @@ jobs:
run: pip install awscli
- run: |
VERSION=${GITHUB_REF#refs/tags/}
echo $VERSION | aws s3 cp - s3://get-dstack/cli/latest-version --acl public-read
echo $VERSION | aws s3 cp - s3://get-dstack/cli/latest-version --acl public-read

generate-json-schema:
needs: [ cli-test-tag ]
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install AWS
run: pip install awscli
- name: Install dstack
run: pip install .
- name: Generate json schema
run: python -c "from dstack._internal.core.configuration import DstackConfiguration; print(DstackConfiguration.schema_json(indent=2))" > configuration.json
- name: Upload json schema to S3
run: |
VERSION=${GITHUB_REF#refs/tags/}
aws s3 cp configuration.json "s3://dstack-runner-downloads/$VERSION/schemas/configuration.json" --acl public-read
aws s3 cp configuration.json "s3://dstack-runner-downloads/latest/schemas/configuration.json" --acl public-read
2 changes: 1 addition & 1 deletion cli/dstack/_internal/backend/azure/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def _vm_type_available(vm_resource: ResourceSku) -> bool:
return False


def _get_gpu_name_memory(vm_name: str) -> Tuple[int, str]:
def _get_gpu_name_memory(vm_name: str) -> Tuple[str, int]:
if re.match(r"^Standard_NC\d+ads_A100_v4$", vm_name):
return "A100", 80 * 1024
if re.match(r"^Standard_NC\d+as_T4_v3$", vm_name):
Expand Down
8 changes: 4 additions & 4 deletions cli/dstack/_internal/backend/base/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import cpuinfo

from dstack._internal.backend.base.storage import Storage
from dstack._internal.core.build import BuildNotFoundError, BuildPlan, DockerPlatform
from dstack._internal.core.build import BuildNotFoundError, BuildPlan, BuildPolicy, DockerPlatform
from dstack._internal.core.job import Job
from dstack._internal.utils.escape import escape_head


def predict_build_plan(
storage: Storage, job: Job, platform: Optional[DockerPlatform]
) -> BuildPlan:
if job.build_policy in ["force-build", "build-only"]:
if job.build_policy in [BuildPolicy.FORCE_BUILD, BuildPolicy.BUILD_ONLY]:
return BuildPlan.yes

if platform is None:
Expand All @@ -22,11 +22,11 @@ def predict_build_plan(
return BuildPlan.use

if job.build_commands:
if job.build_policy == "use-build":
if job.build_policy == BuildPolicy.USE_BUILD:
raise BuildNotFoundError("Build not found. Run `dstack build` or add `--build` flag")
return BuildPlan.yes

if job.optional_build_commands and job.build_policy == "build":
if job.optional_build_commands and job.build_policy == BuildPolicy.BUILD:
return BuildPlan.yes
return BuildPlan.no

Expand Down
13 changes: 6 additions & 7 deletions cli/dstack/_internal/cli/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,18 @@ class BasicCommand(object):
DESCRIPTION = "describe the command"
SUBCOMMANDS = []

def __init__(self, parser: _SubParsersAction):
def __init__(self, parser: _SubParsersAction, store_help: bool = False):
kwargs = {}
if self.description:
kwargs["help"] = self.description
self._parser = parser.add_parser(
self._parser: argparse.ArgumentParser = parser.add_parser(
self.name, add_help=False, formatter_class=RichHelpFormatter, **kwargs
)
help_kwargs = dict(action="help", default=argparse.SUPPRESS)
if store_help:
help_kwargs = dict(action="store_true")
self._parser.add_argument(
"-h",
"--help",
action="help",
default=argparse.SUPPRESS,
help="Show this help message and exit",
"-h", "--help", help="Show this help message and exit", **help_kwargs
)
self._parser.set_defaults(func=self.__command)

Expand Down
108 changes: 45 additions & 63 deletions cli/dstack/_internal/cli/commands/build/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
import argparse
import os
import sys

from jsonschema import ValidationError
from rich.prompt import Confirm

from dstack._internal.api.runs import list_runs_hub
from dstack._internal.cli.commands import BasicCommand
from dstack._internal.cli.commands.run import (
_poll_run,
_print_run_plan,
_read_ssh_key_pub,
configurations,
)
from dstack._internal.cli.common import add_project_argument, check_init, console, print_runs
from dstack._internal.cli.commands.run import _poll_run, _print_run_plan, _read_ssh_key_pub
from dstack._internal.cli.common import add_project_argument, check_init, console
from dstack._internal.cli.config import config, get_hub_client
from dstack._internal.cli.configuration import load_configuration
from dstack._internal.core.error import RepoNotInitializedError
from dstack._internal.core.job import JobStatus


class BuildCommand(BasicCommand):
Expand All @@ -25,64 +17,54 @@ class BuildCommand(BasicCommand):

@check_init
def _command(self, args: argparse.Namespace):
(
configuration_path,
provider_name,
provider_data,
project_name,
) = configurations.parse_configuration_file(
args.working_dir, args.file_name, args.profile_name
)
provider_data["build_policy"] = "build-only"
configurator = load_configuration(args.working_dir, args.file_name, args.profile_name)
configurator.build_policy = "build-only"

project_name = None
if args.project:
project_name = args.project
try:
hub_client = get_hub_client(project_name=project_name)
if (
hub_client.repo.repo_data.repo_type != "local"
and not hub_client.get_repo_credentials()
):
raise RepoNotInitializedError("No credentials", project_name=project_name)
elif configurator.profile.project:
project_name = configurator.profile.project

hub_client = get_hub_client(project_name=project_name)
if (
hub_client.repo.repo_data.repo_type != "local"
and not hub_client.get_repo_credentials()
):
raise RepoNotInitializedError("No credentials", project_name=project_name)

if not config.repo_user_config.ssh_key_path:
ssh_key_pub = None
else:
ssh_key_pub = _read_ssh_key_pub(config.repo_user_config.ssh_key_path)

if not config.repo_user_config.ssh_key_path:
ssh_pub_key = None
else:
ssh_pub_key = _read_ssh_key_pub(config.repo_user_config.ssh_key_path)
configurator_args, run_args = configurator.get_parser().parse_known_args(
args.args + args.unknown
)
configurator.apply_args(configurator_args)

run_plan = hub_client.get_run_plan(
configuration_path=configuration_path,
provider_name=provider_name,
provider_data=provider_data,
args=args,
)
console.print("dstack will execute the following plan:\n")
_print_run_plan(configuration_path, run_plan)
if not args.yes and not Confirm.ask("Continue?"):
console.print("\nExiting...")
exit(0)
console.print("\nProvisioning...\n")
run_plan = hub_client.get_run_plan(configurator)
console.print("dstack will execute the following plan:\n")
_print_run_plan(configurator.configuration_path, run_plan)
if not args.yes and not Confirm.ask("Continue?"):
console.print("\nExiting...")
exit(0)
console.print("\nProvisioning...\n")

run_name, jobs = hub_client.run_provider(
configuration_path=configuration_path,
provider_name=provider_name,
provider_data=provider_data,
ssh_pub_key=ssh_pub_key,
args=args,
)
runs = list_runs_hub(hub_client, run_name=run_name)
run = runs[0]
_poll_run(
hub_client,
run,
jobs,
ssh_key=config.repo_user_config.ssh_key_path,
watcher=None,
)
except ValidationError as e:
sys.exit(
f"There a syntax error in one of the files inside the {os.getcwd()}/.dstack/workflows directory:\n\n{e}"
)
run_name, jobs = hub_client.run_configuration(
configurator=configurator,
ssh_key_pub=ssh_key_pub,
run_args=run_args,
)
runs = list_runs_hub(hub_client, run_name=run_name)
run = runs[0]
_poll_run(
hub_client,
run,
jobs,
ssh_key=config.repo_user_config.ssh_key_path,
watcher=None,
)

def __init__(self, parser):
super().__init__(parser)
Expand Down
6 changes: 2 additions & 4 deletions cli/dstack/_internal/cli/commands/prune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from rich_argparse import RichHelpFormatter

from dstack._internal.cli.commands import BasicCommand
from dstack._internal.cli.commands.run import configurations
from dstack._internal.cli.common import add_project_argument, check_init, console
from dstack._internal.cli.config import get_hub_client
from dstack._internal.cli.configuration import resolve_configuration_path
from dstack.api.hub import HubClient


Expand Down Expand Up @@ -46,8 +46,6 @@ def _command(self, args: argparse.Namespace):

@staticmethod
def prune_cache(args: argparse.Namespace, hub_client: HubClient):
configuration_path = str(
configurations.get_configuration_path(args.working_dir, args.file_name)
)
configuration_path = str(resolve_configuration_path(args.file_name, args.working_dir))
hub_client.delete_configuration_cache(configuration_path=configuration_path)
console.print(f"[grey58]Cache pruned[/]")
55 changes: 24 additions & 31 deletions cli/dstack/_internal/cli/commands/run/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import copy
import os
import sys
import threading
Expand All @@ -9,7 +10,6 @@

import websocket
from cursor import cursor
from jsonschema import ValidationError
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.prompt import Confirm
from rich.table import Table
Expand All @@ -18,11 +18,11 @@
from dstack._internal.api.runs import list_runs_hub
from dstack._internal.backend.base.logs import fix_urls
from dstack._internal.cli.commands import BasicCommand
from dstack._internal.cli.commands.run import configurations
from dstack._internal.cli.commands.run.ssh_tunnel import PortsLock, run_ssh_tunnel
from dstack._internal.cli.commands.run.watcher import LocalCopier, SSHCopier, Watcher
from dstack._internal.cli.common import add_project_argument, check_init, console, print_runs
from dstack._internal.cli.config import config, get_hub_client
from dstack._internal.cli.configuration import load_configuration
from dstack._internal.core.error import RepoNotInitializedError
from dstack._internal.core.instance import InstanceType
from dstack._internal.core.job import Job, JobErrorCode, JobHead, JobStatus
Expand All @@ -48,7 +48,7 @@ class RunCommand(BasicCommand):
DESCRIPTION = "Run a configuration"

def __init__(self, parser):
super(RunCommand, self).__init__(parser)
super().__init__(parser, store_help=True)

def register(self):
self._parser.add_argument(
Expand Down Expand Up @@ -107,16 +107,17 @@ def register(self):

@check_init
def _command(self, args: Namespace):
(
configuration_path,
provider_name,
provider_data,
project_name,
) = configurations.parse_configuration_file(
args.working_dir, args.file_name, args.profile_name
)
configurator = load_configuration(args.working_dir, args.file_name, args.profile_name)
if args.help:
configurator.get_parser(parser=copy.deepcopy(self._parser)).print_help()
exit(0)

project_name = None
if args.project:
project_name = args.project
elif configurator.profile.project:
project_name = configurator.profile.project

watcher = Watcher(os.getcwd())
try:
if args.reload:
Expand All @@ -129,30 +130,27 @@ def _command(self, args: Namespace):
raise RepoNotInitializedError("No credentials", project_name=project_name)

if not config.repo_user_config.ssh_key_path:
ssh_pub_key = None
ssh_key_pub = None
else:
ssh_pub_key = _read_ssh_key_pub(config.repo_user_config.ssh_key_path)
ssh_key_pub = _read_ssh_key_pub(config.repo_user_config.ssh_key_path)

run_plan = hub_client.get_run_plan(
configuration_path=configuration_path,
provider_name=provider_name,
provider_data=provider_data,
args=args,
configurator_args, run_args = configurator.get_parser().parse_known_args(
args.args + args.unknown
)
configurator.apply_args(configurator_args)

run_plan = hub_client.get_run_plan(configurator)
console.print("dstack will execute the following plan:\n")
_print_run_plan(configuration_path, run_plan)
_print_run_plan(configurator.configuration_path, run_plan)
if not args.yes and not Confirm.ask("Continue?"):
console.print("\nExiting...")
exit(0)
console.print("\nProvisioning...\n")

run_name, jobs = hub_client.run_provider(
configuration_path=configuration_path,
provider_name=provider_name,
provider_data=provider_data,
ssh_pub_key=ssh_pub_key,
tag_name=args.tag_name,
args=args,
run_name, jobs = hub_client.run_configuration(
configurator=configurator,
ssh_key_pub=ssh_key_pub,
run_args=run_args,
)
runs = list_runs_hub(hub_client, run_name=run_name)
run = runs[0]
Expand All @@ -164,10 +162,6 @@ def _command(self, args: Namespace):
ssh_key=config.repo_user_config.ssh_key_path,
watcher=watcher,
)
except ValidationError as e:
sys.exit(
f"There a syntax error in one of the files inside the {os.getcwd()}/.dstack/workflows directory:\n\n{e}"
)
finally:
if watcher.is_alive():
watcher.stop()
Expand Down Expand Up @@ -426,7 +420,6 @@ def _attach_to_container(hub_client: HubClient, run_name: str, ports_lock: Ports
for run in _poll_run_head(hub_client, run_name, loop_statuses=[JobStatus.BUILDING]):
pass
app_ports = ports_lock.release()
# TODO replace long delay with starting ssh-server in the beginning
for delay in range(0, 60 * 10 + 1, POLL_PROVISION_RATE_SECS): # retry
time.sleep(POLL_PROVISION_RATE_SECS if delay else 0) # skip first sleep
if run_ssh_tunnel(run_name, app_ports):
Expand Down
Loading