Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gateway/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
# release builds of dstack-gateway depend on a PyPI version of dstack instead
"dstack[gateway] @ git+https://github.com/dstackai/dstack.git@master",
"dstack[gateway] @ https://github.com/dstackai/dstack/archive/refs/heads/master.tar.gz",
]

[tool.setuptools.package-data]
Expand Down
32 changes: 20 additions & 12 deletions src/dstack/_internal/core/backends/kubernetes/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,6 @@ def create_gateway(
# If the cluster does not support Load Balancer, the service will be provisioned but
# the external IP/hostname will never be allocated.

# TODO: This implementation is only tested on EKS. Test other managed Kubernetes.

# TODO: By default EKS creates a Classic Load Balancer for Load Balancer services.
# Consider deploying an NLB. It seems it requires some extra configuration on the cluster:
# https://docs.aws.amazon.com/eks/latest/userguide/network-load-balancing.html
Expand Down Expand Up @@ -397,6 +395,10 @@ def create_gateway(
container_port=443,
),
],
security_context=client.V1SecurityContext(
run_as_user=0,
run_as_group=0,
),
)
]
),
Expand Down Expand Up @@ -435,21 +437,22 @@ def create_gateway(
namespace=self.config.namespace,
body=service,
)
hostname = _wait_for_load_balancer_hostname(
# address is either a domain name or an IP address
address = _wait_for_load_balancer_address(
api=self.api,
namespace=self.config.namespace,
service_name=_get_pod_service_name(instance_name),
)
region = DUMMY_REGION
if hostname is None:
if address is None:
self.terminate_instance(instance_name, region=region)
raise ComputeError(
"Failed to get gateway hostname. "
"Ensure the Kubernetes cluster supports Load Balancer services."
)
return GatewayProvisioningData(
instance_id=instance_name,
ip_address=hostname,
ip_address=address,
region=region,
)

Expand Down Expand Up @@ -927,7 +930,7 @@ def _wait_for_pod_ready(
time.sleep(1)


def _wait_for_load_balancer_hostname(
def _wait_for_load_balancer_address(
api: client.CoreV1Api,
namespace: str,
service_name: str,
Expand All @@ -945,10 +948,16 @@ def _wait_for_load_balancer_hostname(
service is not None
and (service_status := service.status) is not None
and (lb_status := service_status.load_balancer) is not None
and (ingresses := lb_status.ingress)
and (hostname := ingresses[0].hostname) is not None
and (ingress_points := lb_status.ingress)
):
return hostname
ingress_point = ingress_points[0]
# > Hostname is set for load-balancer ingress points that are DNS based (typically
# > AWS load-balancers)
# > IP is set for load-balancer ingress points that are IP based (typically GCE or
# > OpenStack load-balancers)
address = ingress_point.hostname or ingress_point.ip
if address is not None:
return address
elapsed_time = time.time() - start_time
if elapsed_time >= timeout_seconds:
logger.warning("Timeout waiting for load balancer %s to get ip", service_name)
Expand Down Expand Up @@ -982,16 +991,15 @@ def _get_gateway_commands(authorized_keys: list[str]) -> list[str]:
"apt-get update && apt-get install -y sudo wget openssh-server nginx python3.10-venv libaugeas0",
# install docker-systemctl-replacement
"wget https://raw.githubusercontent.com/gdraheim/docker-systemctl-replacement/b18d67e521f0d1cf1d705dbb8e0416bef23e377c/files/docker/systemctl3.py -O /usr/bin/systemctl",
"chmod + /usr/bin/systemctl",
"chmod a+rx /usr/bin/systemctl",
# install certbot
"python3 -m venv /root/certbotvenv/",
"/root/certbotvenv/bin/pip install certbot-nginx",
"ln -s /root/certbotvenv/bin/certbot /usr/bin/certbot",
# prohibit password authentication
'sed -i "s/.*PasswordAuthentication.*/PasswordAuthentication no/g" /etc/ssh/sshd_config',
# set up ubuntu user
"adduser ubuntu",
"usermod -aG sudo ubuntu",
"useradd -mUG sudo ubuntu",
"echo 'ubuntu ALL=(ALL:ALL) NOPASSWD: ALL' | tee /etc/sudoers.d/ubuntu",
# create ssh dirs and add public key
"mkdir -p /run/sshd /home/ubuntu/.ssh",
Expand Down
2 changes: 1 addition & 1 deletion src/dstack/_internal/server/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

DSTACK_DIR_PATH = Path("~/.dstack/").expanduser()

SERVER_DIR_PATH = Path(os.getenv("DSTACK_SERVER_DIR", DSTACK_DIR_PATH / "server"))
SERVER_DIR_PATH = Path(os.getenv("DSTACK_SERVER_DIR", DSTACK_DIR_PATH / "server")).resolve()

SERVER_CONFIG_FILE_PATH = SERVER_DIR_PATH / "config.yml"

Expand Down