Skip to content

Commit

Permalink
List machine improvements (#365)
Browse files Browse the repository at this point in the history
- Use free GPUs, not total GPUs, when displaying available GPUs
- Sort GPUs in table/json view for consistency (backend does not have
ordered maps)
- Add a shebang to the agent bootstrap script for better copy/paste into
the provider's user data
- Prevent python-rich from cropping/"correcting" lines in agent
bootstrap script for reliable copy/paste

Resolve BE-1542
  • Loading branch information
nickpetrovic committed Jul 11, 2024
1 parent 01eed27 commit 9f7e9d3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 22 deletions.
17 changes: 3 additions & 14 deletions pkg/repository/provider_redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,19 +340,6 @@ func (r *ProviderRedisRepository) RemoveMachineLock(providerName, poolName, mach
return r.lock.Release(common.RedisKeys.ProviderMachineLock(providerName, poolName, machineId))
}

// GetGPUAvailability retrieves the GPU availability status for all machines
// managed by various providers and pools.
//
// Usage:
//
// availability, err := machineRepo.GetGPUAvailability(pools)
// if err != nil {
// log.Fatal(err)
// }
//
// for gpu, isAvailable := range availability {
// fmt.Printf("GPU: %s, Available: %t\n", gpu, isAvailable)
// }
func (r *ProviderRedisRepository) GetGPUAvailability(pools map[string]types.WorkerPoolConfig) (map[string]bool, error) {
gpuAvailability := map[string]bool{}

Expand All @@ -376,7 +363,9 @@ func (r *ProviderRedisRepository) GetGPUAvailability(pools map[string]types.Work

// Update availability of GPU based on machine states
for _, machine := range machines {
gpuAvailability[machine.State.Gpu] = machine.State.GpuCount > 0
if machine.Metrics != nil {
gpuAvailability[machine.State.Gpu] = machine.Metrics.FreeGpuCount > 0
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion sdk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "beta9"
version = "0.1.50"
version = "0.1.51"
description = ""
authors = ["beam.cloud <support@beam.cloud>"]
packages = [
Expand Down
23 changes: 16 additions & 7 deletions sdk/src/beta9/cli/machine.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from datetime import datetime, timezone

import click
Expand Down Expand Up @@ -78,6 +79,8 @@ def list_machines(
if not res.ok:
terminal.error(res.err_msg)

res.gpus = {gpu: res.gpus[gpu] for gpu in sorted(res.gpus)}

if format == "json":
machines = [d.to_dict(casing=Casing.SNAKE) for d in res.machines] # type:ignore
terminal.print_json({"machines": machines, "gpus": res.gpus})
Expand Down Expand Up @@ -165,13 +168,19 @@ def create_machine(service: ServiceClient, pool: str):
f"Created machine with ID: '{res.machine.id}'. Use the following command to setup the node:"
)
terminal.detail(
f"""sudo curl -L -o agent https://release.beam.cloud/agent/agent && \\
sudo chmod +x agent && \\
sudo ./agent --token "{res.machine.registration_token}" --machine-id "{res.machine.id}" \\
--tailscale-url "{res.machine.tailscale_url}" \\
--tailscale-auth "{res.machine.tailscale_auth}" \\
--pool-name "{res.machine.pool_name}" \\
--provider-name "{res.machine.provider_name}" """
textwrap.dedent(f"""
#!/bin/bash
sudo curl -L -o agent https://release.beam.cloud/agent/agent && \\
sudo chmod +x agent && \\
sudo ./agent --token "{res.machine.registration_token}" \\
--machine-id "{res.machine.id}" \\
--tailscale-url "{res.machine.tailscale_url}" \\
--tailscale-auth "{res.machine.tailscale_auth}" \\
--pool-name "{res.machine.pool_name}" \\
--provider-name "{res.machine.provider_name}"
"""),
crop=False,
overflow="ignore",
)

else:
Expand Down

0 comments on commit 9f7e9d3

Please sign in to comment.