From 9e6c6f19c30d01bde258dab184dd7c934821dd2b Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Thu, 21 May 2026 07:31:55 +0000 Subject: [PATCH] lakebox: read SSH gateway host from Sandbox response, cache per profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumes `Sandbox.gateway_host` (universe#1966484) so the CLI stops hardcoding regional defaults. Falls through to the existing `resolveGatewayHost` heuristic when the field is absent — safe to land against any manager build. - api.go: add `gatewayHost` to `sandboxEntry` and `createResponse` with `omitempty` so old/new wire shapes round-trip cleanly. - state.go: per-profile `gatewayHosts` map in `lakebox.json`, with `getGatewayHost` / `setGatewayHost` helpers that skip the write when the value is unchanged or empty (matches the existing `setDefault` pattern from f6f28ebcb). - ssh.go: 4-tier gateway resolution: `--gateway` flag → fresh API response (default's `get` or auto-create) → cached value for profile → workspace-host heuristic. Explicit-id ssh does a one-time `get` to warm the cache only when the cache is empty, so steady-state SSH against a known sandbox stays at zero round-trips. - list.go: after fetching all pages, validate the saved default still exists (clear + warn if not) and cache the gateway from any returned entry. Self-heals the cache on the cheapest user-driven path. - create.go, status.go, config.go, stop.go: opportunistically cache the gateway host from each Sandbox response. Net wire change vs. today: zero extra round-trips once warm; one extra `get` on the very first explicit-id ssh per (machine × profile) until something else populates the cache. The hardcoded constants and `resolveGatewayHost` stay until every deployed manager stamps `gateway_host` — separate cleanup PR later. 
Co-authored-by: Isaac --- cmd/lakebox/api.go | 17 ++++++++++------- cmd/lakebox/config.go | 6 ++++++ cmd/lakebox/create.go | 1 + cmd/lakebox/list.go | 41 ++++++++++++++++++++++++++++++++++------ cmd/lakebox/ssh.go | 44 ++++++++++++++++++++++++++++++++++++------- cmd/lakebox/state.go | 38 ++++++++++++++++++++++++++++++++++++- cmd/lakebox/status.go | 6 ++++++ cmd/lakebox/stop.go | 7 +++++++ 8 files changed, 139 insertions(+), 21 deletions(-) diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go index 219a7db142a..840a8316792 100644 --- a/cmd/lakebox/api.go +++ b/cmd/lakebox/api.go @@ -47,14 +47,16 @@ type createRequest struct { // createResponse is the JSON body returned by POST /api/2.0/lakebox/sandboxes. // Mirrors the `Sandbox` proto message after JSON transcoding. // -// `FQDN` is the manager's internal routing hostname — not user-actionable, -// SSH always goes through the gateway. Tagged `omitempty` so the day the -// manager stops returning it, both this struct and downstream `--json` -// output drop the field cleanly instead of leaking a ghost empty string. +// `FQDN` is the manager's internal routing hostname — not user-actionable. +// `GatewayHost` is the public SSH gateway hostname for the workspace, +// stamped by the manager (universe#1966484) so the CLI no longer needs to +// hardcode regional defaults. Both are `omitempty` so old/new wire shapes +// round-trip cleanly. type createResponse struct { - SandboxID string `json:"sandboxId"` - Status string `json:"status"` - FQDN string `json:"fqdn,omitempty"` + SandboxID string `json:"sandboxId"` + Status string `json:"status"` + FQDN string `json:"fqdn,omitempty"` + GatewayHost string `json:"gatewayHost,omitempty"` } // sandboxEntry is a single item in the list response. 
@@ -71,6 +73,7 @@ type sandboxEntry struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` FQDN string `json:"fqdn,omitempty"` + GatewayHost string `json:"gatewayHost,omitempty"` Name string `json:"name,omitempty"` CreateTime string `json:"createTime,omitempty"` LastStartTime string `json:"lastStartTime,omitempty"` diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index 9f1ef6429a5..55cc47f0f60 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -101,6 +101,12 @@ Examples: return fmt.Errorf("failed to update lakebox %s: %w", id, err) } + profile := w.Config.Profile + if profile == "" { + profile = w.Config.Host + } + _ = setGatewayHost(ctx, profile, updated.GatewayHost) + blank(out) field(ctx, out, "id", cmdio.Bold(ctx, updated.SandboxID)) if updated.Name != "" { diff --git a/cmd/lakebox/create.go b/cmd/lakebox/create.go index c4ec24c9a85..552b36f8ef4 100644 --- a/cmd/lakebox/create.go +++ b/cmd/lakebox/create.go @@ -47,6 +47,7 @@ Examples: if profile == "" { profile = w.Config.Host } + _ = setGatewayHost(ctx, profile, result.GatewayHost) currentDefault := getDefault(ctx, profile) shouldSetDefault := currentDefault == "" diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index 6dc1b42fb1a..aa5b76f2447 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -39,6 +39,41 @@ Example: return fmt.Errorf("failed to list lakeboxes: %w", err) } + profile := w.Config.Profile + if profile == "" { + profile = w.Config.Host + } + + // `list` returns the full set (the API client loops through every + // page), so it's the cheapest place to keep local state coherent: + // + // - If our saved default isn't in the result, the lakebox was + // deleted elsewhere — clear so the next `ssh` provisions fresh + // instead of erroring against a missing ID. + // - Cache the gateway hostname stamped on any returned entry so + // subsequent `ssh <id>` invocations don't need their own `get`. 
+ defaultID := getDefault(ctx, profile) + if defaultID != "" { + found := false + for _, e := range entries { + if e.SandboxID == defaultID { + found = true + break + } + } + if !found { + warn(ctx, fmt.Sprintf("Saved default %s no longer exists; clearing", defaultID)) + _ = clearDefault(ctx, profile) + defaultID = "" + } + } + for _, e := range entries { + if e.GatewayHost != "" { + _ = setGatewayHost(ctx, profile, e.GatewayHost) + break + } + } + if outputJSON { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") @@ -50,12 +85,6 @@ Example: return nil } - profile := w.Config.Profile - if profile == "" { - profile = w.Config.Host - } - defaultID := getDefault(ctx, profile) - out := cmd.OutOrStdout() // Compute column widths. AUTOSTOP holds short tokens like diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index be7d8c3665b..e4d436a0f69 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -87,21 +87,28 @@ Examples: extraArgs = args[dashAt:] } - // Determine lakebox ID if not explicit. - if lakeboxID == "" { - api, err := newLakeboxAPI(w) - if err != nil { - return err - } + // sandboxGatewayHost captures the gateway hostname from any + // Sandbox response we touch in this command, so the resolution + // below can prefer it over the cached value. Stays "" when we + // never hit the API in this invocation (e.g. explicit-id ssh + // with a warm cache). + var sandboxGatewayHost string + + api, err := newLakeboxAPI(w) + if err != nil { + return err + } + if lakeboxID == "" { // If we have a saved default, confirm it still exists on the // server. The lakebox may have been auto-stopped, deleted from // another machine, or reaped by an admin since we wrote the // state file. Clear the stale entry and fall through to // provisioning a fresh one. 
if def := getDefault(ctx, profile); def != "" { - if _, err := api.get(ctx, def); err == nil { + if sb, err := api.get(ctx, def); err == nil { lakeboxID = def + sandboxGatewayHost = sb.GatewayHost } else { warn(ctx, fmt.Sprintf("Saved default %s is gone; provisioning a new lakebox", def)) _ = clearDefault(ctx, profile) @@ -117,19 +124,42 @@ Examples: return fmt.Errorf("failed to create lakebox: %w", err) } lakeboxID = result.SandboxID + sandboxGatewayHost = result.GatewayHost s.ok("Lakebox " + cmdio.Bold(ctx, lakeboxID) + " ready") if err := setDefault(ctx, profile, lakeboxID); err != nil { warn(ctx, fmt.Sprintf("Could not save default: %v", err)) } } + } else if getGatewayHost(ctx, profile) == "" { + // Explicit-id ssh on a profile we have no cached gateway for: + // one-time `get` to learn it. Subsequent invocations hit the + // cache and skip the round-trip. Failure here is non-fatal — + // we fall through to the workspace-host heuristic. + if sb, err := api.get(ctx, lakeboxID); err == nil { + sandboxGatewayHost = sb.GatewayHost + } } + // Resolution precedence: --gateway flag → fresh API response → + // cached value for this profile → workspace-host heuristic. host := gatewayHost + if host == "" { + host = sandboxGatewayHost + } + if host == "" { + host = getGatewayHost(ctx, profile) + } if host == "" { host = resolveGatewayHost(w.Config.Host) } + // Persist whatever the server just told us, so the next invocation + // can short-circuit the explicit-id `get` above. 
+ if sandboxGatewayHost != "" { + _ = setGatewayHost(ctx, profile, sandboxGatewayHost) + } + s := spin(ctx, "Connecting to "+cmdio.Bold(ctx, lakeboxID)+"…") defer s.Close() s.ok("Connected to " + cmdio.Bold(ctx, lakeboxID)) diff --git a/cmd/lakebox/state.go b/cmd/lakebox/state.go index 5be3da1d4ab..17d098bcc70 100644 --- a/cmd/lakebox/state.go +++ b/cmd/lakebox/state.go @@ -12,11 +12,16 @@ import ( "github.com/databricks/cli/libs/env" ) -// stateFile stores per-profile lakebox defaults on the local filesystem. +// stateFile stores per-profile lakebox state on the local filesystem. // Located at ~/.databricks/lakebox.json. type stateFile struct { // Profile name → default lakebox ID. Defaults map[string]string `json:"defaults"` + // Profile name → SSH gateway hostname returned by the manager for any + // sandbox in that workspace. Cached so `ssh <id>` does not need to fetch + // the sandbox just to learn where to connect. Empty until the first + // command that reads a sandbox response populates it. + GatewayHosts map[string]string `json:"gatewayHosts,omitempty"` } func stateFilePath(ctx context.Context) (string, error) { @@ -99,3 +104,34 @@ func clearDefault(ctx context.Context, profile string) error { delete(state.Defaults, profile) return saveState(ctx, state) } + +// getGatewayHost returns the cached SSH gateway hostname for the workspace +// behind `profile`, or "" if nothing has been cached yet. +func getGatewayHost(ctx context.Context, profile string) string { + state, err := loadState(ctx) + if err != nil { + return "" + } + return state.GatewayHosts[profile] +} + +// setGatewayHost caches the SSH gateway hostname for `profile`. No-op when +// `host` is empty or already equal to the cached value, so callers can pipe +// every Sandbox response through here without churning the state file. 
+func setGatewayHost(ctx context.Context, profile, host string) error { + if host == "" { + return nil + } + state, err := loadState(ctx) + if err != nil { + return err + } + if state.GatewayHosts[profile] == host { + return nil + } + if state.GatewayHosts == nil { + state.GatewayHosts = make(map[string]string) + } + state.GatewayHosts[profile] = host + return saveState(ctx, state) +} diff --git a/cmd/lakebox/status.go b/cmd/lakebox/status.go index 75b36276116..10aef7daa69 100644 --- a/cmd/lakebox/status.go +++ b/cmd/lakebox/status.go @@ -38,6 +38,12 @@ Example: return fmt.Errorf("failed to get lakebox %s: %w", lakeboxID, err) } + profile := w.Config.Profile + if profile == "" { + profile = w.Config.Host + } + _ = setGatewayHost(ctx, profile, entry.GatewayHost) + if outputJSON { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") diff --git a/cmd/lakebox/stop.go b/cmd/lakebox/stop.go index 98e28f412bc..f40d632729a 100644 --- a/cmd/lakebox/stop.go +++ b/cmd/lakebox/stop.go @@ -42,6 +42,13 @@ Example: s.fail("Failed to stop " + lakeboxID) return fmt.Errorf("failed to stop lakebox %s: %w", lakeboxID, err) } + + profile := w.Config.Profile + if profile == "" { + profile = w.Config.Host + } + _ = setGatewayHost(ctx, profile, updated.GatewayHost) + s.ok("Stopped " + cmdio.Bold(ctx, updated.SandboxID)) return nil },