From f6e65350ac4345ccc058934ff90266534176ec74 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 07:18:54 +0000 Subject: [PATCH 1/5] =?UTF-8?q?lakebox:=20fix=20F22=20=E2=80=94=20`ssh=20-?= =?UTF-8?q?-=20bash=20-c=20'...'`=20swallows=20stdout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom: $ databricks lakebox ssh -- 'echo hi' # works hi $ databricks lakebox ssh -- bash -c 'echo hi' # broken, empty ssh joins remote-command words with spaces and the remote shell re-parses them. So the user's local args `["bash", "-c", "echo hi"]` land on the wire as the literal string `bash -c echo hi`, which bash splits into `-c=echo` and `$0=hi` — running `echo` with no args and printing an empty line. Fix: shell-quote each extra arg before append, using the repo's existing `libs/shellquote.BashArg`. Single-arg-with-whitespace is left untouched so the existing `lakebox ssh -- 'cmd | head'` shape (user expects the remote shell to do the split) keeps working; multi-arg invocations get exec-style preservation. Extracted `buildSSHArgs` so the argv construction is unit-testable; table-driven test covers the regression cases plus single-quote escaping, globs, and empty args. Co-authored-by: Isaac --- cmd/lakebox/ssh.go | 46 ++++++++++++++---- cmd/lakebox/ssh_test.go | 101 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 8 deletions(-) create mode 100644 cmd/lakebox/ssh_test.go diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index 465d59d451..0eca26f185 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/cli/libs/cmdctx" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/execv" + "github.com/databricks/cli/libs/shellquote" "github.com/databricks/databricks-sdk-go/apierr" "github.com/spf13/cobra" ) @@ -201,9 +202,37 @@ Examples: // execSSHDirect replaces the CLI process with ssh (or simulates that on // Windows via execv). All options are passed on the command line, so no -// ~/.ssh/config entry is required. Extra args are appended after the -// destination for remote commands or ssh flags. +// ~/.ssh/config entry is required. func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) error { + return execv.Execv(execv.Options{ + Args: buildSSHArgs(lakeboxID, host, port, keyPath, extraArgs), + Env: os.Environ(), + }) +} + +// buildSSHArgs assembles the argv we hand to the `ssh` binary. +// +// `ssh` concatenates remote-command words with spaces and the remote +// shell re-parses them. That makes the two natural user shapes +// incompatible by default: +// +// - Single arg that's already a complete shell command: +// `lakebox ssh -- 'echo hi | head -3'` +// The user expects the remote shell to split and execute this +// string. ssh's "concat with spaces" is a no-op here, so we hand +// the arg through untouched. +// +// - Multi-arg exec-style invocation: +// `lakebox ssh -- bash -c 'echo hi'` +// Cobra splits this into `["bash", "-c", "echo hi"]`. ssh's join +// produces `bash -c echo hi` on the wire, which bash re-splits into +// `-c=echo` and `$0=hi` — bug F22. We fix that by shell-quoting +// each arg before append, so the remote sees `bash -c 'echo hi'`. +// +// The heuristic: if there's exactly one extra arg, pass it untouched; +// otherwise quote every arg. shellquote.BashArg leaves safe args alone, +// so `ls -la /tmp` round-trips unchanged in the multi-arg path. +func buildSSHArgs(lakeboxID, host, port, keyPath string, extraArgs []string) []string { args := []string{ "ssh", "-i", keyPath, @@ -215,10 +244,11 @@ func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) er "-o", "LogLevel=ERROR", fmt.Sprintf("%s@%s", lakeboxID, host), } - args = append(args, extraArgs...) - - return execv.Execv(execv.Options{ - Args: args, - Env: os.Environ(), - }) + if len(extraArgs) == 1 { + return append(args, extraArgs[0]) + } + for _, a := range extraArgs { + args = append(args, shellquote.BashArg(a)) + } + return args } diff --git a/cmd/lakebox/ssh_test.go b/cmd/lakebox/ssh_test.go new file mode 100644 index 0000000000..a88ad47c67 --- /dev/null +++ b/cmd/lakebox/ssh_test.go @@ -0,0 +1,101 @@ +package lakebox + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildSSHArgsBaseFlags(t *testing.T) { + got := buildSSHArgs("happy-panda-1234", "gw.example.test", "2222", "/keys/id", nil) + // Sanity: the destination is the last arg when no extras. + assert.Equal(t, "happy-panda-1234@gw.example.test", got[len(got)-1]) + // The base flag set is fixed; spot-check a couple. + assert.Contains(t, got, "-i") + assert.Contains(t, got, "/keys/id") + assert.Contains(t, got, "-p") + assert.Contains(t, got, "2222") +} + +func TestBuildSSHArgsQuoting(t *testing.T) { + for _, tc := range []struct { + name string + extraArgs []string + // expected is what we expect at the end of args, after the destination. + expected []string + }{ + { + name: "no extras", + extraArgs: nil, + expected: nil, + }, + { + name: "single safe word — passed through", + extraArgs: []string{"uname"}, + expected: []string{"uname"}, + }, + { + name: "single string with spaces — passed through so the remote shell can split it", + extraArgs: []string{"echo hello-from-string"}, + expected: []string{"echo hello-from-string"}, + }, + { + name: "single string with a pipe — passed through to the remote shell", + extraArgs: []string{"cat /etc/os-release | head -3"}, + expected: []string{"cat /etc/os-release | head -3"}, + }, + { + name: "multi safe words — no quoting", + extraArgs: []string{"ls", "-la", "/tmp"}, + expected: []string{"ls", "-la", "/tmp"}, + }, + { + name: "bash -c '' — third arg gets quoted (this is F22)", + // The whole point: without the quoting, the remote shell + // re-splits "bash -c echo hi" and bash's -c eats just "echo". + extraArgs: []string{"bash", "-c", "echo hi"}, + expected: []string{"bash", "-c", "'echo hi'"}, + }, + { + name: "arg with single quote — escaped, not lost", + extraArgs: []string{"echo", "it's fine"}, + expected: []string{"echo", `'it'\''s fine'`}, + }, + { + name: "glob pattern — quoted so the remote (not the local) shell expands it", + extraArgs: []string{"find", ".", "-name", "*.txt"}, + expected: []string{"find", ".", "-name", "'*.txt'"}, + }, + { + name: "ssh -o key=val style — gets quoted because of '='; harmless given current arg ordering", + extraArgs: []string{"-o", "StrictHostKeyChecking=no"}, + expected: []string{"-o", "'StrictHostKeyChecking=no'"}, + }, + { + name: "empty arg gets explicit empty quotes (not lost)", + extraArgs: []string{"sh", "-c", ""}, + expected: []string{"sh", "-c", "''"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + got := buildSSHArgs("id", "host", "2222", "/k", tc.extraArgs) + // Trim everything up to and including the destination so we + // can assert just on the appended extras. + dest := "id@host" + cut := -1 + for i, a := range got { + if a == dest { + cut = i + 1 + break + } + } + assert.GreaterOrEqual(t, cut, 0, "destination not found in args") + tail := got[cut:] + if tc.expected == nil { + assert.Empty(t, tail) + } else { + assert.Equal(t, tc.expected, tail) + } + }) + } +} From d25aa76fef1b028f0557b1dac6f7cc58a7856b68 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 07:24:52 +0000 Subject: [PATCH 2/5] =?UTF-8?q?lakebox:=20fix=20F2=20=E2=80=94=20honor=20g?= =?UTF-8?q?lobal=20`-o=20json`=20on=20every=20JSON-producing=20subcommand?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The framework-global `-o, --output` flag validates its argument but lakebox subcommands never read it: passing `-o json` silently produced text. The lakebox commands had grown their own `--json` flag instead. Wire both forms through a tiny `jsonOutput(cmd, jsonFlag) bool` helper checked at the rendering branch. Touches the four subcommands that have a JSON branch (list, status, create, ssh-key list). `--json` stays as a short-form alias; `-o json` now works the same way users expect from every other databricks subcommand. The other lakebox verbs (stop, start, delete, config, ssh, default, register, ssh-key delete) emit only human-readable status, so they're untouched. Co-authored-by: Isaac --- cmd/lakebox/create.go | 8 +++++--- cmd/lakebox/list.go | 2 +- cmd/lakebox/ssh.go | 4 ++-- cmd/lakebox/sshkey.go | 2 +- cmd/lakebox/status.go | 2 +- cmd/lakebox/ui.go | 16 ++++++++++++++++ 6 files changed, 26 insertions(+), 8 deletions(-) diff --git a/cmd/lakebox/create.go b/cmd/lakebox/create.go index 5b6a795b43..0d6ea43e9f 100644 --- a/cmd/lakebox/create.go +++ b/cmd/lakebox/create.go @@ -35,11 +35,13 @@ Examples: return err } + wantJSON := jsonOutput(cmd, outputJSON) + // In JSON mode, suppress the spinner+ok lines so the only thing // on stdout/stderr that a scripted caller has to parse is the // JSON body itself. var result *createResponse - if outputJSON { + if wantJSON { result, err = api.create(ctx, name) if err != nil { return fmt.Errorf("failed to create lakebox: %w", err) @@ -72,12 +74,12 @@ Examples: if shouldSetDefault { if err := setDefault(ctx, profile, result.SandboxID); err != nil { warn(ctx, fmt.Sprintf("Could not save default: %v", err)) - } else if !outputJSON { + } else if !wantJSON { field(ctx, cmd.ErrOrStderr(), "default", result.SandboxID) } } - if outputJSON { + if wantJSON { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") return enc.Encode(result) diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index 4bd8b22deb..2bcef9c424 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -83,7 +83,7 @@ Example: } _ = setSandboxes(ctx, profile, refs) - if outputJSON { + if jsonOutput(cmd, outputJSON) { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") return enc.Encode(entries) diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index 0eca26f185..3c085a9b9f 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -217,13 +217,13 @@ func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) er // incompatible by default: // // - Single arg that's already a complete shell command: -// `lakebox ssh -- 'echo hi | head -3'` +// `lakebox ssh -- 'echo hi | head -3'` // The user expects the remote shell to split and execute this // string. ssh's "concat with spaces" is a no-op here, so we hand // the arg through untouched. // // - Multi-arg exec-style invocation: -// `lakebox ssh -- bash -c 'echo hi'` +// `lakebox ssh -- bash -c 'echo hi'` // Cobra splits this into `["bash", "-c", "echo hi"]`. ssh's join // produces `bash -c echo hi` on the wire, which bash re-splits into // `-c=echo` and `$0=hi` — bug F22. We fix that by shell-quoting diff --git a/cmd/lakebox/sshkey.go b/cmd/lakebox/sshkey.go index 35714b21f6..c0a5d91531 100644 --- a/cmd/lakebox/sshkey.go +++ b/cmd/lakebox/sshkey.go @@ -90,7 +90,7 @@ Examples: return fmt.Errorf("failed to list ssh keys: %w", err) } - if outputJSON { + if jsonOutput(cmd, outputJSON) { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") return enc.Encode(keys) diff --git a/cmd/lakebox/status.go b/cmd/lakebox/status.go index 924a69f6b3..495426ebf0 100644 --- a/cmd/lakebox/status.go +++ b/cmd/lakebox/status.go @@ -50,7 +50,7 @@ Example: _ = setGatewayHost(ctx, profile, entry.GatewayHost) _ = upsertSandbox(ctx, profile, entry.SandboxID, entry.Name) - if outputJSON { + if jsonOutput(cmd, outputJSON) { enc := json.NewEncoder(cmd.OutOrStdout()) enc.SetIndent("", " ") return enc.Encode(entry) diff --git a/cmd/lakebox/ui.go b/cmd/lakebox/ui.go index 338080c9e5..82f2c45c31 100644 --- a/cmd/lakebox/ui.go +++ b/cmd/lakebox/ui.go @@ -7,8 +7,24 @@ import ( "strings" "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" ) +// jsonOutput reports whether the user asked for JSON output, via either +// the lakebox-local `--json` flag or the framework-global `-o json`. The +// global flag is the standard shape across other `databricks` commands +// and the validator already rejects bogus values, but until this helper +// existed lakebox commands silently ignored it and emitted text anyway. +func jsonOutput(cmd *cobra.Command, jsonFlag bool) bool { + if jsonFlag { + return true + } + if f := cmd.Flag("output"); f != nil && f.Value.String() == "json" { + return true + } + return false +} + // cmdioSpinner is the subset of *cmdio.spinner's method set we need. // Defining the interface locally lets us hold the unexported type as a // struct field; cmdio's spinner satisfies it structurally. From 49fcf40418773e5e1fd627fcf4ce68d1071b1e29 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 07:32:32 +0000 Subject: [PATCH 3/5] =?UTF-8?q?lakebox:=20fix=20N1=20=E2=80=94=20drop=20pr?= =?UTF-8?q?emature=20"=E2=9C=93=20Connected=20to"=20line=20in=20`ssh`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change ssh.go printed `✓ Connected to ` immediately before handing off to the ssh binary, with no actual handshake having happened. On any ssh failure (gateway timeout, missing key, network hiccup) the user saw a confident "Connected" line *followed* by the real error from ssh. Drop the s.ok call. The Connecting spinner stays — it shows for the brief window before exec replaces this process. ssh's own stdout/stderr is the success signal; nothing the CLI adds at this point can be more truthful than that. Co-authored-by: Isaac --- cmd/lakebox/ssh.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index 3c085a9b9f..2e8670667b 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -187,9 +187,14 @@ Examples: _ = setGatewayHost(ctx, profile, sandboxGatewayHost) } + // Spinner runs until exec replaces this process (Linux/macOS) + // or until the ssh subprocess returns (Windows execv shim). + // We deliberately don't print "Connected" — at this point ssh + // hasn't actually handshaken yet, so any success affirmation + // here would be a lie that gets contradicted by ssh's own + // error output on the failure path. s := spin(ctx, "Connecting to "+cmdio.Bold(ctx, lakeboxID)+"…") defer s.Close() - s.ok("Connected to " + cmdio.Bold(ctx, lakeboxID)) return execSSHDirect(lakeboxID, host, gatewayPort, keyPath, extraArgs) }, } From 1d5fed2ae6c74721c0a8e813e38ab67472c69f59 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 07:38:08 +0000 Subject: [PATCH 4/5] =?UTF-8?q?lakebox:=20fix=20N3=20=E2=80=94=20surface?= =?UTF-8?q?=20byte=20count=20on=20`--name`=20length=20rejections?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The server rejected oversize names with `name exceeds 256 bytes` but didn't say how many bytes the client sent, which makes the error useless when emoji (4 bytes each) push the input past 256 in fewer visible characters than the user expects. Mirror the limit client-side and validate `--name` before issuing `create` / `config update`. The new error names the observed byte count and reminds the user that non-ASCII characters count for more: Error: --name is 260 bytes; limit is 256 (emoji and most non-ASCII characters count as 2-4 bytes each) Side effect: zero-RTT failure — bad names never reach the server. Tracking deprecation: if the server-side cap ever changes, this constant goes out of date; the worst case is the CLI is briefly too strict, and the server still enforces. Co-authored-by: Isaac --- cmd/lakebox/api.go | 18 ++++++++++++++++++ cmd/lakebox/api_test.go | 31 +++++++++++++++++++++++++++++++ cmd/lakebox/config.go | 3 +++ cmd/lakebox/create.go | 4 ++++ 4 files changed, 56 insertions(+) create mode 100644 cmd/lakebox/api_test.go diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go index 59c42b7b4c..2f19722541 100644 --- a/cmd/lakebox/api.go +++ b/cmd/lakebox/api.go @@ -37,6 +37,24 @@ const lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" // for this API." const orgIDHeader = "X-Databricks-Org-Id" +// maxNameBytes mirrors the manager-side `Sandbox.name` length cap. The +// server measures bytes, not characters, so a name made of emoji or other +// multi-byte UTF-8 hits the limit in fewer visible characters than the user +// expects. Mirroring the constant client-side lets us fail fast with the +// observed byte count instead of paying a round-trip for a 400. +const maxNameBytes = 256 + +// validateName returns an error when `name` exceeds the wire limit. The +// error names the observed byte count so the user can recover in one shot +// (the original server message just said "exceeds 256 bytes" without saying +// by how much, which is unhelpful when emoji are in play). +func validateName(name string) error { + if n := len(name); n > maxNameBytes { + return fmt.Errorf("--name is %d bytes; limit is %d (emoji and most non-ASCII characters count as 2-4 bytes each)", n, maxNameBytes) + } + return nil +} + // lakeboxAPI wraps the SDK ApiClient with workspace-id-aware request headers. type lakeboxAPI struct { c *client.DatabricksClient diff --git a/cmd/lakebox/api_test.go b/cmd/lakebox/api_test.go new file mode 100644 index 0000000000..9664f19f7d --- /dev/null +++ b/cmd/lakebox/api_test.go @@ -0,0 +1,31 @@ +package lakebox + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateNameAcceptsAscii(t *testing.T) { + require.NoError(t, validateName("")) + require.NoError(t, validateName("my-project")) + require.NoError(t, validateName(strings.Repeat("a", 256))) // boundary: exactly the limit +} + +func TestValidateNameRejectsOversize(t *testing.T) { + err := validateName(strings.Repeat("a", 257)) + require.Error(t, err) + assert.Contains(t, err.Error(), "257 bytes") + assert.Contains(t, err.Error(), "256") +} + +func TestValidateNameCountsBytesNotRunes(t *testing.T) { + // 64 panda emoji = 64 × 4 bytes = 256 bytes — at the limit, OK. + require.NoError(t, validateName(strings.Repeat("🐼", 64))) + // 65 = 260 bytes, rejected. + err := validateName(strings.Repeat("🐼", 65)) + require.Error(t, err) + assert.Contains(t, err.Error(), "260 bytes") +} diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index 0d63edcf4b..f5ad8ad808 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -97,6 +97,9 @@ Examples: var name *string if cmd.Flags().Changed("name") { + if err := validateName(nameFlag); err != nil { + return err + } n := nameFlag name = &n } diff --git a/cmd/lakebox/create.go b/cmd/lakebox/create.go index 0d6ea43e9f..c6e5b7e1be 100644 --- a/cmd/lakebox/create.go +++ b/cmd/lakebox/create.go @@ -28,6 +28,10 @@ Examples: databricks lakebox create --json`, PreRunE: root.MustWorkspaceClient, RunE: func(cmd *cobra.Command, args []string) error { + if err := validateName(name); err != nil { + return err + } + ctx := cmd.Context() w := cmdctx.WorkspaceClient(ctx) api, err := newLakeboxAPI(w) From dc2dcf12638cd139b1db1e2e30398cd1ce2b5ac7 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 07:45:40 +0000 Subject: [PATCH 5/5] lakebox: show gateway host in `status` text output The gateway hostname was already on the JSON side of `status` but missing from text. It's the most useful field for debugging an `ssh` failure (wrong region, stale cache, gateway down), so omitting it from the human-readable view forced users to round-trip through `--json` to see it. Render it in Faint between `status` and any existing `fqdn`, only when populated. Co-authored-by: Isaac --- cmd/lakebox/status.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/lakebox/status.go b/cmd/lakebox/status.go index 495426ebf0..97744e0303 100644 --- a/cmd/lakebox/status.go +++ b/cmd/lakebox/status.go @@ -63,6 +63,9 @@ Example: field(ctx, out, "name", entry.Name) } field(ctx, out, "status", status(ctx, entry.Status)) + if entry.GatewayHost != "" { + field(ctx, out, "gateway", cmdio.Faint(ctx, entry.GatewayHost)) + } if entry.FQDN != "" { field(ctx, out, "fqdn", cmdio.Faint(ctx, entry.FQDN)) }