Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

* Added `databricks aitools` command group for installing Databricks skills into your coding agents (Claude Code, Cursor, Codex CLI, OpenCode, GitHub Copilot, Antigravity). Skills are fetched from [github.com/databricks/databricks-agent-skills](https://github.com/databricks/databricks-agent-skills) and either symlinked into each agent's skills directory or copied into the current project. Use `databricks aitools install` to set up, `update` to pull newer versions, `list` to see what's available, and `uninstall` to remove them. Pick where they go with `--scope=project|global` (`--scope=both` is accepted on `update` and `list`).
* `[__settings__].default_profile` is now consulted as a fallback by `databricks api`, `databricks auth token`, and bundle commands when neither `--profile` nor `DATABRICKS_CONFIG_PROFILE` is set. `databricks auth token` continues to give precedence to `DATABRICKS_HOST` over `default_profile`. For bundle commands, `default_profile` only applies when the bundle does not pin its own `workspace.host`.
* `databricks workspace import-dir` now skips `.git`, `.databricks`, and `node_modules` directories during recursive imports. To import one of these directories deliberately, pass it as `SOURCE_PATH` ([#5118](https://github.com/databricks/cli/pull/5118)).
* `databricks postgres create-role --help` now documents the `--json` body shape, and the command now rejects, client-side, the common mistake of wrapping the body in `{"role": ...}`, with a hint pointing at the correct shape ([#5111](https://github.com/databricks/cli/pull/5111)).

### Bundles
Expand Down
29 changes: 29 additions & 0 deletions cmd/workspace/workspace/import_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ type importDirOptions struct {
overwrite bool
}

// defaultSkipDirs is the set of directory names that the import-dir walk
// skips when it meets them below the source root.
//
// Before this set existed, these directories were copied verbatim into the
// workspace, with the following consequences:
//   - .git: .git/config (which often holds template-repo origin URLs and
//     occasionally cached credentials) leaked into deployed app source trees.
//   - .databricks: the local bundle cache was copied on top of any remote one.
//   - node_modules: a large tree was uploaded for JS/TS apps even though it
//     gets reinstalled in the runtime anyway.
//
// Reported as DEPLOY-04 #2 in the EMEA Apps gaps doc; until now users worked
// around it with post-deploy scrubbing scripts.
var defaultSkipDirs = map[string]struct{}{
	".databricks":  {},
	".git":         {},
	"node_modules": {},
}

// The callback function imports the file specified at sourcePath. This function is
// meant to be used in conjunction with fs.WalkDir
//
Expand All @@ -48,6 +64,15 @@ func (opts importDirOptions) callback(ctx context.Context, workspaceFiler filer.
return err
}

// Skip default-excluded directories (e.g. .git, .databricks). The check
// excludes the explicit root so a user who passes ".git" as the source
// can still copy it deliberately.
if d.IsDir() && sourcePath != sourceDir {
if _, skip := defaultSkipDirs[d.Name()]; skip {
return fs.SkipDir
}
}

// localName is the name for the file in the local file system
localName, err := filepath.Rel(sourceDir, sourcePath)
if err != nil {
Expand Down Expand Up @@ -117,6 +142,10 @@ func newImportDir() *cobra.Command {
cmd.Long = `
Import a directory recursively from the local file system to a Databricks workspace.
Notebooks will have their extensions (one of .scala, .py, .sql, .ipynb, .r) stripped

By default, .git, .databricks, and node_modules directories encountered during
the recursive import are skipped. To import one of these directories deliberately,
pass it as SOURCE_PATH.
`

cmd.Annotations = make(map[string]string)
Expand Down
156 changes: 156 additions & 0 deletions cmd/workspace/workspace/import_dir_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package workspace

import (
"context"
"io"
"io/fs"
"os"
"path/filepath"
"slices"
"testing"

"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/flags"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// recordingFiler is a test double handed to the import-dir walker. It
// remembers every path passed to Mkdir and Write so a test can assert which
// directories and files the walk visited. The remaining methods are inert
// stubs.
type recordingFiler struct {
	dirs  []string // paths passed to Mkdir, in call order
	files []string // paths passed to Write, in call order
}

// Mkdir records p as a created directory and always succeeds.
func (rf *recordingFiler) Mkdir(ctx context.Context, p string) error {
	rf.dirs = append(rf.dirs, p)
	return nil
}

// Write records p as a written file and always succeeds. The reader is never
// consumed and the write modes are ignored.
func (rf *recordingFiler) Write(ctx context.Context, p string, reader io.Reader, mode ...filer.WriteMode) error {
	rf.files = append(rf.files, p)
	return nil
}

// Read is a stub: it never returns content and always reports fs.ErrNotExist.
func (*recordingFiler) Read(ctx context.Context, p string) (io.ReadCloser, error) {
	return nil, fs.ErrNotExist
}

// Delete is a stub: it does nothing and always succeeds.
func (*recordingFiler) Delete(ctx context.Context, p string, mode ...filer.DeleteMode) error {
	return nil
}

// ReadDir is a stub: it never lists entries and always reports fs.ErrNotExist.
func (*recordingFiler) ReadDir(ctx context.Context, p string) ([]fs.DirEntry, error) {
	return nil, fs.ErrNotExist
}

// Stat is a stub: it never returns file info and always reports fs.ErrNotExist.
func (*recordingFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
	return nil, fs.ErrNotExist
}

// writeFile creates the file rel (relative to root) with the given contents,
// creating any missing parent directories first. Any filesystem error fails
// the calling test immediately.
func writeFile(t *testing.T, root, rel, contents string) {
	t.Helper()

	target := filepath.Join(root, rel)

	// Parent directories are created with 0o755, the file itself with 0o644.
	err := os.MkdirAll(filepath.Dir(target), 0o755)
	require.NoError(t, err)

	err = os.WriteFile(target, []byte(contents), 0o644)
	require.NoError(t, err)
}

// runWalk walks sourceDir with filepath.WalkDir using the import-dir callback
// backed by a fresh recordingFiler, and returns that recorder so the caller
// can inspect which directories and files were visited. A walk error fails
// the calling test immediately.
func runWalk(t *testing.T, sourceDir string) *recordingFiler {
	t.Helper()

	// Text-mode cmdio with io.Discard passed for both writer arguments.
	cmdIO := cmdio.NewIO(t.Context(), flags.OutputText, nil, io.Discard, io.Discard, "", "")
	ctx := cmdio.InContext(t.Context(), cmdIO)

	recorder := &recordingFiler{}
	options := importDirOptions{
		sourceDir: sourceDir,
		targetDir: "/Workspace/x",
		overwrite: true,
	}

	walkErr := filepath.WalkDir(sourceDir, options.callback(ctx, recorder))
	require.NoError(t, walkErr)
	return recorder
}

// TestImportDirSkipsGitDirectory checks that a .git directory at the top of
// the source tree is left out of the import: app.py is the only file written
// and no recorded directory path mentions .git.
func TestImportDirSkipsGitDirectory(t *testing.T) {
	root := t.TempDir()
	fixtures := []struct{ rel, body string }{
		{"app.py", "print('hi')"},
		{".git/config", "[remote]\n url = git@github.com:org/template.git"},
		{".git/HEAD", "ref: refs/heads/main"},
		{".git/objects/abc123", "binary"},
	}
	for _, f := range fixtures {
		writeFile(t, root, f.rel, f.body)
	}

	walked := runWalk(t, root)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{"app.py"}
	assert.Equal(t, want, walked.files)

	for i := range walked.dirs {
		assert.NotContains(t, walked.dirs[i], ".git", "no .git directory should be created in the workspace")
	}
}

// TestImportDirSkipsNestedGitDirectory checks that a .git directory nested
// below the root (vendor/sub/.git) is skipped while its sibling file
// vendor/sub/lib.py is still imported.
func TestImportDirSkipsNestedGitDirectory(t *testing.T) {
	root := t.TempDir()
	fixtures := []struct{ rel, body string }{
		{"app.py", "print('hi')"},
		{"vendor/sub/.git/config", "[remote]\n url = ..."},
		{"vendor/sub/lib.py", "def f(): pass"},
	}
	for _, f := range fixtures {
		writeFile(t, root, f.rel, f.body)
	}

	walked := runWalk(t, root)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{
		"app.py",
		filepath.ToSlash("vendor/sub/lib.py"),
	}
	assert.Equal(t, want, walked.files)

	for i := range walked.dirs {
		assert.NotContains(t, walked.dirs[i], ".git")
	}
}

// TestImportDirSkipsDatabricksCacheDirectory checks that the .databricks
// directory is left out of the import: databricks.yml is the only file
// written and no recorded directory path mentions .databricks.
func TestImportDirSkipsDatabricksCacheDirectory(t *testing.T) {
	root := t.TempDir()
	fixtures := []struct{ rel, body string }{
		{"databricks.yml", "bundle:\n name: x"},
		{".databricks/bundle/state.json", "{}"},
	}
	for _, f := range fixtures {
		writeFile(t, root, f.rel, f.body)
	}

	walked := runWalk(t, root)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{"databricks.yml"}
	assert.Equal(t, want, walked.files)

	for i := range walked.dirs {
		assert.NotContains(t, walked.dirs[i], ".databricks")
	}
}

// TestImportDirSkipsNodeModulesDirectory checks that node_modules is left out
// of the import: only the top-level package.json and app.js are written and
// no recorded directory path mentions node_modules.
func TestImportDirSkipsNodeModulesDirectory(t *testing.T) {
	root := t.TempDir()
	fixtures := []struct{ rel, body string }{
		{"package.json", "{}"},
		{"app.js", "console.log('hi')"},
		{"node_modules/react/index.js", "module.exports = {}"},
		{"node_modules/.package-lock.json", "{}"},
	}
	for _, f := range fixtures {
		writeFile(t, root, f.rel, f.body)
	}

	walked := runWalk(t, root)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{"app.js", "package.json"}
	assert.Equal(t, want, walked.files)

	for i := range walked.dirs {
		assert.NotContains(t, walked.dirs[i], "node_modules")
	}
}

// TestImportDirCopiesGitignoreFile checks that a regular file whose name
// merely starts with ".git" (.gitignore) is still imported alongside app.py.
func TestImportDirCopiesGitignoreFile(t *testing.T) {
	root := t.TempDir()
	fixtures := []struct{ rel, body string }{
		{".gitignore", "*.pyc\n"},
		{"app.py", "print('hi')"},
	}
	for _, f := range fixtures {
		writeFile(t, root, f.rel, f.body)
	}

	walked := runWalk(t, root)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{".gitignore", "app.py"}
	assert.Equal(t, want, walked.files)
}

// TestImportDirAllowsExplicitGitRoot checks that the skip rule does not apply
// to the walk root itself: when a .git directory is passed explicitly as the
// source, its files (HEAD and config) are imported.
func TestImportDirAllowsExplicitGitRoot(t *testing.T) {
	// The skip rule targets .git directories encountered during the walk,
	// not a root the user named deliberately.
	parent := t.TempDir()
	explicitRoot := filepath.Join(parent, ".git")

	err := os.MkdirAll(explicitRoot, 0o755)
	require.NoError(t, err)

	fixtures := []struct{ rel, body string }{
		{"HEAD", "ref: refs/heads/main"},
		{"config", "[core]\n"},
	}
	for _, f := range fixtures {
		writeFile(t, explicitRoot, f.rel, f.body)
	}

	walked := runWalk(t, explicitRoot)

	// Sort so the comparison does not depend on walk order.
	slices.Sort(walked.files)
	want := []string{"HEAD", "config"}
	assert.Equal(t, want, walked.files)
}
Loading