Skip to content

Commit

Permalink
Sync: Gracefully handle broken notebook files (#398)
Browse files Browse the repository at this point in the history
## Changes
Ignore broken notebook files during sync.

Fixes databricks/databricks-vscode#712
  • Loading branch information
fjakobs committed May 23, 2023
1 parent cc8e9b7 commit aa85638
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 22 deletions.
17 changes: 10 additions & 7 deletions libs/sync/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error
return snapshot, nil
}

func (s *Snapshot) diff(all []fileset.File) (change diff, err error) {
func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, err error) {
lastModifiedTimes := s.LastUpdatedTimes
remoteToLocalNames := s.RemoteToLocalNames
localToRemoteNames := s.LocalToRemoteNames
Expand All @@ -191,18 +191,21 @@ func (s *Snapshot) diff(all []fileset.File) (change diff, err error) {
if !seen || modified.After(lastSeenModified) {
lastModifiedTimes[f.Relative] = modified

// get file metadata about whether it's a notebook
isNotebook, _, err := notebook.Detect(f.Absolute)
if err != nil {
// Ignore this file if we're unable to determine the notebook type.
// Trying to upload such a file to the workspace would fail anyway.
log.Warnf(ctx, err.Error())
continue
}

// change separators to '/' for file paths in remote store
unixFileName := filepath.ToSlash(f.Relative)

// put file in databricks workspace
change.put = append(change.put, unixFileName)

// get file metadata about whether it's a notebook
isNotebook, _, err := notebook.Detect(f.Absolute)
if err != nil {
return change, err
}

// Strip extension for notebooks.
remoteName := unixFileName
if isNotebook {
Expand Down
40 changes: 26 additions & 14 deletions libs/sync/snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string
}

func TestDiff(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -44,7 +46,7 @@ func TestDiff(t *testing.T) {
// New files are put
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 2)
Expand All @@ -59,7 +61,7 @@ func TestDiff(t *testing.T) {
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)

assert.Len(t, change.delete, 0)
Expand All @@ -74,7 +76,7 @@ func TestDiff(t *testing.T) {
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
Expand All @@ -85,6 +87,8 @@ func TestDiff(t *testing.T) {
}

func TestSymlinkDiff(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -106,12 +110,14 @@ func TestSymlinkDiff(t *testing.T) {

files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.put, 1)
}

func TestFolderDiff(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -130,7 +136,7 @@ func TestFolderDiff(t *testing.T) {

files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
Expand All @@ -139,14 +145,16 @@ func TestFolderDiff(t *testing.T) {
f1.Remove(t)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
assert.Contains(t, change.delete, "foo/bar")
}

func TestPythonNotebookDiff(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -164,7 +172,7 @@ func TestPythonNotebookDiff(t *testing.T) {
files, err := fileSet.All()
assert.NoError(t, err)
foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
Expand All @@ -178,7 +186,7 @@ func TestPythonNotebookDiff(t *testing.T) {
foo.Overwrite(t, "print(\"abc\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 1)
Expand All @@ -192,7 +200,7 @@ func TestPythonNotebookDiff(t *testing.T) {
foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 1)
Expand All @@ -207,7 +215,7 @@ func TestPythonNotebookDiff(t *testing.T) {
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
Expand All @@ -218,6 +226,8 @@ func TestPythonNotebookDiff(t *testing.T) {
}

func TestErrorWhenIdenticalRemoteName(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -235,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
defer vanillaFoo.Close(t)
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 2)
Expand All @@ -246,11 +256,13 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.ErrorContains(t, err, "both foo and foo.py point to the same remote file location foo. Please remove one of them from your local project")
}

func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) {
ctx := context.Background()

// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(projectDir)
Expand All @@ -267,7 +279,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) {
pythonFoo.Overwrite(t, "# Databricks notebook source\n")
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
change, err := state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
Expand All @@ -279,7 +291,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) {
sqlFoo.Overwrite(t, "-- Databricks notebook source\n")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
change, err = state.diff(ctx, files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 1)
Expand Down
2 changes: 1 addition & 1 deletion libs/sync/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func (s *Sync) RunOnce(ctx context.Context) error {
return err
}

change, err := s.snapshot.diff(all)
change, err := s.snapshot.diff(ctx, all)
if err != nil {
return err
}
Expand Down

0 comments on commit aa85638

Please sign in to comment.