From aa8563807075a37504cc543d4d1b896c5dde6336 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 23 May 2023 12:10:15 +0200 Subject: [PATCH] Sync: Gracefully handle broken notebook files (#398) ## Changes Ignore broken notebook files during sync. Fixes https://github.com/databricks/databricks-vscode/issues/712 --- libs/sync/snapshot.go | 17 +++++++++------- libs/sync/snapshot_test.go | 40 +++++++++++++++++++++++++------------- libs/sync/sync.go | 2 +- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index b79202e7d9..1ea7b18bdb 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -172,7 +172,7 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error return snapshot, nil } -func (s *Snapshot) diff(all []fileset.File) (change diff, err error) { +func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, err error) { lastModifiedTimes := s.LastUpdatedTimes remoteToLocalNames := s.RemoteToLocalNames localToRemoteNames := s.LocalToRemoteNames @@ -191,18 +191,21 @@ func (s *Snapshot) diff(all []fileset.File) (change diff, err error) { if !seen || modified.After(lastSeenModified) { lastModifiedTimes[f.Relative] = modified + // get file metadata about whether it's a notebook + isNotebook, _, err := notebook.Detect(f.Absolute) + if err != nil { + // Ignore this file if we're unable to determine the notebook type. + // Trying to upload such a file to the workspace would fail anyway. + log.Warnf(ctx, err.Error()) + continue + } + // change separators to '/' for file paths in remote store unixFileName := filepath.ToSlash(f.Relative) // put file in databricks workspace change.put = append(change.put, unixFileName) - // get file metadata about whether it's a notebook - isNotebook, _, err := notebook.Detect(f.Absolute) - if err != nil { - return change, err - } - // Strip extension for notebooks. remoteName := unixFileName if isNotebook { diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index 54cb86fc55..8154b79141 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -25,6 +25,8 @@ func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string } func TestDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -44,7 +46,7 @@ func TestDiff(t *testing.T) { // New files are put files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 2) @@ -59,7 +61,7 @@ func TestDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) @@ -74,7 +76,7 @@ func TestDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -85,6 +87,8 @@ func TestDiff(t *testing.T) { } func TestSymlinkDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -106,12 +110,14 @@ func TestSymlinkDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.put, 1) } func TestFolderDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -130,7 +136,7 @@ func TestFolderDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -139,7 +145,7 @@ func TestFolderDiff(t *testing.T) { f1.Remove(t) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -147,6 +153,8 @@ func TestFolderDiff(t *testing.T) { } func TestPythonNotebookDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -164,7 +172,7 @@ func TestPythonNotebookDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -178,7 +186,7 @@ func TestPythonNotebookDiff(t *testing.T) { foo.Overwrite(t, "print(\"abc\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) @@ -192,7 +200,7 @@ func TestPythonNotebookDiff(t *testing.T) { foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) @@ -207,7 +215,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -218,6 +226,8 @@ func TestPythonNotebookDiff(t *testing.T) { } func TestErrorWhenIdenticalRemoteName(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -235,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { defer vanillaFoo.Close(t) files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 2) @@ -246,11 +256,13 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.ErrorContains(t, err, "both foo and foo.py point to the same remote file location foo. Please remove one of them from your local project") } func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -267,7 +279,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { pythonFoo.Overwrite(t, "# Databricks notebook source\n") files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -279,7 +291,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { sqlFoo.Overwrite(t, "-- Databricks notebook source\n") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 64c2328ace..54d0624e77 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -134,7 +134,7 @@ func (s *Sync) RunOnce(ctx context.Context) error { return err } - change, err := s.snapshot.diff(all) + change, err := s.snapshot.diff(ctx, all) if err != nil { return err }