Skip to content

Commit eec87b8

Browse files
Set notebook directory as current directory for pip installs from workspace fs (#961)
When users use a relative path to install a notebook scoped library with pip (eg `%pip install ../../foo`) it can fail since cwd for shell commands can be an ephemeral directory in DBRs < 14.0. For other shell commands, our preamble handles the cwd, but pip commands are moved to the beginning of the notebook, since they can lead to a kernel restart and hence loss of preamble. This means, they can't access the features of the full preamble and need an explicit `os.chdir` to set the correct path. ## Changes * Add an explicit `os.chdir(notebook_dir)` before each `%pip install`. * Also remove code to pass source_file and project_root to the wrappers as notebook params (for notebook jobs) and cli args (for python file jobs). Now these parameters are hard coded into the wrapper. These are updated on every run since the wrapper is recreated on every run. ## Tests * manual Fixes #958
1 parent c9864e1 commit eec87b8

File tree

5 files changed

+124
-116
lines changed

5 files changed

+124
-116
lines changed

packages/databricks-vscode/resources/python/file.workflow-wrapper.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,8 @@
44
import sys
55
import os
66

7-
databricks_arg_idx = []
8-
for i, arg in enumerate(sys.argv):
9-
if i == 0:
10-
continue
11-
if sys.argv[i-1] == "--databricks-source-file":
12-
python_file = arg
13-
elif sys.argv[i-1] == "--databricks-project-root":
14-
project_root = arg
15-
else:
16-
continue
17-
databricks_arg_idx.extend([i, i-1])
18-
19-
if python_file is None:
20-
raise Exception("--databricks-source-file argument not specified")
21-
22-
if project_root is None:
23-
raise Exception("--databricks-project-root argument not specified")
24-
25-
#remove databricks args from argv
26-
sys.argv = [value for i,value in enumerate(sys.argv) if i not in databricks_arg_idx]
7+
python_file = '{{DATABRICKS_SOURCE_FILE}}'
8+
project_root = '{{DATABRICKS_PROJECT_ROOT}}'
279

2810
# change working directory
2911
os.chdir(os.path.dirname(python_file))

packages/databricks-vscode/resources/python/notebook.workflow-wrapper.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,17 @@ def databricks_preamble():
33
from IPython import get_ipython
44
from typing import List
55
from shlex import quote
6+
import os
7+
import sys
68

7-
dbutils.widgets.text("DATABRICKS_SOURCE_FILE",
8-
"{{DATABRICKS_SOURCE_FILE}}")
9-
dbutils.widgets.text("DATABRICKS_PROJECT_ROOT",
10-
"{{DATABRICKS_PROJECT_ROOT}}")
11-
src_file_dir = None
12-
project_root_dir = None
9+
src_file_dir = os.path.dirname("{{DATABRICKS_SOURCE_FILE}}")
10+
os.chdir(src_file_dir)
1311

14-
if dbutils.widgets.get("DATABRICKS_SOURCE_FILE") != "":
15-
import os
16-
src_file_dir = os.path.dirname(
17-
dbutils.widgets.get("DATABRICKS_SOURCE_FILE"))
18-
os.chdir(src_file_dir)
19-
20-
if dbutils.widgets.get("DATABRICKS_PROJECT_ROOT") != "":
21-
import sys
22-
project_root_dir = dbutils.widgets.get("DATABRICKS_PROJECT_ROOT")
23-
sys.path.insert(0, project_root_dir)
12+
project_root_dir = "{{DATABRICKS_PROJECT_ROOT}}"
13+
sys.path.insert(0, project_root_dir)
2414

2515
def parse_databricks_magic_lines(lines: List[str]):
26-
if len(lines) == 0 or src_file_dir is None:
16+
if len(lines) == 0:
2717
return lines
2818

2919
first = ""

packages/databricks-vscode/src/run/WorkflowRunner.ts

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,7 @@ export class WorkflowRunner implements Disposable {
173173
panel.showExportedRun(
174174
await cluster.runNotebookAndWait({
175175
path: remoteFilePath,
176-
parameters: {
177-
// eslint-disable-next-line @typescript-eslint/naming-convention
178-
DATABRICKS_SOURCE_FILE:
179-
syncDestination.localToRemote(program)
180-
.workspacePrefixPath,
181-
// eslint-disable-next-line @typescript-eslint/naming-convention
182-
DATABRICKS_PROJECT_ROOT:
183-
syncDestination.remoteUri.workspacePrefixPath,
184-
...parameters,
185-
},
176+
parameters,
186177
onProgress: (
187178
state: jobs.RunLifeCycleState,
188179
run: WorkflowRun
@@ -200,16 +191,14 @@ export class WorkflowRunner implements Disposable {
200191
? await new WorkspaceFsWorkflowWrapper(
201192
this.connectionManager,
202193
this.context
203-
).createPythonFileWrapper(originalFileUri)
194+
).createPythonFileWrapper(
195+
originalFileUri,
196+
syncDestination.remoteUri
197+
)
204198
: undefined;
205199
const response = await cluster.runPythonAndWait({
206200
path: wrappedFile ? wrappedFile.path : originalFileUri.path,
207-
args: (args ?? []).concat([
208-
"--databricks-source-file",
209-
originalFileUri.workspacePrefixPath,
210-
"--databricks-project-root",
211-
syncDestination.remoteUri.workspacePrefixPath,
212-
]),
201+
args: args ?? [],
213202
onProgress: (
214203
state: jobs.RunLifeCycleState,
215204
run: WorkflowRun

packages/databricks-vscode/src/workspace-fs/WorkspaceFsWorkflowWrapper.test.ts

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,25 @@ describe(__filename, async () => {
7676
);
7777
}
7878

79+
async function getWrapperData(
80+
wrapperName:
81+
| "notebook.workflow-wrapper.py"
82+
| "file.workflow-wrapper.py",
83+
originalFilePath: string
84+
) {
85+
return (
86+
await readFile(path.join(resourceDirPath, wrapperName), "utf-8")
87+
)
88+
.replace(
89+
"{{DATABRICKS_SOURCE_FILE}}",
90+
new RemoteUri(originalFilePath).workspacePrefixPath
91+
)
92+
.replace(
93+
"{{DATABRICKS_PROJECT_ROOT}}",
94+
new RemoteUri(testDirPath).workspacePrefixPath
95+
);
96+
}
97+
7998
describe("python files", () => {
8099
beforeEach(createMocks);
81100
it("should create wrapper for files", async () => {
@@ -100,11 +119,14 @@ describe(__filename, async () => {
100119
await new WorkspaceFsWorkflowWrapper(
101120
instance(mockConnectionManager),
102121
instance(mockExtensionContext)
103-
).createPythonFileWrapper(new RemoteUri(originalFilePath));
122+
).createPythonFileWrapper(
123+
new RemoteUri(originalFilePath),
124+
new RemoteUri(testDirPath)
125+
);
104126

105-
const wrapperData = await readFile(
106-
path.join(resourceDirPath, "file.workflow-wrapper.py"),
107-
"utf-8"
127+
const wrapperData = await getWrapperData(
128+
"file.workflow-wrapper.py",
129+
originalFilePath
108130
);
109131
verify(
110132
mockWorkspaceService.import(
@@ -165,23 +187,6 @@ describe(__filename, async () => {
165187
});
166188
});
167189

168-
async function getWrapperData() {
169-
return (
170-
await readFile(
171-
path.join(resourceDirPath, "notebook.workflow-wrapper.py"),
172-
"utf-8"
173-
)
174-
)
175-
.replace(
176-
"{{DATABRICKS_SOURCE_FILE}}",
177-
new RemoteUri(originalFilePath).workspacePrefixPath
178-
)
179-
.replace(
180-
"{{DATABRICKS_PROJECT_ROOT}}",
181-
new RemoteUri(testDirPath).workspacePrefixPath
182-
);
183-
}
184-
185190
it("should create wrapper for databricks python notebook", async () => {
186191
await withFile(async (localFilePath) => {
187192
const comment = ["# Databricks notebook source"];
@@ -202,7 +207,10 @@ describe(__filename, async () => {
202207
"PY_DBNB"
203208
);
204209

205-
const wrapperData = await getWrapperData();
210+
const wrapperData = await getWrapperData(
211+
"notebook.workflow-wrapper.py",
212+
originalFilePath
213+
);
206214
const expected = comment
207215
.concat(wrapperData.split(/\r?\n/))
208216
.concat(["# COMMAND ----------"])
@@ -244,9 +252,16 @@ describe(__filename, async () => {
244252
"PY_DBNB"
245253
);
246254

247-
const wrapperData = await getWrapperData();
255+
const wrapperData = await getWrapperData(
256+
"notebook.workflow-wrapper.py",
257+
originalFilePath
258+
);
248259
const expected = comment
249260
.concat([
261+
"import os",
262+
`os.chdir(os.path.dirname('${
263+
new RemoteUri(originalFilePath).workspacePrefixPath
264+
}'))`,
250265
"# MAGIC %pip install pandas",
251266
"# COMMAND ----------",
252267
"dbutils.library.restartPython()",
@@ -377,7 +392,7 @@ describe(__filename, async () => {
377392
},
378393
},
379394
outputs: [],
380-
execution_count: 0,
395+
execution_count: null,
381396
};
382397

383398
it("should create wrapper for databricks jupyter notebook", async () => {
@@ -448,7 +463,19 @@ describe(__filename, async () => {
448463
const expected = {
449464
...notebookMetadata,
450465
cells: [
451-
{...wrapperData, source: ["%pip install x"]},
466+
{
467+
...wrapperData,
468+
source: [
469+
[
470+
"import os",
471+
`os.chdir(os.path.dirname('${
472+
new RemoteUri(originalFilePath)
473+
.workspacePrefixPath
474+
}'))`,
475+
"%pip install x",
476+
].join("\n"),
477+
],
478+
},
452479
{
453480
...wrapperData,
454481
source: ["dbutils.library.restartPython()"],

0 commit comments

Comments
 (0)