diff --git a/.prettierrc.js b/.prettierrc.js index e584661edd..3e7a4c6ad5 100644 --- a/.prettierrc.js +++ b/.prettierrc.js @@ -11,6 +11,12 @@ module.exports = { tabWidth: 2 } }, + { + files: ['*.md'], + options: { + tabWidth: 2 + } + }, { files: ['**/datascience/serviceRegistry.ts'], options: { diff --git a/DEEPNOTE_KERNEL_IMPLEMENTATION.md b/DEEPNOTE_KERNEL_IMPLEMENTATION.md index 91caf6208a..ade1164c25 100644 --- a/DEEPNOTE_KERNEL_IMPLEMENTATION.md +++ b/DEEPNOTE_KERNEL_IMPLEMENTATION.md @@ -15,6 +15,7 @@ This implementation adds automatic kernel selection and startup for `.deepnote` ### Components Created #### 1. **Deepnote Kernel Types** (`src/kernels/deepnote/types.ts`) + - `DeepnoteKernelConnectionMetadata`: Connection metadata for Deepnote kernels (similar to RemoteKernelSpecConnectionMetadata) - `IDeepnoteToolkitInstaller`: Interface for toolkit installation service - `IDeepnoteServerStarter`: Interface for server management @@ -23,22 +24,25 @@ This implementation adds automatic kernel selection and startup for `.deepnote` - Constants for wheel URL, default port, and notebook type #### 2. **Deepnote Toolkit Installer** (`src/kernels/deepnote/deepnoteToolkitInstaller.node.ts`) + - Creates a dedicated virtual environment per `.deepnote` file +- **Upgrades pip** in the venv to the latest version (ensures latest pip features and fixes) - Checks if `deepnote-toolkit` is installed in the venv - Installs the toolkit and `ipykernel` from the hardcoded S3 wheel URL -- **Registers a kernel spec** using `ipykernel install --user --name deepnote-venv-` that points to the venv's Python interpreter -- This ensures packages installed via `pip` are available to the kernel +- Installs ipykernel in the venv to enable kernel functionality - Outputs installation progress to the output channel - Verifies successful installation - Reuses existing venvs for the same `.deepnote` file **Key Methods:** + - `getVenvInterpreter(deepnoteFileUri)`: Gets the venv Python interpreter for a specific file -- `ensureInstalled(interpreter, deepnoteFileUri)`: Creates venv, installs toolkit and ipykernel, and registers kernel spec -- `getVenvHash(deepnoteFileUri)`: Creates a unique hash for both kernel naming and venv directory paths +- `ensureInstalled(interpreter, deepnoteFileUri)`: Creates venv, installs toolkit and ipykernel +- `getVenvHash(deepnoteFileUri)`: Creates a unique hash for venv directory paths - `getDisplayName(deepnoteFileUri)`: Gets a friendly display name for the kernel #### 3. **Deepnote Server Starter** (`src/kernels/deepnote/deepnoteServerStarter.node.ts`) + - Manages the lifecycle of deepnote-toolkit Jupyter servers (one per `.deepnote` file) - Finds an available port (starting from 8888) - Starts the server with `python -m deepnote_toolkit server --jupyter-port ` @@ -52,29 +56,33 @@ This implementation adds automatic kernel selection and startup for `.deepnote` - Manages multiple servers for different `.deepnote` files simultaneously **Key Methods:** + - `getOrStartServer(interpreter, deepnoteFileUri)`: Returns server info for a file, starting if needed - `stopServer(deepnoteFileUri)`: Stops the running server for a specific file - `isServerRunning(serverInfo)`: Checks if server is responsive #### 4. **Deepnote Server Provider** (`src/kernels/deepnote/deepnoteServerProvider.node.ts`) + - Jupyter server provider that registers and resolves Deepnote toolkit servers - Implements `JupyterServerProvider` interface from VSCode Jupyter API - Maintains a map of server handles to server connection information - Allows the kernel infrastructure to resolve server connections **Key Methods:** + - `activate()`: Registers the server provider with the Jupyter server provider registry - `registerServer(handle, serverInfo)`: Registers a Deepnote server for a specific handle - `provideJupyterServers(token)`: Lists all registered Deepnote servers - `resolveJupyterServer(server, token)`: Resolves server connection info by handle #### 5. **Deepnote Kernel Auto-Selector** (`src/notebooks/deepnote/deepnoteKernelAutoSelector.node.ts`) + - Activation service that listens for notebook open events and controller selection changes - Automatically selects Deepnote kernel for `.deepnote` files - Queries the Deepnote server for available kernel specs -- **Prefers the venv kernel spec** (`deepnote-venv-`) that was registered by the installer and uses the venv's Python interpreter -- This ensures the kernel uses the same environment where packages are installed -- Falls back to other Python kernel specs if venv kernel not found +- **Selects a server-native Python kernel spec** (e.g., `python3-venv` or any available Python kernel) +- The Deepnote server is started with the venv's Python interpreter, ensuring the kernel uses the venv environment +- Environment variables (`PATH`, `VIRTUAL_ENV`) are configured so the server and kernel use the venv's Python - Registers the server with the server provider - Creates kernel connection metadata - Registers the controller with VSCode @@ -84,6 +92,7 @@ This implementation adds automatic kernel selection and startup for `.deepnote` - Tracks controllers per notebook file for efficient reuse **Key Methods:** + - `activate()`: Registers event listeners for notebook open/close and controller selection changes - `ensureKernelSelected(notebook)`: Main logic for auto-selection, kernel spec selection, and kernel reuse - `onDidOpenNotebook(notebook)`: Event handler for notebook opens @@ -91,91 +100,101 @@ This implementation adds automatic kernel selection and startup for `.deepnote` - `onDidCloseNotebook(notebook)`: Event handler for notebook closes (preserves controllers for reuse) #### 6. **Service Registry Updates** (`src/notebooks/serviceRegistry.node.ts`) + - Registers all new Deepnote kernel services - Binds `DeepnoteServerProvider` as an activation service - Binds `IDeepnoteKernelAutoSelector` as an activation service #### 7. **Kernel Types Updates** (`src/kernels/types.ts`) + - Adds `DeepnoteKernelConnectionMetadata` to `RemoteKernelConnectionMetadata` union type - Adds deserialization support for `'startUsingDeepnoteKernel'` kind ## Flow Diagram -``` +```text User opens .deepnote file - ↓ + ↓ DeepnoteKernelAutoSelector.onDidOpenNotebook() - ↓ + ↓ Check if kernel already selected → Yes → Exit - ↓ No + ↓ No Get active Python interpreter - ↓ + ↓ DeepnoteToolkitInstaller.ensureInstalled() - ↓ + ↓ Extract base file URI (remove query params) - ↓ + ↓ Check if venv exists for this file → Yes → Skip to server - ↓ No + ↓ No Create venv for this .deepnote file - ↓ + ↓ +Upgrade pip to latest version in venv + ↓ pip install deepnote-toolkit[server] and ipykernel in venv - ↓ -Register kernel spec pointing to venv's Python - ↓ + ↓ Verify installation - ↓ + ↓ DeepnoteServerStarter.getOrStartServer(venv, fileUri) - ↓ + ↓ Check if server running for this file → Yes → Return info - ↓ No + ↓ No Find available port - ↓ + ↓ Start: python -m deepnote_toolkit server --jupyter-port - ↓ + ↓ Wait for server to be ready (poll /api endpoint) - ↓ + ↓ Register server with DeepnoteServerProvider - ↓ + ↓ Query server for available kernel specs - ↓ -Select venv kernel spec (deepnote-venv-) or fall back to other Python kernel - ↓ + ↓ +Select any available Python kernel spec (e.g., python3-venv) + ↓ Create DeepnoteKernelConnectionMetadata with server kernel spec - ↓ + ↓ Register controller with IControllerRegistration - ↓ + ↓ Set controller affinity to Preferred (auto-selects kernel) - ↓ + ↓ User runs cell → Executes on Deepnote kernel ``` ## Configuration ### Hardcoded Values (as requested) + - **Wheel URL**: `https://deepnote-staging-runtime-artifactory.s3.amazonaws.com/deepnote-toolkit-packages/0.2.30.post20/deepnote_toolkit-0.2.30.post20-py3-none-any.whl` - **Default Port**: `8888` (will find next available if occupied) - **Notebook Type**: `deepnote` - **Venv Location**: `~/.vscode/extensions/storage/deepnote-venvs//` (e.g., `venv_a1b2c3d4`) - **Server Provider ID**: `deepnote-server` -- **Kernel Spec Name**: `deepnote-venv-` (registered via ipykernel to point to venv Python) +- **Kernel Spec**: Uses server-native Python kernel specs (e.g., `python3-venv`) - **Kernel Display Name**: `Deepnote ()` ## Usage 1. **Open a .deepnote file** in VSCode - - A temporary "Loading Deepnote Kernel..." controller is automatically selected - - A progress notification appears in the bottom-right + +- A temporary "Loading Deepnote Kernel..." controller is automatically selected +- A progress notification appears in the bottom-right + 2. **You can immediately click "Run All" or run individual cells** - - Cells will wait for the kernel to be ready before executing - - No kernel selection dialog will appear + +- Cells will wait for the kernel to be ready before executing +- No kernel selection dialog will appear + 3. **Once the progress notification shows "Kernel ready!"**: - - The loading controller is automatically replaced with the real Deepnote kernel - - Your cells start executing + +- The loading controller is automatically replaced with the real Deepnote kernel +- Your cells start executing + 4. The extension automatically: - - Installs deepnote-toolkit in a dedicated virtual environment (first time only) - - Starts a Deepnote server on an available port (if not already running) - - Selects the appropriate Deepnote kernel - - Executes your cells + +- Installs deepnote-toolkit in a dedicated virtual environment (first time only) +- Starts a Deepnote server on an available port (if not already running) +- Selects the appropriate Deepnote kernel +- Executes your cells **First-time setup** takes 15-30 seconds. **Subsequent opens** of the same file reuse the existing environment and server, taking less than 1 second. @@ -200,20 +219,23 @@ User runs cell → Executes on Deepnote kernel To provide a cleaner experience for Deepnote notebooks, the following UI elements are hidden when working with `.deepnote` files: 1. **Notebook Toolbar Buttons:** - - Restart Kernel - - Variable View - - Outline View - - Export - - Codespace Integration + +- Restart Kernel +- Variable View +- Outline View +- Export +- Codespace Integration 2. **Cell Title Menu Items:** - - Run by Line - - Run by Line Next/Stop - - Select Precedent/Dependent Cells + +- Run by Line +- Run by Line Next/Stop +- Select Precedent/Dependent Cells 3. **Cell Execute Menu Items:** - - Run and Debug Cell - - Run Precedent/Dependent Cells + +- Run and Debug Cell +- Run Precedent/Dependent Cells These items are hidden by adding `notebookType != 'deepnote'` conditions to the `when` clauses in `package.json`. The standard cell run buttons (play icons) remain visible as they are the primary way to execute cells. @@ -253,14 +275,16 @@ To test the implementation: 2. Add Python code cells 3. Run a cell 4. Verify: - - Toolkit gets installed (check output channel) - - Server starts (check output channel) - - Kernel is auto-selected (check kernel picker) - - Code executes successfully + +- Toolkit gets installed (check output channel) +- Server starts (check output channel) +- Kernel is auto-selected (check kernel picker) +- Code executes successfully ## Files Modified/Created ### Created: + - `src/kernels/deepnote/types.ts` - Type definitions and interfaces - `src/kernels/deepnote/deepnoteToolkitInstaller.node.ts` - Toolkit installation service - `src/kernels/deepnote/deepnoteServerStarter.node.ts` - Server lifecycle management @@ -268,6 +292,7 @@ To test the implementation: - `src/notebooks/deepnote/deepnoteKernelAutoSelector.node.ts` - Automatic kernel selection ### Modified: + - `src/kernels/types.ts` - Added DeepnoteKernelConnectionMetadata to union types - `src/notebooks/serviceRegistry.node.ts` - Registered new services @@ -294,13 +319,15 @@ The implementation uses VSCode's Jupyter server provider API to properly integra ### Kernel Spec Resolution -The implementation uses a hybrid approach: +The implementation uses server-native kernel specs with venv isolation: -1. **Registers per-venv kernel specs**: The installer registers kernel specs using `ipykernel install --user --name deepnote-venv-` that point to each venv's Python interpreter -2. **Queries server for available specs**: Connects to the running Deepnote server using `JupyterLabHelper` and queries available kernel specs via `getKernelSpecs()` -3. **Prefers venv kernel specs**: Looks for the registered venv kernel spec (`deepnote-venv-`) first -4. **Falls back gracefully**: Falls back to other Python kernel specs (like `python3-venv`) if the venv kernel spec is not found -5. **Uses server-compatible specs**: This ensures compatibility with the Deepnote server's kernel configuration while maintaining venv isolation +1. **Queries server for available specs**: Connects to the running Deepnote server using `JupyterLabHelper` and queries available kernel specs via `getKernelSpecs()` +2. **Selects any Python kernel spec**: Selects any available Python kernel spec (e.g., `python3-venv`, `python3`, or the first available kernel) +3. **Venv isolation via server environment**: The Deepnote server is started with the venv's Python interpreter and environment variables configured: + - `PATH` is prepended with the venv's `bin/` directory + - `VIRTUAL_ENV` points to the venv path + - `PYTHONHOME` is removed to avoid conflicts +4. **Result**: The kernel uses the venv's Python environment even though it's using a server-native kernel spec, ensuring packages installed via `pip` are available ### Virtual Environment Path Handling @@ -310,10 +337,11 @@ The implementation uses a robust hashing approach for virtual environment direct 2. **Hash Algorithm**: Implements a djb2-style hash function for better distribution 3. **Format**: Generates identifiers like `venv_a1b2c3d4` (max 16 characters) 4. **Benefits**: - - Avoids Windows MAX_PATH (260 character) limitations - - Prevents directory structure leakage into extension storage - - Provides consistent naming for both venv directories and kernel specs - - Reduces collision risk with better hash distribution + +- Avoids Windows MAX_PATH (260 character) limitations +- Prevents directory structure leakage into extension storage +- Provides consistent naming for both venv directories and kernel specs +- Reduces collision risk with better hash distribution ## Troubleshooting & Key Fixes @@ -328,6 +356,7 @@ The implementation uses a robust hashing approach for virtual environment direct **Problem**: The extension was creating a custom kernel spec name based on the interpreter hash, but this kernel spec didn't exist on the Deepnote server. **Solution**: Instead of creating a custom kernel spec, the implementation now: + - Queries the Deepnote server for available kernel specs - Selects an existing Python kernel (typically `python3-venv`) - Uses this server-native kernel spec for the connection @@ -337,6 +366,7 @@ The implementation uses a robust hashing approach for virtual environment direct **Problem**: When a cell execution resulted in an error, the kernel controller would sometimes become unselected or disposed. Subsequent attempts to run cells would fail because no kernel was selected, requiring manual intervention. **Solution**: Implemented persistent kernel tracking and automatic reselection: + - Controllers and connection metadata are stored per notebook file and reused across sessions - Listens to `onControllerSelectionChanged` events to detect when a Deepnote kernel becomes unselected - Automatically reselects the same kernel controller when it becomes deselected @@ -346,6 +376,7 @@ The implementation uses a robust hashing approach for virtual environment direct ### Issue 4: "Controllers getting disposed causing repeated recreation" **Problem**: Controllers were being automatically disposed by VSCode's `ControllerRegistration` system when: + 1. The kernel finder refreshed its list of available kernels 2. The Deepnote kernel wasn't in that list (because it's created dynamically) 3. The `loadControllers()` method would dispose controllers that weren't in the kernel finder's list @@ -354,6 +385,7 @@ The implementation uses a robust hashing approach for virtual environment direct **Root Cause**: The `ControllerRegistration.loadControllers()` method periodically checks if controllers are still valid by comparing them against the kernel finder's list. Controllers that aren't found and aren't "protected" get disposed. Deepnote controllers weren't protected, so they were being disposed and recreated repeatedly. **Solution**: Mark Deepnote controllers as protected using `trackActiveInterpreterControllers()`: + - Call `controllerRegistration.trackActiveInterpreterControllers(controllers)` when creating Deepnote controllers - This adds them to the `_activeInterpreterControllerIds` set, which prevents disposal in `canControllerBeDisposed()` - Controllers are now created **once** and persist for the entire session @@ -361,31 +393,49 @@ The implementation uses a robust hashing approach for virtual environment direct - The same controller instance handles all cell executions, even after errors These changes ensure that Deepnote notebooks can execute cells reliably by: + 1. Providing a valid server provider that can be resolved -2. Using kernel specs that actually exist on the Deepnote server +2. Using kernel specs that actually exist on the Deepnote server 3. Maintaining persistent kernel sessions that survive errors and can be reused indefinitely 4. **Preventing controller disposal entirely** - controllers are created once and reused forever ### Issue 5: "Packages installed via pip not available in kernel" **Problem**: When users ran `!pip install matplotlib`, the package was installed successfully, but when they tried to import it, they got `ModuleNotFoundError`. This happened because: + 1. The Jupyter server was running in the venv 2. But the kernel was using a different Python interpreter (system Python or different environment) 3. So `pip install` went to one environment, but imports came from another -**Root Cause**: The kernel was using the venv's Python (correct), but shell commands (`!pip install`) were using the system Python or pyenv (wrong) because they inherit the shell's PATH environment variable. - -**Solution**: Two-part fix: -1. **Kernel spec registration** (ensures kernel uses venv Python): - - Install `ipykernel` in the venv along with deepnote-toolkit - - Use `python -m ipykernel install --user --name deepnote-venv-` to register a kernel spec that points to the venv's Python interpreter - - In the kernel selection logic, prefer the venv kernel spec (`deepnote-venv-`) when querying the server for available specs - -2. **Environment variable configuration** (ensures shell commands use venv Python): - - When starting the Jupyter server, set environment variables: - - Prepend venv's `bin/` directory to `PATH` - - Set `VIRTUAL_ENV` to point to the venv - - Remove `PYTHONHOME` (can interfere with venv) - - This ensures `!pip install` and other shell commands use the venv's Python - -**Result**: Both the kernel and shell commands now use the same Python environment (the venv), so packages installed via `!pip install` or `%pip install` are immediately available for import. +**Root Cause**: Both the kernel and shell commands (`!pip install`) need to use the venv's Python interpreter, but by default they would use whatever Python is in the system PATH. + +**Solution**: Configure the Deepnote server's environment to use the venv: + +- When starting the Jupyter server, set environment variables: + - Prepend venv's `bin/` directory to `PATH` + - Set `VIRTUAL_ENV` to point to the venv + - Remove `PYTHONHOME` (can interfere with venv) +- Install `ipykernel` in the venv along with deepnote-toolkit +- Use any server-native Python kernel spec (e.g., `python3-venv`) +- The kernel inherits the server's environment, so it uses the venv's Python +- Shell commands (`!pip install`) also inherit the server's environment, so they use the venv's Python + +**Result**: Both the kernel and shell commands use the same Python environment (the venv), so packages installed via `!pip install` or `%pip install` are immediately available for import. + +### Issue 6: "Venv created with outdated pip version" + +**Problem**: When creating a virtual environment using `python -m venv`, Python's venv module bundles its own version of pip (e.g., 25.0.1) rather than copying the pip from the host system (e.g., 25.2). This could lead to: + +1. Missing features or bug fixes available in newer pip versions +2. Installation failures for packages that require newer pip features +3. Inconsistent behavior between host and venv environments + +**Root Cause**: The `venv` module includes a bundled version of pip that may be older than the pip installed on the host system. This bundled version is used when creating new virtual environments. + +**Solution**: Explicitly upgrade pip in the venv after creation: + +- After creating the venv and verifying the interpreter exists, run `python -m pip install --upgrade pip` +- This ensures the venv uses the latest available pip version from PyPI +- The upgrade happens before installing deepnote-toolkit, ensuring all package installations use the latest pip + +**Result**: The venv now uses the latest pip version (e.g., 25.2), ensuring compatibility with modern package installations and access to the latest pip features and bug fixes. diff --git a/src/kernels/deepnote/deepnoteToolkitInstaller.node.ts b/src/kernels/deepnote/deepnoteToolkitInstaller.node.ts index f893947a22..7aa5c19177 100644 --- a/src/kernels/deepnote/deepnoteToolkitInstaller.node.ts +++ b/src/kernels/deepnote/deepnoteToolkitInstaller.node.ts @@ -158,12 +158,31 @@ export class DeepnoteToolkitInstaller implements IDeepnoteToolkitInstaller { return undefined; } + // Use undefined as resource to get full system environment (including git in PATH) + const venvProcessService = await this.processServiceFactory.create(undefined); + + // Upgrade pip in the venv to the latest version + logger.info('Upgrading pip in venv to latest version...'); + this.outputChannel.appendLine('Upgrading pip...'); + const pipUpgradeResult = await venvProcessService.exec( + venvInterpreter.uri.fsPath, + ['-m', 'pip', 'install', '--upgrade', 'pip'], + { throwOnStdErr: false } + ); + + if (pipUpgradeResult.stdout) { + logger.info(`pip upgrade output: ${pipUpgradeResult.stdout}`); + } + if (pipUpgradeResult.stderr) { + logger.info(`pip upgrade stderr: ${pipUpgradeResult.stderr}`); + } + + Cancellation.throwIfCanceled(token); + // Install deepnote-toolkit and ipykernel in venv logger.info(`Installing deepnote-toolkit and ipykernel in venv from ${DEEPNOTE_TOOLKIT_WHEEL_URL}`); this.outputChannel.appendLine('Installing deepnote-toolkit and ipykernel...'); - // Use undefined as resource to get full system environment (including git in PATH) - const venvProcessService = await this.processServiceFactory.create(undefined); const installResult = await venvProcessService.exec( venvInterpreter.uri.fsPath, [ @@ -174,7 +193,7 @@ export class DeepnoteToolkitInstaller implements IDeepnoteToolkitInstaller { `deepnote-toolkit[server] @ ${DEEPNOTE_TOOLKIT_WHEEL_URL}`, 'ipykernel' ], - { throwOnStdErr: true } + { throwOnStdErr: false } ); Cancellation.throwIfCanceled(token);