From c147723555bee61cf8c1dd9b869392beef73c2ae Mon Sep 17 00:00:00 2001 From: jankuca Date: Sun, 5 Oct 2025 16:42:17 +0200 Subject: [PATCH 1/6] fix: auto-kill orphaned processes --- .../deepnote/deepnoteServerStarter.node.ts | 148 +++++++++++++++++- src/kernels/deepnote/types.ts | 8 +- 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/src/kernels/deepnote/deepnoteServerStarter.node.ts b/src/kernels/deepnote/deepnoteServerStarter.node.ts index cf4ee4e34f..a3d569997c 100644 --- a/src/kernels/deepnote/deepnoteServerStarter.node.ts +++ b/src/kernels/deepnote/deepnoteServerStarter.node.ts @@ -7,7 +7,7 @@ import { PythonEnvironment } from '../../platform/pythonEnvironments/info'; import { IDeepnoteServerStarter, IDeepnoteToolkitInstaller, DeepnoteServerInfo, DEEPNOTE_DEFAULT_PORT } from './types'; import { IProcessServiceFactory, ObservableExecutionResult } from '../../platform/common/process/types.node'; import { logger } from '../../platform/logging'; -import { IOutputChannel, IDisposable, IHttpClient } from '../../platform/common/types'; +import { IOutputChannel, IDisposable, IHttpClient, IAsyncDisposableRegistry } from '../../platform/common/types'; import { STANDARD_OUTPUT_CHANNEL } from '../../platform/common/constants'; import { sleep } from '../../platform/common/utils/async'; import { Cancellation, raceCancellationError } from '../../platform/common/cancellation'; @@ -28,8 +28,17 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { @inject(IProcessServiceFactory) private readonly processServiceFactory: IProcessServiceFactory, @inject(IDeepnoteToolkitInstaller) private readonly toolkitInstaller: IDeepnoteToolkitInstaller, @inject(IOutputChannel) @named(STANDARD_OUTPUT_CHANNEL) private readonly outputChannel: IOutputChannel, - @inject(IHttpClient) private readonly httpClient: IHttpClient - ) {} + @inject(IHttpClient) private readonly httpClient: IHttpClient, + @inject(IAsyncDisposableRegistry) asyncRegistry: IAsyncDisposableRegistry + ) { + // Register for disposal when the extension deactivates + asyncRegistry.push(this); + + // Clean up any orphaned deepnote-toolkit processes from previous sessions + this.cleanupOrphanedProcesses().catch((ex) => { + logger.warn(`Failed to cleanup orphaned processes: ${ex}`); + }); + } public async getOrStartServer( interpreter: PythonEnvironment, @@ -247,19 +256,56 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { } } - public dispose(): void { + public async dispose(): Promise { logger.info('Disposing DeepnoteServerStarter - stopping all servers...'); - // Stop all server processes + // Wait for any pending operations to complete (with timeout) + const pendingOps = Array.from(this.pendingOperations.values()); + if (pendingOps.length > 0) { + logger.info(`Waiting for ${pendingOps.length} pending operations to complete...`); + await Promise.allSettled(pendingOps.map((op) => Promise.race([op, sleep(2000)]))); + } + + // Stop all server processes and wait for them to exit + const killPromises: Promise[] = []; for (const [fileKey, serverProcess] of this.serverProcesses.entries()) { try { logger.info(`Stopping Deepnote server for ${fileKey}...`); - serverProcess.proc?.kill(); + const proc = serverProcess.proc; + if (proc && !proc.killed) { + // Create a promise that resolves when the process exits + const exitPromise = new Promise((resolve) => { + const timeout = setTimeout(() => { + logger.warn(`Process for ${fileKey} did not exit gracefully, force killing...`); + try { + proc.kill('SIGKILL'); + } catch { + // Ignore errors on force kill + } + resolve(); + }, 3000); // Wait up to 3 seconds for graceful exit + + proc.once('exit', () => { + clearTimeout(timeout); + resolve(); + }); + }); + + // Send SIGTERM for graceful shutdown + proc.kill('SIGTERM'); + killPromises.push(exitPromise); + } } catch (ex) { logger.error(`Error stopping Deepnote server for ${fileKey}: ${ex}`); } } + // Wait for all processes to exit + if (killPromises.length > 0) { + logger.info(`Waiting for ${killPromises.length} server processes to exit...`); + await Promise.allSettled(killPromises); + } + // Dispose all tracked disposables for (const [fileKey, disposables] of this.disposablesByFile.entries()) { try { @@ -277,4 +323,94 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { logger.info('DeepnoteServerStarter disposed successfully'); } + + /** + * Cleans up any orphaned deepnote-toolkit processes from previous VS Code sessions. + * This prevents port conflicts when starting new servers. + */ + private async cleanupOrphanedProcesses(): Promise { + try { + logger.info('Checking for orphaned deepnote-toolkit processes...'); + const processService = await this.processServiceFactory.create(undefined); + + // Find all deepnote-toolkit server processes + let command: string; + let args: string[]; + + if (process.platform === 'win32') { + // Windows: use tasklist and findstr + command = 'tasklist'; + args = ['/FI', 'IMAGENAME eq python.exe', '/FO', 'CSV', '/NH']; + } else { + // Unix-like: use ps and grep + command = 'ps'; + args = ['aux']; + } + + const result = await processService.exec(command, args, { throwOnStdErr: false }); + + if (result.stdout) { + const lines = result.stdout.split('\n'); + const pidsToKill: number[] = []; + + for (const line of lines) { + // Look for processes running deepnote_toolkit server + if (line.includes('deepnote_toolkit') && line.includes('server')) { + // Extract PID based on platform + let pid: number | undefined; + + if (process.platform === 'win32') { + // Windows CSV format: "python.exe","12345",... + const match = line.match(/"python\.exe","(\d+)"/); + if (match) { + pid = parseInt(match[1], 10); + } + } else { + // Unix format: user PID ... + const parts = line.trim().split(/\s+/); + if (parts.length > 1) { + pid = parseInt(parts[1], 10); + } + } + + if (pid && !isNaN(pid)) { + pidsToKill.push(pid); + } + } + } + + if (pidsToKill.length > 0) { + logger.info( + `Found ${pidsToKill.length} orphaned deepnote-toolkit processes: ${pidsToKill.join(', ')}` + ); + this.outputChannel.appendLine( + `Cleaning up ${pidsToKill.length} orphaned deepnote-toolkit process(es) from previous session...` + ); + + // Kill each process + for (const pid of pidsToKill) { + try { + if (process.platform === 'win32') { + await processService.exec('taskkill', ['/F', '/T', '/PID', pid.toString()], { + throwOnStdErr: false + }); + } else { + await processService.exec('kill', ['-9', pid.toString()], { throwOnStdErr: false }); + } + logger.info(`Killed orphaned process ${pid}`); + } catch (ex) { + logger.warn(`Failed to kill process ${pid}: ${ex}`); + } + } + + this.outputChannel.appendLine('✓ Cleanup complete'); + } else { + logger.info('No orphaned deepnote-toolkit processes found'); + } + } + } catch (ex) { + // Don't fail startup if cleanup fails + logger.warn(`Error during orphaned process cleanup: ${ex}`); + } + } } diff --git a/src/kernels/deepnote/types.ts b/src/kernels/deepnote/types.ts index 2d74fdad57..3114669b90 100644 --- a/src/kernels/deepnote/types.ts +++ b/src/kernels/deepnote/types.ts @@ -88,7 +88,7 @@ export interface IDeepnoteToolkitInstaller { } export const IDeepnoteServerStarter = Symbol('IDeepnoteServerStarter'); -export interface IDeepnoteServerStarter extends vscode.Disposable { +export interface IDeepnoteServerStarter { /** * Starts or gets an existing deepnote-toolkit Jupyter server. * @param interpreter The Python interpreter to use @@ -107,6 +107,12 @@ export interface IDeepnoteServerStarter extends vscode.Disposable { * @param deepnoteFileUri The URI of the .deepnote file */ stopServer(deepnoteFileUri: vscode.Uri): Promise; + + /** + * Disposes all server processes and resources. + * Called when the extension is deactivated. + */ + dispose(): Promise; } export interface DeepnoteServerInfo { From 24b38909fd3461cd3dbc42992ce0e97fa59ad2a9 Mon Sep 17 00:00:00 2001 From: jankuca Date: Sun, 5 Oct 2025 17:05:41 +0200 Subject: [PATCH 2/6] feat: implement session-based orphan process cleanup with lock files Previously, cleanupOrphanedProcesses() would force-kill every process matching 'deepnote_toolkit server', which could terminate active servers from other VS Code windows. This commit implements a sophisticated cleanup mechanism that only kills genuine orphans by: 1. Session Management: - Each VS Code window gets a unique session ID (UUID) - Lock files stored in OS temp dir track server ownership - Lock files contain: sessionId, pid, timestamp 2. Lock File Lifecycle: - Created when server starts successfully - Deleted when server stops normally - Deleted when orphaned process is killed - Cleaned up during extension disposal 3. Orphan Detection: - Verifies parent process exists before killing - Unix: uses 'ps -o ppid=' and checks if PPID is 1 or parent missing - Windows: uses 'wmic' to get parent PID and verifies existence - Only kills if process is truly orphaned (no parent or parent is init) 4. Smart Cleanup Logic: - Checks lock file for each candidate process - If lock file exists with different session ID: * Verifies process is orphaned before killing * Skips if parent exists (active in another window) - If no lock file exists: * Still verifies orphan status (backward compatibility) * Skips if parent exists - Comprehensive logging of all decisions Benefits: - Multi-window safe: won't kill servers from other VS Code windows - Robust orphan detection using OS-level parent process verification - Full audit trail with detailed logging - Automatic cleanup of stale lock files - Backward compatible with processes from older versions Added documentation in ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md --- ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md | 161 ++++++++++ .../deepnote/deepnoteServerStarter.node.ts | 274 ++++++++++++++++-- 2 files changed, 415 insertions(+), 20 deletions(-) create mode 100644 ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md diff --git a/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md new file mode 100644 index 0000000000..329c54ba85 --- /dev/null +++ b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md @@ -0,0 +1,161 @@ +# Orphan Process Cleanup Implementation + +## Overview +This document describes the implementation of a sophisticated orphan process cleanup mechanism for the Deepnote server starter that prevents terminating active servers from other VS Code windows. + +## Problem Statement +Previously, the cleanup logic in `cleanupOrphanedProcesses()` would force-kill **every** process matching "deepnote_toolkit server", which could terminate active servers from other VS Code windows, causing disruption to users working in multiple windows. + +## Solution +The new implementation uses a lock file system combined with parent process verification to only kill genuine orphan processes. + +## Key Components + +### 1. Session Management +- **Session ID**: Each VS Code window instance generates a unique session ID using `generateUuid()` +- **Lock File Directory**: Lock files are stored in `${os.tmpdir()}/vscode-deepnote-locks/` +- **Lock File Format**: JSON files named `server-{pid}.json` containing: + ```typescript + interface ServerLockFile { + sessionId: string; // Unique ID for the VS Code window + pid: number; // Process ID of the server + timestamp: number; // When the server was started + } + ``` + +### 2. Lock File Lifecycle + +#### Creation +- When a server starts successfully, a lock file is written with the server's PID and current session ID +- Location: `writeLockFile()` called in `startServerImpl()` after the server process is spawned + +#### Deletion +- Lock files are deleted when: + 1. The server is explicitly stopped via `stopServerImpl()` + 2. The extension is disposed and all servers are shut down + 3. An orphaned process is successfully killed during cleanup + +### 3. Orphan Detection Logic + +The `isProcessOrphaned()` method checks if a process is truly orphaned by verifying its parent process: + +#### Unix/Linux/macOS +```bash +# Get parent process ID +ps -o ppid= -p + +# Check if parent exists +ps -p +``` +- If PPID is 1 (init/systemd), the process is orphaned +- If parent process doesn't exist, the process is orphaned + +#### Windows +```cmd +# Get parent process ID +wmic process where ProcessId= get ParentProcessId + +# Check if parent exists +tasklist /FI "PID eq " /FO CSV /NH +``` +- If parent process doesn't exist or PPID is 0, the process is orphaned + +### 4. Cleanup Decision Flow + +When `cleanupOrphanedProcesses()` runs (at extension startup): + +1. **Find all deepnote_toolkit server processes** + - Use `ps aux` (Unix) or `tasklist` (Windows) + - Extract PIDs of matching processes + +2. **For each candidate PID:** + + a. **Check for lock file** + - If lock file exists: + - If session ID matches current session → **SKIP** (shouldn't happen at startup) + - If session ID differs: + - Check if process is orphaned + - If orphaned → **KILL** + - If not orphaned → **SKIP** (active in another window) + + - If no lock file exists: + - Check if process is orphaned + - If orphaned → **KILL** + - If not orphaned → **SKIP** (might be from older version without lock files) + +3. **Kill orphaned processes** + - Use `kill -9` (Unix) or `taskkill /F /T` (Windows) + - Delete lock file after successful kill + +4. **Log all decisions** + - Processes to kill: logged with reason + - Processes to skip: logged with reason + - Provides full audit trail for debugging + +## Code Changes + +### Modified Files +- `src/kernels/deepnote/deepnoteServerStarter.node.ts` + +### New Imports +```typescript +import * as fs from 'fs-extra'; +import * as os from 'os'; +import * as path from '../../platform/vscode-path/path'; +import { generateUuid } from '../../platform/common/uuid'; +``` + +### New Class Members +```typescript +private readonly sessionId: string = generateUuid(); +private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-locks'); +``` + +### New Methods +1. `initializeLockFileDirectory()` - Creates lock file directory +2. `getLockFilePath(pid)` - Returns path to lock file for a PID +3. `writeLockFile(pid)` - Creates lock file for a server process +4. `readLockFile(pid)` - Reads lock file data +5. `deleteLockFile(pid)` - Removes lock file +6. `isProcessOrphaned(pid)` - Checks if process is orphaned by verifying parent + +### Modified Methods +1. `constructor()` - Initializes lock file directory +2. `startServerImpl()` - Writes lock file after server starts +3. `stopServerImpl()` - Deletes lock file when server stops +4. `dispose()` - Deletes lock files for all stopped servers +5. `cleanupOrphanedProcesses()` - Implements sophisticated orphan detection + +## Benefits + +1. **Multi-Window Safety**: Active servers in other VS Code windows are never killed +2. **Backward Compatible**: Handles processes from older versions without lock files +3. **Robust Orphan Detection**: Uses OS-level parent process verification +4. **Full Audit Trail**: Comprehensive logging of all cleanup decisions +5. **Automatic Cleanup**: Stale lock files are removed when processes are killed +6. **Session Isolation**: Each VS Code window operates independently + +## Testing Recommendations + +1. **Single Window**: Verify servers start and stop correctly +2. **Multiple Windows**: Open multiple VS Code windows with Deepnote files, verify servers in other windows aren't killed +3. **Orphan Cleanup**: Kill VS Code process forcefully, restart, verify orphaned servers are cleaned up +4. **Lock File Cleanup**: Verify lock files are created and deleted appropriately +5. **Cross-Platform**: Test on Windows, macOS, and Linux + +## Edge Cases Handled + +1. **No lock file + active parent**: Process is skipped (might be from older version) +2. **Lock file + different session + active parent**: Process is skipped (active in another window) +3. **Lock file + same session**: Process is skipped (shouldn't happen at startup) +4. **No lock file + orphaned**: Process is killed (genuine orphan) +5. **Lock file + different session + orphaned**: Process is killed (orphaned from crashed window) +6. **Failed parent check**: Process is assumed not orphaned (safer default) + +## Future Enhancements + +1. **Stale Lock File Cleanup**: Periodically clean up lock files for non-existent processes +2. **Lock File Expiry**: Add TTL to lock files to handle edge cases +3. **Health Monitoring**: Periodic checks to ensure servers are still responsive +4. **Graceful Shutdown**: Try SIGTERM before SIGKILL for orphaned processes + diff --git a/src/kernels/deepnote/deepnoteServerStarter.node.ts b/src/kernels/deepnote/deepnoteServerStarter.node.ts index a3d569997c..084e21a786 100644 --- a/src/kernels/deepnote/deepnoteServerStarter.node.ts +++ b/src/kernels/deepnote/deepnoteServerStarter.node.ts @@ -12,6 +12,19 @@ import { STANDARD_OUTPUT_CHANNEL } from '../../platform/common/constants'; import { sleep } from '../../platform/common/utils/async'; import { Cancellation, raceCancellationError } from '../../platform/common/cancellation'; import getPort from 'get-port'; +import * as fs from 'fs-extra'; +import * as os from 'os'; +import * as path from '../../platform/vscode-path/path'; +import { generateUuid } from '../../platform/common/uuid'; + +/** + * Lock file data structure for tracking server ownership + */ +interface ServerLockFile { + sessionId: string; + pid: number; + timestamp: number; +} /** * Starts and manages the deepnote-toolkit Jupyter server. @@ -23,6 +36,10 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { private readonly disposablesByFile: Map = new Map(); // Track in-flight operations per file to prevent concurrent start/stop private readonly pendingOperations: Map> = new Map(); + // Unique session ID for this VS Code window instance + private readonly sessionId: string = generateUuid(); + // Directory for lock files + private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-locks'); constructor( @inject(IProcessServiceFactory) private readonly processServiceFactory: IProcessServiceFactory, @@ -34,6 +51,11 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { // Register for disposal when the extension deactivates asyncRegistry.push(this); + // Ensure lock file directory exists + this.initializeLockFileDirectory().catch((ex) => { + logger.warn(`Failed to initialize lock file directory: ${ex}`); + }); + // Clean up any orphaned deepnote-toolkit processes from previous sessions this.cleanupOrphanedProcesses().catch((ex) => { logger.warn(`Failed to cleanup orphaned processes: ${ex}`); @@ -161,6 +183,14 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { const serverInfo = { url, port }; this.serverInfos.set(fileKey, serverInfo); + // Write lock file for the server process + const serverPid = serverProcess.proc?.pid; + if (serverPid) { + await this.writeLockFile(serverPid); + } else { + logger.warn(`Could not get PID for server process for ${fileKey}`); + } + try { const serverReady = await this.waitForServer(serverInfo, 120000, token); if (!serverReady) { @@ -212,12 +242,19 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { const serverProcess = this.serverProcesses.get(fileKey); if (serverProcess) { + const serverPid = serverProcess.proc?.pid; + try { logger.info(`Stopping Deepnote server for ${fileKey}...`); serverProcess.proc?.kill(); this.serverProcesses.delete(fileKey); this.serverInfos.delete(fileKey); this.outputChannel.appendLine(`Deepnote server stopped for ${fileKey}`); + + // Clean up lock file after stopping the server + if (serverPid) { + await this.deleteLockFile(serverPid); + } } catch (ex) { logger.error(`Error stopping Deepnote server: ${ex}`); } @@ -268,11 +305,18 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { // Stop all server processes and wait for them to exit const killPromises: Promise[] = []; + const pidsToCleanup: number[] = []; + for (const [fileKey, serverProcess] of this.serverProcesses.entries()) { try { logger.info(`Stopping Deepnote server for ${fileKey}...`); const proc = serverProcess.proc; if (proc && !proc.killed) { + const serverPid = proc.pid; + if (serverPid) { + pidsToCleanup.push(serverPid); + } + // Create a promise that resolves when the process exits const exitPromise = new Promise((resolve) => { const timeout = setTimeout(() => { @@ -306,6 +350,11 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { await Promise.allSettled(killPromises); } + // Clean up lock files for all stopped processes + for (const pid of pidsToCleanup) { + await this.deleteLockFile(pid); + } + // Dispose all tracked disposables for (const [fileKey, disposables] of this.disposablesByFile.entries()) { try { @@ -324,6 +373,135 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { logger.info('DeepnoteServerStarter disposed successfully'); } + /** + * Initialize the lock file directory + */ + private async initializeLockFileDirectory(): Promise { + try { + await fs.ensureDir(this.lockFileDir); + logger.info(`Lock file directory initialized at ${this.lockFileDir} with session ID ${this.sessionId}`); + } catch (ex) { + logger.error(`Failed to create lock file directory: ${ex}`); + } + } + + /** + * Get the lock file path for a given PID + */ + private getLockFilePath(pid: number): string { + return path.join(this.lockFileDir, `server-${pid}.json`); + } + + /** + * Write a lock file for a server process + */ + private async writeLockFile(pid: number): Promise { + try { + const lockData: ServerLockFile = { + sessionId: this.sessionId, + pid, + timestamp: Date.now() + }; + const lockFilePath = this.getLockFilePath(pid); + await fs.writeJson(lockFilePath, lockData, { spaces: 2 }); + logger.info(`Created lock file for PID ${pid} with session ID ${this.sessionId}`); + } catch (ex) { + logger.warn(`Failed to write lock file for PID ${pid}: ${ex}`); + } + } + + /** + * Read a lock file for a given PID + */ + private async readLockFile(pid: number): Promise { + try { + const lockFilePath = this.getLockFilePath(pid); + if (await fs.pathExists(lockFilePath)) { + return await fs.readJson(lockFilePath); + } + } catch (ex) { + logger.warn(`Failed to read lock file for PID ${pid}: ${ex}`); + } + return null; + } + + /** + * Delete a lock file for a given PID + */ + private async deleteLockFile(pid: number): Promise { + try { + const lockFilePath = this.getLockFilePath(pid); + if (await fs.pathExists(lockFilePath)) { + await fs.remove(lockFilePath); + logger.info(`Deleted lock file for PID ${pid}`); + } + } catch (ex) { + logger.warn(`Failed to delete lock file for PID ${pid}: ${ex}`); + } + } + + /** + * Check if a process is orphaned by verifying its parent process + */ + private async isProcessOrphaned(pid: number): Promise { + try { + const processService = await this.processServiceFactory.create(undefined); + + if (process.platform === 'win32') { + // Windows: use WMIC to get parent process ID + const result = await processService.exec( + 'wmic', + ['process', 'where', `ProcessId=${pid}`, 'get', 'ParentProcessId'], + { throwOnStdErr: false } + ); + + if (result.stdout) { + const lines = result.stdout + .split('\n') + .filter((line) => line.trim() && !line.includes('ParentProcessId')); + if (lines.length > 0) { + const ppid = parseInt(lines[0].trim(), 10); + if (!isNaN(ppid) && ppid > 0) { + // Check if parent process exists + const parentCheck = await processService.exec( + 'tasklist', + ['/FI', `PID eq ${ppid}`, '/FO', 'CSV', '/NH'], + { throwOnStdErr: false } + ); + // If parent doesn't exist or is system process, it's orphaned + return !parentCheck.stdout || parentCheck.stdout.trim().length === 0 || ppid === 0; + } + } + } + } else { + // Unix: use ps to get parent process ID + const result = await processService.exec('ps', ['-o', 'ppid=', '-p', pid.toString()], { + throwOnStdErr: false + }); + + if (result.stdout) { + const ppid = parseInt(result.stdout.trim(), 10); + if (!isNaN(ppid)) { + // PPID of 1 typically means orphaned (adopted by init/systemd) + if (ppid === 1) { + return true; + } + // Check if parent process exists + const parentCheck = await processService.exec('ps', ['-p', ppid.toString()], { + throwOnStdErr: false + }); + return !parentCheck.stdout || parentCheck.stdout.trim().length === 0; + } + } + } + } catch (ex) { + logger.warn(`Failed to check if process ${pid} is orphaned: ${ex}`); + } + + // If we can't determine, assume it's not orphaned (safer) + return false; + } + /** * Cleans up any orphaned deepnote-toolkit processes from previous VS Code sessions. * This prevents port conflicts when starting new servers. @@ -351,7 +529,7 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { if (result.stdout) { const lines = result.stdout.split('\n'); - const pidsToKill: number[] = []; + const candidatePids: number[] = []; for (const line of lines) { // Look for processes running deepnote_toolkit server @@ -374,38 +552,94 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { } if (pid && !isNaN(pid)) { - pidsToKill.push(pid); + candidatePids.push(pid); } } } - if (pidsToKill.length > 0) { + if (candidatePids.length > 0) { logger.info( - `Found ${pidsToKill.length} orphaned deepnote-toolkit processes: ${pidsToKill.join(', ')}` - ); - this.outputChannel.appendLine( - `Cleaning up ${pidsToKill.length} orphaned deepnote-toolkit process(es) from previous session...` + `Found ${candidatePids.length} deepnote-toolkit server process(es): ${candidatePids.join(', ')}` ); - // Kill each process - for (const pid of pidsToKill) { - try { - if (process.platform === 'win32') { - await processService.exec('taskkill', ['/F', '/T', '/PID', pid.toString()], { - throwOnStdErr: false - }); + const pidsToKill: number[] = []; + const pidsToSkip: Array<{ pid: number; reason: string }> = []; + + // Check each process to determine if it should be killed + for (const pid of candidatePids) { + // Check if there's a lock file for this PID + const lockData = await this.readLockFile(pid); + + if (lockData) { + // Lock file exists - check if it belongs to a different session + if (lockData.sessionId !== this.sessionId) { + // Different session - check if the process is actually orphaned + const isOrphaned = await this.isProcessOrphaned(pid); + if (isOrphaned) { + logger.info( + `PID ${pid} belongs to session ${lockData.sessionId} and is orphaned - will kill` + ); + pidsToKill.push(pid); + } else { + pidsToSkip.push({ + pid, + reason: `belongs to active session ${lockData.sessionId.substring(0, 8)}...` + }); + } } else { - await processService.exec('kill', ['-9', pid.toString()], { throwOnStdErr: false }); + // Same session - this shouldn't happen during startup, but skip it + pidsToSkip.push({ pid, reason: 'belongs to current session' }); + } + } else { + // No lock file - check if orphaned before killing + const isOrphaned = await this.isProcessOrphaned(pid); + if (isOrphaned) { + logger.info(`PID ${pid} has no lock file and is orphaned - will kill`); + pidsToKill.push(pid); + } else { + pidsToSkip.push({ pid, reason: 'no lock file but has active parent process' }); } - logger.info(`Killed orphaned process ${pid}`); - } catch (ex) { - logger.warn(`Failed to kill process ${pid}: ${ex}`); } } - this.outputChannel.appendLine('✓ Cleanup complete'); + // Log skipped processes + if (pidsToSkip.length > 0) { + for (const { pid, reason } of pidsToSkip) { + logger.info(`Skipping PID ${pid}: ${reason}`); + } + } + + // Kill orphaned processes + if (pidsToKill.length > 0) { + logger.info(`Killing ${pidsToKill.length} orphaned process(es): ${pidsToKill.join(', ')}`); + this.outputChannel.appendLine( + `Cleaning up ${pidsToKill.length} orphaned deepnote-toolkit process(es)...` + ); + + for (const pid of pidsToKill) { + try { + if (process.platform === 'win32') { + await processService.exec('taskkill', ['/F', '/T', '/PID', pid.toString()], { + throwOnStdErr: false + }); + } else { + await processService.exec('kill', ['-9', pid.toString()], { throwOnStdErr: false }); + } + logger.info(`Killed orphaned process ${pid}`); + + // Clean up the lock file after killing + await this.deleteLockFile(pid); + } catch (ex) { + logger.warn(`Failed to kill process ${pid}: ${ex}`); + } + } + + this.outputChannel.appendLine('✓ Cleanup complete'); + } else { + logger.info('No orphaned deepnote-toolkit processes found (all processes are active)'); + } } else { - logger.info('No orphaned deepnote-toolkit processes found'); + logger.info('No deepnote-toolkit server processes found'); } } } catch (ex) { From 5a3b830d56dfaa0e94f05675cdc10961da13a5b9 Mon Sep 17 00:00:00 2001 From: jankuca Date: Sun, 5 Oct 2025 17:18:01 +0200 Subject: [PATCH 3/6] fix: correctly handle Windows tasklist INFO message in orphan detection The Windows orphan detection was incorrectly treating the tasklist message 'INFO: No tasks are running which match the specified criteria.' as indicating a live parent process, when it actually means the parent process doesn't exist. Changes: - Normalize and trim stdout from tasklist before checking - Return true (orphaned) when stdout is empty - Return true (orphaned) when stdout starts with 'INFO:' (case-insensitive) - Return true (orphaned) when stdout contains 'no tasks are running' (case-insensitive) - Return true (orphaned) when ppid === 0 - Return false (not orphaned) only when parent process actually exists This ensures that processes whose parents have terminated are correctly identified as orphaned and cleaned up, while processes with active parents in other VS Code windows are preserved. --- .../deepnote/deepnoteServerStarter.node.ts | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/kernels/deepnote/deepnoteServerStarter.node.ts b/src/kernels/deepnote/deepnoteServerStarter.node.ts index 084e21a786..e3a7906adf 100644 --- a/src/kernels/deepnote/deepnoteServerStarter.node.ts +++ b/src/kernels/deepnote/deepnoteServerStarter.node.ts @@ -461,15 +461,32 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { .filter((line) => line.trim() && !line.includes('ParentProcessId')); if (lines.length > 0) { const ppid = parseInt(lines[0].trim(), 10); - if (!isNaN(ppid) && ppid > 0) { + if (!isNaN(ppid)) { + // PPID of 0 means orphaned + if (ppid === 0) { + return true; + } + // Check if parent process exists const parentCheck = await processService.exec( 'tasklist', ['/FI', `PID eq ${ppid}`, '/FO', 'CSV', '/NH'], { throwOnStdErr: false } ); - // If parent doesn't exist or is system process, it's orphaned - return !parentCheck.stdout || parentCheck.stdout.trim().length === 0 || ppid === 0; + + // Normalize and check stdout + const stdout = (parentCheck.stdout || '').trim(); + + // Parent is missing if: + // 1. stdout is empty + // 2. stdout starts with "INFO:" (case-insensitive) + // 3. stdout contains "no tasks are running" (case-insensitive) + if (stdout.length === 0 || /^INFO:/i.test(stdout) || /no tasks are running/i.test(stdout)) { + return true; // Parent missing, process is orphaned + } + + // Parent exists + return false; } } } From 9c85cd243d0a782d597aa1ed49c70a041a34ac69 Mon Sep 17 00:00:00 2001 From: jankuca Date: Sun, 5 Oct 2025 17:35:14 +0200 Subject: [PATCH 4/6] Fix parent process existence check to use ps -o pid= for accurate detection --- src/kernels/deepnote/deepnoteServerStarter.node.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/deepnote/deepnoteServerStarter.node.ts b/src/kernels/deepnote/deepnoteServerStarter.node.ts index e3a7906adf..4207125a7b 100644 --- a/src/kernels/deepnote/deepnoteServerStarter.node.ts +++ b/src/kernels/deepnote/deepnoteServerStarter.node.ts @@ -504,10 +504,10 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { return true; } // Check if parent process exists - const parentCheck = await processService.exec('ps', ['-p', ppid.toString()], { + const parentCheck = await processService.exec('ps', ['-p', ppid.toString(), '-o', 'pid='], { throwOnStdErr: false }); - return !parentCheck.stdout || parentCheck.stdout.trim().length === 0; + return parentCheck.stdout.trim().length === 0; } } } From 608ca124741037928cb757f5dde54b91bf872f38 Mon Sep 17 00:00:00 2001 From: jankuca Date: Sun, 5 Oct 2025 18:28:26 +0200 Subject: [PATCH 5/6] Update docs to reflect ps -o pid= fix for parent process check --- ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md | 57 ++++++++++++++++-------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md index 329c54ba85..d69f20f49f 100644 --- a/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md +++ b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md @@ -1,35 +1,41 @@ # Orphan Process Cleanup Implementation ## Overview + This document describes the implementation of a sophisticated orphan process cleanup mechanism for the Deepnote server starter that prevents terminating active servers from other VS Code windows. ## Problem Statement + Previously, the cleanup logic in `cleanupOrphanedProcesses()` would force-kill **every** process matching "deepnote_toolkit server", which could terminate active servers from other VS Code windows, causing disruption to users working in multiple windows. ## Solution + The new implementation uses a lock file system combined with parent process verification to only kill genuine orphan processes. ## Key Components ### 1. Session Management + - **Session ID**: Each VS Code window instance generates a unique session ID using `generateUuid()` - **Lock File Directory**: Lock files are stored in `${os.tmpdir()}/vscode-deepnote-locks/` - **Lock File Format**: JSON files named `server-{pid}.json` containing: ```typescript interface ServerLockFile { - sessionId: string; // Unique ID for the VS Code window - pid: number; // Process ID of the server - timestamp: number; // When the server was started + sessionId: string; // Unique ID for the VS Code window + pid: number; // Process ID of the server + timestamp: number; // When the server was started } ``` ### 2. Lock File Lifecycle #### Creation + - When a server starts successfully, a lock file is written with the server's PID and current session ID - Location: `writeLockFile()` called in `startServerImpl()` after the server process is spawned #### Deletion + - Lock files are deleted when: 1. The server is explicitly stopped via `stopServerImpl()` 2. The extension is disposed and all servers are shut down @@ -40,17 +46,21 @@ The new implementation uses a lock file system combined with parent process veri The `isProcessOrphaned()` method checks if a process is truly orphaned by verifying its parent process: #### Unix/Linux/macOS + ```bash # Get parent process ID ps -o ppid= -p -# Check if parent exists -ps -p +# Check if parent exists (using -o pid= to get only PID with no header) +ps -p -o pid= ``` + - If PPID is 1 (init/systemd), the process is orphaned -- If parent process doesn't exist, the process is orphaned +- If parent process doesn't exist (empty stdout from `ps -o pid=`), the process is orphaned +- The `-o pid=` flag ensures no header is printed, so empty output reliably indicates a missing process #### Windows + ```cmd # Get parent process ID wmic process where ProcessId= get ParentProcessId @@ -58,6 +68,7 @@ wmic process where ProcessId= get ParentProcessId # Check if parent exists tasklist /FI "PID eq " /FO CSV /NH ``` + - If parent process doesn't exist or PPID is 0, the process is orphaned ### 4. Cleanup Decision Flow @@ -65,25 +76,29 @@ tasklist /FI "PID eq " /FO CSV /NH When `cleanupOrphanedProcesses()` runs (at extension startup): 1. **Find all deepnote_toolkit server processes** + - Use `ps aux` (Unix) or `tasklist` (Windows) - Extract PIDs of matching processes 2. **For each candidate PID:** - + a. **Check for lock file** - - If lock file exists: - - If session ID matches current session → **SKIP** (shouldn't happen at startup) - - If session ID differs: - - Check if process is orphaned - - If orphaned → **KILL** - - If not orphaned → **SKIP** (active in another window) - - - If no lock file exists: - - Check if process is orphaned - - If orphaned → **KILL** - - If not orphaned → **SKIP** (might be from older version without lock files) + + - If lock file exists: + + - If session ID matches current session → **SKIP** (shouldn't happen at startup) + - If session ID differs: + - Check if process is orphaned + - If orphaned → **KILL** + - If not orphaned → **SKIP** (active in another window) + + - If no lock file exists: + - Check if process is orphaned + - If orphaned → **KILL** + - If not orphaned → **SKIP** (might be from older version without lock files) 3. **Kill orphaned processes** + - Use `kill -9` (Unix) or `taskkill /F /T` (Windows) - Delete lock file after successful kill @@ -95,9 +110,11 @@ When `cleanupOrphanedProcesses()` runs (at extension startup): ## Code Changes ### Modified Files + - `src/kernels/deepnote/deepnoteServerStarter.node.ts` ### New Imports + ```typescript import * as fs from 'fs-extra'; import * as os from 'os'; @@ -106,12 +123,14 @@ import { generateUuid } from '../../platform/common/uuid'; ``` ### New Class Members + ```typescript private readonly sessionId: string = generateUuid(); private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-locks'); ``` ### New Methods + 1. `initializeLockFileDirectory()` - Creates lock file directory 2. `getLockFilePath(pid)` - Returns path to lock file for a PID 3. `writeLockFile(pid)` - Creates lock file for a server process @@ -120,6 +139,7 @@ private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-l 6. `isProcessOrphaned(pid)` - Checks if process is orphaned by verifying parent ### Modified Methods + 1. `constructor()` - Initializes lock file directory 2. `startServerImpl()` - Writes lock file after server starts 3. `stopServerImpl()` - Deletes lock file when server stops @@ -158,4 +178,3 @@ private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-l 2. **Lock File Expiry**: Add TTL to lock files to handle edge cases 3. **Health Monitoring**: Periodic checks to ensure servers are still responsive 4. **Graceful Shutdown**: Try SIGTERM before SIGKILL for orphaned processes - From af07a237ab2d36912e69e2864ac4becef63f1ee2 Mon Sep 17 00:00:00 2001 From: jankuca Date: Tue, 7 Oct 2025 11:51:27 +0200 Subject: [PATCH 6/6] Refactor DeepnoteServerStarter to use IExtensionSyncActivationService pattern - Move initialization work from constructor to activate() method - Implement IExtensionSyncActivationService interface - Register as activation service in serviceRegistry - Follow codebase convention of minimal constructors with explicit activation - Update documentation to reflect new activation pattern --- ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md | 12 +++++++----- src/kernels/deepnote/deepnoteServerStarter.node.ts | 5 ++++- src/notebooks/serviceRegistry.node.ts | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md index d69f20f49f..7bf862f0bc 100644 --- a/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md +++ b/ORPHAN_PROCESS_CLEANUP_IMPLEMENTATION.md @@ -116,6 +116,7 @@ When `cleanupOrphanedProcesses()` runs (at extension startup): ### New Imports ```typescript +import { IExtensionSyncActivationService } from '../../platform/activation/types'; import * as fs from 'fs-extra'; import * as os from 'os'; import * as path from '../../platform/vscode-path/path'; @@ -140,11 +141,12 @@ private readonly lockFileDir: string = path.join(os.tmpdir(), 'vscode-deepnote-l ### Modified Methods -1. `constructor()` - Initializes lock file directory -2. `startServerImpl()` - Writes lock file after server starts -3. `stopServerImpl()` - Deletes lock file when server stops -4. `dispose()` - Deletes lock files for all stopped servers -5. `cleanupOrphanedProcesses()` - Implements sophisticated orphan detection +1. `constructor()` - Minimal initialization (dependency injection only) +2. `activate()` - Initializes lock file directory and triggers cleanup (implements IExtensionSyncActivationService) +3. `startServerImpl()` - Writes lock file after server starts +4. `stopServerImpl()` - Deletes lock file when server stops +5. `dispose()` - Deletes lock files for all stopped servers +6. `cleanupOrphanedProcesses()` - Implements sophisticated orphan detection ## Benefits diff --git a/src/kernels/deepnote/deepnoteServerStarter.node.ts b/src/kernels/deepnote/deepnoteServerStarter.node.ts index 4207125a7b..b47a74bbd0 100644 --- a/src/kernels/deepnote/deepnoteServerStarter.node.ts +++ b/src/kernels/deepnote/deepnoteServerStarter.node.ts @@ -11,6 +11,7 @@ import { IOutputChannel, IDisposable, IHttpClient, IAsyncDisposableRegistry } fr import { STANDARD_OUTPUT_CHANNEL } from '../../platform/common/constants'; import { sleep } from '../../platform/common/utils/async'; import { Cancellation, raceCancellationError } from '../../platform/common/cancellation'; +import { IExtensionSyncActivationService } from '../../platform/activation/types'; import getPort from 'get-port'; import * as fs from 'fs-extra'; import * as os from 'os'; @@ -30,7 +31,7 @@ interface ServerLockFile { * Starts and manages the deepnote-toolkit Jupyter server. */ @injectable() -export class DeepnoteServerStarter implements IDeepnoteServerStarter { +export class DeepnoteServerStarter implements IDeepnoteServerStarter, IExtensionSyncActivationService { private readonly serverProcesses: Map> = new Map(); private readonly serverInfos: Map = new Map(); private readonly disposablesByFile: Map = new Map(); @@ -50,7 +51,9 @@ export class DeepnoteServerStarter implements IDeepnoteServerStarter { ) { // Register for disposal when the extension deactivates asyncRegistry.push(this); + } + public activate(): void { // Ensure lock file directory exists this.initializeLockFileDirectory().catch((ex) => { logger.warn(`Failed to initialize lock file directory: ${ex}`); diff --git a/src/notebooks/serviceRegistry.node.ts b/src/notebooks/serviceRegistry.node.ts index d0e20c2491..391aaf4679 100644 --- a/src/notebooks/serviceRegistry.node.ts +++ b/src/notebooks/serviceRegistry.node.ts @@ -130,6 +130,7 @@ export function registerTypes(serviceManager: IServiceManager, isDevMode: boolea // Deepnote kernel services serviceManager.addSingleton(IDeepnoteToolkitInstaller, DeepnoteToolkitInstaller); serviceManager.addSingleton(IDeepnoteServerStarter, DeepnoteServerStarter); + serviceManager.addBinding(IDeepnoteServerStarter, IExtensionSyncActivationService); serviceManager.addSingleton(IDeepnoteServerProvider, DeepnoteServerProvider); serviceManager.addBinding(IDeepnoteServerProvider, IExtensionSyncActivationService); serviceManager.addSingleton(IDeepnoteKernelAutoSelector, DeepnoteKernelAutoSelector);