From 72e03b4b0621016217786bed2e4b51e5bba2110b Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Thu, 16 Jan 2025 12:52:05 +0100 Subject: [PATCH 1/2] Rework OPFS, add prepareFileHandle and prepareFileHandles --- .../duckdb-wasm/src/bindings/bindings_base.ts | 21 +++++++++++++++- .../src/bindings/bindings_interface.ts | 1 + packages/duckdb-wasm/src/bindings/runtime.ts | 2 ++ .../src/bindings/runtime_browser.ts | 24 +++++++++++++++---- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts index b33524e4f..f395bdb10 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_base.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts @@ -469,6 +469,19 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { } dropResponseBuffers(this.mod); } + public async prepareFileHandle(fileName: string, protocol: DuckDBDataProtocol): Promise { + if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && this._runtime.prepareFileHandles) { + const list = await this._runtime.prepareFileHandles([fileName], DuckDBDataProtocol.BROWSER_FSACCESS); + for (const item of list) { + const { handle, path: filePath, fromCached } = item; + if (!fromCached && handle.getSize()) { + await this.registerFileHandleAsync(filePath, handle, DuckDBDataProtocol.BROWSER_FSACCESS, true); + } + } + return; + } + throw new Error(`prepareFileHandle: unsupported protocol ${protocol}`); + } /** Prepare a file handle that could only be acquired aschronously */ public async prepareDBFileHandle(path: string, protocol: DuckDBDataProtocol): Promise { if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && this._runtime.prepareDBFileHandle) { @@ -601,8 +614,14 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings { dropResponseBuffers(this.mod); return copy; } - /** Enable tracking of file statistics */ + public registerOPFSFileName(file: string): Promise { + if (file.startsWith("opfs://")) { + return this.prepareFileHandle(file, DuckDBDataProtocol.BROWSER_FSACCESS); + } else { + throw new Error("Not an OPFS file name: " + file); + } + } public collectFileStatistics(file: string, enable: boolean): void { const [s, d, n] = callSRet(this.mod, 'duckdb_web_collect_file_stats', ['string', 'boolean'], [file, enable]); if (s !== StatusCode.SUCCESS) { diff --git a/packages/duckdb-wasm/src/bindings/bindings_interface.ts b/packages/duckdb-wasm/src/bindings/bindings_interface.ts index dcf0fb926..a565facbb 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_interface.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_interface.ts @@ -54,6 +54,7 @@ export interface DuckDBBindings { protocol: DuckDBDataProtocol, directIO: boolean, ): Promise; + prepareFileHandle(path: string, protocol: DuckDBDataProtocol): Promise; prepareDBFileHandle(path: string, protocol: DuckDBDataProtocol): Promise; globFiles(path: string): WebFile[]; dropFile(name: string): void; diff --git a/packages/duckdb-wasm/src/bindings/runtime.ts b/packages/duckdb-wasm/src/bindings/runtime.ts index 4bc360db3..b720e6877 100644 --- a/packages/duckdb-wasm/src/bindings/runtime.ts +++ b/packages/duckdb-wasm/src/bindings/runtime.ts @@ -157,6 +157,8 @@ export interface DuckDBRuntime { removeFile(mod: DuckDBModule, pathPtr: number, pathLen: number): void; // Prepare a file handle that could only be acquired aschronously + prepareFileHandle?: (path: string, protocol: DuckDBDataProtocol) => Promise; + prepareFileHandles?: (path: string[], protocol: DuckDBDataProtocol) => Promise; prepareDBFileHandle?: (path: string, protocol: DuckDBDataProtocol) => Promise; // Call a scalar UDF function diff --git a/packages/duckdb-wasm/src/bindings/runtime_browser.ts b/packages/duckdb-wasm/src/bindings/runtime_browser.ts index 8099acde7..0b4ebedca 100644 --- a/packages/duckdb-wasm/src/bindings/runtime_browser.ts +++ b/packages/duckdb-wasm/src/bindings/runtime_browser.ts @@ -19,20 +19,24 @@ import * as udf from './udf_runtime'; const OPFS_PREFIX_LEN = 'opfs://'.length; const PATH_SEP_REGEX = /\/|\\/; + export const BROWSER_RUNTIME: DuckDBRuntime & { _files: Map; _fileInfoCache: Map; _globalFileInfo: DuckDBGlobalFileInfo | null; _preparedHandles: Record; + _opfsRoot: FileSystemDirectoryHandle | null; getFileInfo(mod: DuckDBModule, fileId: number): DuckDBFileInfo | null; getGlobalFileInfo(mod: DuckDBModule): DuckDBGlobalFileInfo | null; + assignOPFSRoot(): Promise; } = { _files: new Map(), _fileInfoCache: new Map(), _udfFunctions: new Map(), _globalFileInfo: null, _preparedHandles: {} as any, + _opfsRoot: null, getFileInfo(mod: DuckDBModule, fileId: number): DuckDBFileInfo | null { try { @@ -101,11 +105,15 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { return null; } }, - + async assignOPFSRoot(): Promise { + if (!BROWSER_RUNTIME._opfsRoot) { + BROWSER_RUNTIME._opfsRoot = await navigator.storage.getDirectory(); + } + }, /** Prepare a file handle that could only be acquired aschronously */ - async prepareDBFileHandle(dbPath: string, protocol: DuckDBDataProtocol): Promise { + async prepareFileHandles(filePaths: string[], protocol: DuckDBDataProtocol): Promise { if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS) { - const filePaths = [dbPath, `${dbPath}.wal`]; + await BROWSER_RUNTIME.assignOPFSRoot(); const prepare = async (path: string): Promise => { if (BROWSER_RUNTIME._files.has(path)) { return { @@ -114,7 +122,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { fromCached: true, }; } - const opfsRoot = await navigator.storage.getDirectory(); + const opfsRoot = BROWSER_RUNTIME._opfsRoot!; let dirHandle: FileSystemDirectoryHandle = opfsRoot; // check if mkdir -p is needed const opfsPath = path.slice(OPFS_PREFIX_LEN); @@ -156,6 +164,14 @@ export const BROWSER_RUNTIME: DuckDBRuntime & { } return result; } + throw new Error(`Unsupported protocol ${protocol} for paths ${filePaths} with protocol ${protocol}`); + }, + /** Prepare a file handle that could only be acquired aschronously */ + async prepareDBFileHandle(dbPath: string, protocol: DuckDBDataProtocol): Promise { + if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && this.prepareFileHandles) { + const filePaths = [dbPath, `${dbPath}.wal`]; + return this.prepareFileHandles(filePaths, protocol); + } throw new Error(`Unsupported protocol ${protocol} for path ${dbPath} with protocol ${protocol}`); }, From f848df5313b366f2e3f5c015b3c4dd2fb9f78499 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Thu, 16 Jan 2025 13:19:43 +0100 Subject: [PATCH 2/2] Add registerOPFSFileName to API, available in the shell via --- .../crate/src/duckdb/async_duckdb.rs | 13 ++++++++++++ packages/duckdb-wasm-shell/crate/src/shell.rs | 20 +++++++++++++++++++ .../duckdb-wasm-shell/crate/src/shell_api.rs | 5 +++++ .../src/bindings/bindings_interface.ts | 1 + .../src/parallel/async_bindings.ts | 10 ++++++++++ .../src/parallel/worker_dispatcher.ts | 5 +++++ .../src/parallel/worker_request.ts | 3 +++ 7 files changed, 57 insertions(+) diff --git a/packages/duckdb-wasm-shell/crate/src/duckdb/async_duckdb.rs b/packages/duckdb-wasm-shell/crate/src/duckdb/async_duckdb.rs index a041ddacf..1a791b3d9 100644 --- a/packages/duckdb-wasm-shell/crate/src/duckdb/async_duckdb.rs +++ b/packages/duckdb-wasm-shell/crate/src/duckdb/async_duckdb.rs @@ -63,6 +63,11 @@ extern "C" { conn: ConnectionID, ) -> Result; + #[wasm_bindgen(catch, method, js_name = "registerOPFSFileName")] + async fn register_opfs_file_name( + this: &JsAsyncDuckDB, + text: &str, + ) -> Result; #[wasm_bindgen(catch, method, js_name = "collectFileStatistics")] async fn collect_file_statistics( this: &JsAsyncDuckDB, @@ -158,6 +163,14 @@ impl AsyncDuckDB { Ok(()) } + pub async fn register_opfs_file_name( + &self, + file: &str, + ) -> Result<(), js_sys::Error> { + self.bindings.register_opfs_file_name(file).await?; + Ok(()) + } + /// Enable file statistics pub async fn export_file_statistics( &self, diff --git a/packages/duckdb-wasm-shell/crate/src/shell.rs b/packages/duckdb-wasm-shell/crate/src/shell.rs index 35349b98d..40b1edb62 100644 --- a/packages/duckdb-wasm-shell/crate/src/shell.rs +++ b/packages/duckdb-wasm-shell/crate/src/shell.rs @@ -377,6 +377,18 @@ impl Shell { Shell::with(|s| s.write(&buffer)); } + pub fn register_opfs_file_name(name: &str) { + let db_ptr = Shell::with(|s| s.db.clone()); + let name_copy = name.to_string(); + spawn_local(async move { + let db = match db_ptr { + Some(ref db) => db.read().unwrap(), + None => return, + }; + db.register_opfs_file_name(&name_copy).await.unwrap(); + }); + } + pub fn collect_file_statistics(name: &str, enable: bool) { let db_ptr = Shell::with(|s| s.db.clone()); let name_copy = name.to_string(); @@ -532,6 +544,13 @@ impl Shell { } } } + "register" => { + let filename = args[subcmd.len()..].trim(); + db.register_opfs_file_name(filename).await.unwrap(); + Shell::with_mut(|s| { + s.writeln(&format!("Registering OPFS file handle for: {}", filename)) + }); + } "track" => { let filename = args[subcmd.len()..].trim(); db.collect_file_statistics(filename, true).await.unwrap(); @@ -666,6 +685,7 @@ impl Shell { ".files drop Drop all files.\r\n", ".files drop $FILE Drop a single file.\r\n", ".files track $FILE Collect file statistics.\r\n", + ".files register $FILE Register OPFS file handle.\r\n", ".files paging $FILE Show file paging.\r\n", ".files reads $FILE Show file reads.\r\n", ".open $FILE Open database file.\r\n", diff --git a/packages/duckdb-wasm-shell/crate/src/shell_api.rs b/packages/duckdb-wasm-shell/crate/src/shell_api.rs index 05766f123..fee9c6523 100644 --- a/packages/duckdb-wasm-shell/crate/src/shell_api.rs +++ b/packages/duckdb-wasm-shell/crate/src/shell_api.rs @@ -65,6 +65,11 @@ pub fn writeln(text: &str) { Shell::with(|s| s.writeln(text)); } +#[wasm_bindgen(js_name = "registerOPFSFileName")] +pub fn register_opfs_file_name(name: &str) { + Shell::register_opfs_file_name(name); +} + #[wasm_bindgen(js_name = "collectFileStatistics")] pub fn collect_file_statistics(name: &str, enable: bool) { Shell::collect_file_statistics(name, enable); diff --git a/packages/duckdb-wasm/src/bindings/bindings_interface.ts b/packages/duckdb-wasm/src/bindings/bindings_interface.ts index a565facbb..271a42ef9 100644 --- a/packages/duckdb-wasm/src/bindings/bindings_interface.ts +++ b/packages/duckdb-wasm/src/bindings/bindings_interface.ts @@ -62,6 +62,7 @@ export interface DuckDBBindings { flushFiles(): void; copyFileToPath(name: string, path: string): void; copyFileToBuffer(name: string): Uint8Array; + registerOPFSFileName(file: string): void; collectFileStatistics(file: string, enable: boolean): void; exportFileStatistics(file: string): FileStatistics; } diff --git a/packages/duckdb-wasm/src/parallel/async_bindings.ts b/packages/duckdb-wasm/src/parallel/async_bindings.ts index 14a03b547..4aaf3a6fa 100644 --- a/packages/duckdb-wasm/src/parallel/async_bindings.ts +++ b/packages/duckdb-wasm/src/parallel/async_bindings.ts @@ -160,6 +160,7 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { switch (task.type) { case WorkerRequestType.CLOSE_PREPARED: case WorkerRequestType.COLLECT_FILE_STATISTICS: + case WorkerRequestType.REGISTER_OPFS_FILE_NAME: case WorkerRequestType.COPY_FILE_TO_PATH: case WorkerRequestType.DISCONNECT: case WorkerRequestType.DROP_FILE: @@ -546,6 +547,15 @@ export class AsyncDuckDB implements AsyncDuckDBBindings { await this.postTask(task, []); } + /** Enable file statistics */ + public async registerOPFSFileName(name: string): Promise { + const task = new WorkerTask( + WorkerRequestType.REGISTER_OPFS_FILE_NAME, + [name], + ); + await this.postTask(task, []); + } + /** Enable file statistics */ public async collectFileStatistics(name: string, enable: boolean): Promise { const task = new WorkerTask( diff --git a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts index 06b81a5b8..3a5a8f295 100644 --- a/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts +++ b/packages/duckdb-wasm/src/parallel/worker_dispatcher.ts @@ -360,6 +360,11 @@ export abstract class AsyncDuckDBDispatcher implements Logger { this.sendOK(request); break; + case WorkerRequestType.REGISTER_OPFS_FILE_NAME: + this._bindings.registerOPFSFileName(request.data[0]); + this.sendOK(request); + break; + case WorkerRequestType.EXPORT_FILE_STATISTICS: { this.postMessage( { diff --git a/packages/duckdb-wasm/src/parallel/worker_request.ts b/packages/duckdb-wasm/src/parallel/worker_request.ts index cf128cdbb..9b9df0634 100644 --- a/packages/duckdb-wasm/src/parallel/worker_request.ts +++ b/packages/duckdb-wasm/src/parallel/worker_request.ts @@ -14,6 +14,7 @@ export enum WorkerRequestType { CANCEL_PENDING_QUERY = 'CANCEL_PENDING_QUERY', CLOSE_PREPARED = 'CLOSE_PREPARED', COLLECT_FILE_STATISTICS = 'COLLECT_FILE_STATISTICS', + REGISTER_OPFS_FILE_NAME = 'REGISTER_OPFS_FILE_NAME', CONNECT = 'CONNECT', COPY_FILE_TO_BUFFER = 'COPY_FILE_TO_BUFFER', COPY_FILE_TO_PATH = 'COPY_FILE_TO_PATH', @@ -108,6 +109,7 @@ export type WorkerRequestVariant = | WorkerRequest | WorkerRequest | WorkerRequest + | WorkerRequest | WorkerRequest | WorkerRequest | WorkerRequest @@ -166,6 +168,7 @@ export type WorkerResponseVariant = export type WorkerTaskVariant = | WorkerTask + | WorkerTask | WorkerTask | WorkerTask | WorkerTask