From 801ae5a365f53b795b5f40d23c56c85fdab4eb9f Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Tue, 18 Nov 2025 14:25:09 +0530 Subject: [PATCH 1/2] fix: scrapeList pagination persistence and action data separation --- maxun-core/src/interpret.ts | 70 ++++++++++++++----- .../classes/Interpreter.ts | 38 +++++++--- 2 files changed, 80 insertions(+), 28 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index fdc31cdcd..c38c1c203 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -82,6 +82,8 @@ export default class Interpreter extends EventEmitter { scrapeSchema: {} }; + private scrapeListCounter: number = 0; + constructor(workflow: WorkflowFile, options?: Partial) { super(); this.workflow = workflow.workflow; @@ -575,12 +577,13 @@ export default class Interpreter extends EventEmitter { try { await this.ensureScriptsLoaded(page); - + if (this.options.debugChannel?.incrementScrapeListIndex) { this.options.debugChannel.incrementScrapeListIndex(); } let scrapeResults = []; + let paginationUsed = false; if (!config.pagination) { scrapeResults = await page.evaluate((cfg) => { @@ -592,6 +595,7 @@ export default class Interpreter extends EventEmitter { } }, config); } else { + paginationUsed = true; scrapeResults = await this.handlePagination(page, config); } @@ -599,25 +603,39 @@ export default class Interpreter extends EventEmitter { scrapeResults = []; } - const actionType = "scrapeList"; - const actionName = (config as any).__name || "List"; + console.log(`ScrapeList completed with ${scrapeResults.length} results`); - if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; - if (!this.serializableDataByType[actionType][actionName]) { - this.serializableDataByType[actionType][actionName] = []; - } + if (!paginationUsed) { + const actionType = "scrapeList"; + let actionName = (config as any).__name || ""; - this.serializableDataByType[actionType][actionName].push(...scrapeResults); + if (!actionName || actionName.trim() === "") { + this.scrapeListCounter++; + actionName = `List ${this.scrapeListCounter}`; + } - await this.options.serializableCallback({ - scrapeList: this.serializableDataByType.scrapeList, - scrapeSchema: this.serializableDataByType.scrapeSchema - }); + if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; + if (!this.serializableDataByType[actionType][actionName]) { + this.serializableDataByType[actionType][actionName] = []; + } + + this.serializableDataByType[actionType][actionName].push(...scrapeResults); + + await this.options.serializableCallback({ + scrapeList: this.serializableDataByType.scrapeList, + scrapeSchema: this.serializableDataByType.scrapeSchema + }); + } } catch (error) { console.error('ScrapeList action failed completely:', error.message); - + const actionType = "scrapeList"; - const actionName = (config as any).__name || "List"; + let actionName = (config as any).__name || ""; + + if (!actionName || actionName.trim() === "") { + this.scrapeListCounter++; + actionName = `List ${this.scrapeListCounter}`; + } if (!this.namedResults[actionType]) this.namedResults[actionType] = {}; this.namedResults[actionType][actionName] = []; @@ -818,12 +836,26 @@ export default class Interpreter extends EventEmitter { return []; } + const actionType = "scrapeList"; + let actionName = (config as any).__name || ""; + if (!actionName || actionName.trim() === "") { + this.scrapeListCounter++; + actionName = `List ${this.scrapeListCounter}`; + } + + if (!this.serializableDataByType[actionType]) { + this.serializableDataByType[actionType] = {}; + } + if (!this.serializableDataByType[actionType][actionName]) { + this.serializableDataByType[actionType][actionName] = []; + } + let allResults: Record[] = []; let previousHeight = 0; let scrapedItems: Set = new Set(); let visitedUrls: Set = new Set(); const MAX_RETRIES = 3; - const RETRY_DELAY = 1000; // 1 second delay between retries + const RETRY_DELAY = 1000; const MAX_UNCHANGED_RESULTS = 5; const debugLog = (message: string, ...args: any[]) => { @@ -831,7 +863,6 @@ export default class Interpreter extends EventEmitter { }; const scrapeCurrentPage = async () => { - // Check abort flag before scraping current page if (this.isAborted) { debugLog("Workflow aborted, stopping scrapeCurrentPage"); return; @@ -849,7 +880,6 @@ export default class Interpreter extends EventEmitter { debugLog(`Page evaluation failed: ${error.message}`); return; } - const newResults = results.filter(item => { const uniqueKey = JSON.stringify(item); if (scrapedItems.has(uniqueKey)) return false; @@ -859,7 +889,11 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(newResults); debugLog("Results collected:", allResults.length); - await this.options.serializableCallback(allResults); + this.serializableDataByType[actionType][actionName] = [...allResults]; + await this.options.serializableCallback({ + scrapeList: this.serializableDataByType.scrapeList, + scrapeSchema: this.serializableDataByType.scrapeSchema + }); }; const checkLimit = () => { diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index 71cac8b70..2ab7a319e 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -567,20 +567,39 @@ export class WorkflowInterpreter { typeKey = "scrapeSchema"; } - if (this.currentActionType === "scrapeList" && data.scrapeList) { + if (typeKey === "scrapeList" && data.scrapeList) { data = data.scrapeList; - } else if (this.currentActionType === "scrapeSchema" && data.scrapeSchema) { + } else if (typeKey === "scrapeSchema" && data.scrapeSchema) { data = data.scrapeSchema; } - let actionName = this.currentActionName || ""; - if (typeKey === "scrapeList") { - actionName = this.getUniqueActionName(typeKey, this.currentActionName); + let actionName = ""; + if (typeKey === "scrapeList" && data && typeof data === "object" && !Array.isArray(data)) { + const keys = Object.keys(data); + if (keys.length === 1) { + actionName = keys[0]; + data = data[actionName]; + } else if (keys.length > 1) { + actionName = keys[keys.length - 1]; + data = data[actionName]; + } + } + + if (!actionName) { + actionName = this.currentActionName || ""; + if (typeKey === "scrapeList" && !actionName) { + actionName = this.getUniqueActionName(typeKey, ""); + } } const flattened = Array.isArray(data) ? data - : (data?.List ?? (data && typeof data === 'object' ? Object.values(data).flat?.() ?? data : [])); + : ( + data?.List ?? + (data && typeof data === "object" + ? Object.values(data).flat?.() ?? data + : []) + ); if (!this.serializableDataByType[typeKey]) { this.serializableDataByType[typeKey] = {}; @@ -588,16 +607,15 @@ export class WorkflowInterpreter { this.serializableDataByType[typeKey][actionName] = flattened; - await this.persistDataToDatabase(typeKey, { [actionName]: flattened }); + await this.persistDataToDatabase(typeKey, { + [actionName]: flattened, + }); this.socket.emit("serializableCallback", { type: typeKey, name: actionName, data: flattened, }); - - this.currentActionType = null; - this.currentActionName = null; } catch (err: any) { logger.log('error', `serializableCallback handler failed: ${err.message}`); } From 1b8e5046a8809b4622693ebebaaf4724be305fd3 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Tue, 18 Nov 2025 15:11:09 +0530 Subject: [PATCH 2/2] fix: __name non existent field --- maxun-core/src/interpret.ts | 66 ++++++++++++++----------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c38c1c203..d87e4bc26 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -486,7 +486,7 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResults); }, - scrapeSchema: async (schema: Record) => { + scrapeSchema: async (schema: Record, actionName: string = "") => { if (this.isAborted) { this.log('Workflow aborted, stopping scrapeSchema', Level.WARN); return; @@ -542,17 +542,17 @@ export default class Interpreter extends EventEmitter { } const actionType = "scrapeSchema"; - const actionName = (schema as any).__name || "Texts"; + const name = actionName || "Texts"; if (!this.namedResults[actionType]) this.namedResults[actionType] = {}; - this.namedResults[actionType][actionName] = this.cumulativeResults; + this.namedResults[actionType][name] = this.cumulativeResults; if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; - if (!this.serializableDataByType[actionType][actionName]) { - this.serializableDataByType[actionType][actionName] = []; + if (!this.serializableDataByType[actionType][name]) { + this.serializableDataByType[actionType][name] = []; } - this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults]; + this.serializableDataByType[actionType][name] = [...this.cumulativeResults]; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, @@ -560,7 +560,7 @@ export default class Interpreter extends EventEmitter { }); }, - scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }, actionName: string = "") => { if (this.isAborted) { this.log('Workflow aborted, stopping scrapeList', Level.WARN); return; @@ -596,7 +596,7 @@ export default class Interpreter extends EventEmitter { }, config); } else { paginationUsed = true; - scrapeResults = await this.handlePagination(page, config); + scrapeResults = await this.handlePagination(page, config, actionName); } if (!Array.isArray(scrapeResults)) { @@ -607,19 +607,19 @@ export default class Interpreter extends EventEmitter { if (!paginationUsed) { const actionType = "scrapeList"; - let actionName = (config as any).__name || ""; + let name = actionName || ""; - if (!actionName || actionName.trim() === "") { + if (!name || name.trim() === "") { this.scrapeListCounter++; - actionName = `List ${this.scrapeListCounter}`; + name = `List ${this.scrapeListCounter}`; } if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; - if (!this.serializableDataByType[actionType][actionName]) { - this.serializableDataByType[actionType][actionName] = []; + if (!this.serializableDataByType[actionType][name]) { + this.serializableDataByType[actionType][name] = []; } - this.serializableDataByType[actionType][actionName].push(...scrapeResults); + this.serializableDataByType[actionType][name].push(...scrapeResults); await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, @@ -630,18 +630,18 @@ export default class Interpreter extends EventEmitter { console.error('ScrapeList action failed completely:', error.message); const actionType = "scrapeList"; - let actionName = (config as any).__name || ""; + let name = actionName || ""; - if (!actionName || actionName.trim() === "") { + if (!name || name.trim() === "") { this.scrapeListCounter++; - actionName = `List ${this.scrapeListCounter}`; + name = `List ${this.scrapeListCounter}`; } if (!this.namedResults[actionType]) this.namedResults[actionType] = {}; - this.namedResults[actionType][actionName] = []; + this.namedResults[actionType][name] = []; if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; - this.serializableDataByType[actionType][actionName] = []; + this.serializableDataByType[actionType][name] = []; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, @@ -736,26 +736,7 @@ export default class Interpreter extends EventEmitter { debug.setActionType(String(step.action)); } - if ((step as any)?.name) { - stepName = (step as any).name; - } else if ( - Array.isArray((step as any)?.args) && - (step as any).args.length > 0 && - typeof (step as any).args[0] === "object" && - "__name" in (step as any).args[0] - ) { - stepName = (step as any).args[0].__name; - } else if ( - typeof (step as any)?.args === "object" && - step?.args !== null && - "__name" in (step as any).args - ) { - stepName = (step as any).args.__name; - } - - if (!stepName) { - stepName = String(step.action); - } + stepName = (step as any)?.name || String(step.action); if (debug && typeof (debug as any).setActionName === "function") { (debug as any).setActionName(stepName); @@ -769,6 +750,9 @@ export default class Interpreter extends EventEmitter { const params = !step.args || Array.isArray(step.args) ? step.args : [step.args]; if (step.action === 'screenshot') { await (wawActions.screenshot as any)(...(params ?? []), stepName ?? undefined); + } else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') { + const actionName = (step as any).name || ""; + await wawActions[step.action as CustomFunctions](...(params ?? []), actionName); } else { await wawActions[step.action as CustomFunctions](...(params ?? [])); } @@ -830,14 +814,14 @@ export default class Interpreter extends EventEmitter { fields: any, limit?: number, pagination: any -}) { + }, providedActionName: string = "") { if (this.isAborted) { this.log('Workflow aborted, stopping pagination', Level.WARN); return []; } const actionType = "scrapeList"; - let actionName = (config as any).__name || ""; + let actionName = providedActionName || ""; if (!actionName || actionName.trim() === "") { this.scrapeListCounter++; actionName = `List ${this.scrapeListCounter}`;