From 18b517c875d00b5856a26118b55a9d7c614cd152 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 12 Dec 2024 18:41:25 +0530 Subject: [PATCH 1/7] chore: remove note --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index ac74d21cd..1fc1bfd68 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web -> Note: Maxun is in its early stages of development and currently does not support self-hosting. However, you can run Maxun locally. Self-hosting capabilities are planned for a future release and will be available soon. - -# Local Installation +# Installation ### Docker Compose ``` git clone https://github.com/getmaxun/maxun From ac13cd81d95f36dfd709c5177a6f857920e50467 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 12 Dec 2024 18:41:48 +0530 Subject: [PATCH 2/7] chore: remove clone instruction --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 1fc1bfd68..26eb72fe1 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,6 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation ### Docker Compose ``` -git clone https://github.com/getmaxun/maxun docker-compose up -d ``` You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ From 44693259257497deb6b285f54e64a0ae0ec2b7b1 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 14 Dec 2024 10:49:52 +0530 Subject: [PATCH 3/7] chore: sync compose master <-> develop --- docker-compose.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 51a9f4eba..3c6e3a0f7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,8 +64,6 @@ services: - redis - minio volumes: - - ./server:/app/server # Mount server source code for hot reloading - - ./maxun-core:/app/maxun-core # Mount maxun-core for any shared code updates - /var/run/dbus:/var/run/dbus frontend: @@ -79,13 +77,10 @@ services: environment: PUBLIC_URL: ${PUBLIC_URL} BACKEND_URL: ${BACKEND_URL} - volumes: - - ./:/app # Mount entire frontend app directory for hot reloading - - /app/node_modules # Anonymous volume to prevent overwriting node_modules depends_on: - backend volumes: postgres_data: minio_data: - redis_data: + redis_data: \ No newline at end of file From 7f48464eea993f0d4468942cdeb77c87398191f8 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 18:35:38 +0530 Subject: [PATCH 4/7] feat: add page navigation timeout --- maxun-core/src/interpret.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d1cc8318d..848ddd768 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -365,7 +365,7 @@ export default class Interpreter extends EventEmitter { try { const newPage = await context.newPage(); await newPage.goto(link); - await newPage.waitForLoadState('networkidle'); + await newPage.waitForLoadState('domcontentloaded'); await this.runLoop(newPage, this.initializedWorkflow!); } catch (e) { // `runLoop` uses soft mode, so it recovers from it's own exceptions @@ -576,7 +576,7 @@ export default class Interpreter extends EventEmitter { } await Promise.all([ nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'networkidle' }) + page.waitForNavigation({ waitUntil: 'domcontentloaded' }) ]); await page.waitForTimeout(1000); @@ -767,6 +767,8 @@ export default class Interpreter extends EventEmitter { public async run(page: Page, params?: ParamType): Promise { this.log('Starting the workflow.', Level.LOG); const context = page.context(); + + page.setDefaultNavigationTimeout(100000); // Check proxy settings from context options const contextOptions = (context as any)._options; From bdf908e37cdcb2200cb5c653a5149db279ce51aa Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 18:36:59 +0530 Subject: [PATCH 5/7] feat: add domcontentloaded wait load state --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 57be015ed..2cde90e4c 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -189,7 +189,7 @@ export class WorkflowGenerator { * * This function also makes sure to add a waitForLoadState and a generated flag * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) - * action waits for the networkidle event to be fired, + * action waits for the domcontentloaded event to be fired, * and the generated flag action is used for making pausing the interpretation possible. * * @param pair The pair to add to the workflow. @@ -217,7 +217,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['networkidle'], + args: ['domcontentloaded'], }); } this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); @@ -232,7 +232,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['networkidle'], + args: ['domcontentloaded'], }); } if (this.generatedData.lastIndex === 0) { From f38230d1b4d45d266886679c3228d07d2f52d18d Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 20:30:24 +0530 Subject: [PATCH 6/7] feat: revert to networkidle for wait load state --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 2cde90e4c..57be015ed 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -189,7 +189,7 @@ export class WorkflowGenerator { * * This function also makes sure to add a waitForLoadState and a generated flag * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) - * action waits for the domcontentloaded event to be fired, + * action waits for the networkidle event to be fired, * and the generated flag action is used for making pausing the interpretation possible. * * @param pair The pair to add to the workflow. @@ -217,7 +217,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['domcontentloaded'], + args: ['networkidle'], }); } this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); @@ -232,7 +232,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['domcontentloaded'], + args: ['networkidle'], }); } if (this.generatedData.lastIndex === 0) { From 7ce7a1598c3c394d8107677859991257460755ee Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 20:32:07 +0530 Subject: [PATCH 7/7] feat: check for selector visibility in getState --- maxun-core/src/interpret.ts | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 848ddd768..e11ae255a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -192,8 +192,8 @@ export default class Interpreter extends EventEmitter { // const actionable = async (selector: string): Promise => { // try { // const proms = [ - // page.isEnabled(selector, { timeout: 5000 }), - // page.isVisible(selector, { timeout: 5000 }), + // page.isEnabled(selector, { timeout: 10000 }), + // page.isVisible(selector, { timeout: 10000 }), // ]; // return await Promise.all(proms).then((bools) => bools.every((x) => x)); @@ -214,6 +214,17 @@ export default class Interpreter extends EventEmitter { // return []; // }), // ).then((x) => x.flat()); + + const presentSelectors: SelectorArray = await Promise.all( + selectors.map(async (selector) => { + try { + await page.waitForSelector(selector, { state: 'attached' }); + return [selector]; + } catch (e) { + return []; + } + }), + ).then((x) => x.flat()); const action = workflowCopy[workflowCopy.length - 1]; @@ -233,7 +244,7 @@ export default class Interpreter extends EventEmitter { ...p, [cookie.name]: cookie.value, }), {}), - selectors, + selectors: presentSelectors, }; } @@ -365,7 +376,7 @@ export default class Interpreter extends EventEmitter { try { const newPage = await context.newPage(); await newPage.goto(link); - await newPage.waitForLoadState('domcontentloaded'); + await newPage.waitForLoadState('networkidle'); await this.runLoop(newPage, this.initializedWorkflow!); } catch (e) { // `runLoop` uses soft mode, so it recovers from it's own exceptions @@ -576,7 +587,7 @@ export default class Interpreter extends EventEmitter { } await Promise.all([ nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'domcontentloaded' }) + page.waitForNavigation({ waitUntil: 'networkidle' }) ]); await page.waitForTimeout(1000);