From a01737eda2ce87ff82c84985ef341021e569196e Mon Sep 17 00:00:00 2001 From: Baptiste Arnaud Date: Wed, 17 Mar 2021 18:28:18 +0100 Subject: [PATCH] Better script gen & fix recording stop listening --- public/content.js | 2 +- public/manifest.json | 2 +- src/StepItem/OptionItem.tsx | 2 +- src/StepItem/StepItem.tsx | 2 +- src/lib/scriptGenerator.ts | 35 ++++++++++++++++++++++++++--------- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/public/content.js b/public/content.js index dfbe5a8..4091e63 100644 --- a/public/content.js +++ b/public/content.js @@ -269,8 +269,8 @@ const stopSelectNode = () => { for (const handler of handlers) { document.removeEventListener("click", handler, { capture: true }); } - handlers = []; stopClicksKeysRecording(); + handlers = []; }; const onRecordClick = function (stepIndex) { diff --git a/public/manifest.json b/public/manifest.json index 61b733a..68176d5 100644 --- a/public/manifest.json +++ b/public/manifest.json @@ -1,6 +1,6 @@ { "name": "Tinking - Scrapping Tool", - "version": "0.1.0", + "version": "0.1.1", "manifest_version": 2, "description": "Extract data from any website without code, just clicks", "icons": { diff --git a/src/StepItem/OptionItem.tsx b/src/StepItem/OptionItem.tsx index d2010ec..548dd58 100644 --- a/src/StepItem/OptionItem.tsx +++ b/src/StepItem/OptionItem.tsx @@ -224,7 +224,7 @@ const SelectOption = ({ optionType?: OptionType; onOptionChange: (val: OptionType) => void; }) => ( - + void; }) => ( - + }> {step.action ?? "Select an action"} diff --git a/src/lib/scriptGenerator.ts b/src/lib/scriptGenerator.ts index 0648911..8f305ce 100644 --- a/src/lib/scriptGenerator.ts +++ b/src/lib/scriptGenerator.ts @@ -246,7 +246,7 @@ const parseSingleCommandFromStep = ( const element = document.querySelector("${step.selector}") return element.src || null; } - let ${variableName} = await page.evaluate(() => ${variableName}Eval); + let ${variableName} = await page.evaluate(${variableName}Eval); if(${variableName} === null || ${variableName} === ""){ // The content could be dynamically loaded. Waiting a bit... await page.waitForTimeout(4000) @@ -341,9 +341,13 @@ const parseLoopFromStep = (step: Step) => { await page.waitForSelector("${step.selector}") let urls = [] urls = await page.evaluate(() => { - return [...document.querySelectorAll("${step.selector}")].map((node) => node.href); + return [...document.querySelectorAll("${ + step.selector + }")].map((node) => node.href); }); - if(urls.length >= ${amountToExtract}){ + if(${ + amountToExtract === "" ? "false" : `urls.length >= ${amountToExtract}` + }){ urls = urls.slice(0, ${amountToExtract}) } else { let i = 0 @@ -366,18 +370,30 @@ const parseLoopFromStep = (step: Step) => { }catch{ break; } - const firstLinkInNewPage = ( - await page.$("${step.selector}") - ).href; - if (!firstLinkInNewPage || firstLinkInNewPage === firstLinkInCurrentPage) { + let firstLinkInNewPage = await page.evaluate(() => {return document.querySelector("${ + step.selector + }").href}); + if (firstLinkInNewPage === firstLinkInCurrentPage) { // There is some kind of loading state we need to wait for await page.waitForTimeout(4000); + firstLinkInNewPage = await page.evaluate(() => {return document.querySelector("${ + step.selector + }").href}); + if (firstLinkInNewPage === firstLinkInCurrentPage) { + break; + } } const newUrls = await page.evaluate(() => { - return [...document.querySelectorAll("${step.selector}")].map(node => node.href); + return [...document.querySelectorAll("${ + step.selector + }")].map(node => node.href); }) urls = urls.concat(newUrls) - if (urls.length >= ${amountToExtract}) { + if (${ + amountToExtract === "" + ? "false" + : `urls.length >= ${amountToExtract}` + }) { urls = urls.slice(0, ${amountToExtract}) break; } @@ -460,6 +476,7 @@ const parseLibrarySettings = (library: "puppeteer" | "playwright") => { "--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", + "--window-size=1300,1024" ], });`; }