From ee403719e0d77bea13c61752cfc1f0c022a93f30 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 10 Jul 2025 18:30:33 +0200 Subject: [PATCH 1/6] fix(scrape-single-url): return the only dataset item after the run is finished --- .../scrape-single-url/scrape-single-url.mjs | 46 ++++++++++++++++++- components/apify/apify.app.mjs | 9 ++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/components/apify/actions/scrape-single-url/scrape-single-url.mjs b/components/apify/actions/scrape-single-url/scrape-single-url.mjs index 2de46d5b103bc..3f4c91425c84a 100644 --- a/components/apify/actions/scrape-single-url/scrape-single-url.mjs +++ b/components/apify/actions/scrape-single-url/scrape-single-url.mjs @@ -36,7 +36,7 @@ export default { }, }, async run({ $ }) { - const response = await this.apify.runActor({ + const startActorResponse = await this.apify.runActorAsynchronously({ $, actorId: ACTOR_ID, data: { @@ -51,7 +51,49 @@ export default { ], }, }); + + console.log("Started Actor run:", startActorResponse); + + const { + data: { + id: runId, defaultDatasetId, + }, + } = startActorResponse; + console.log("Actor Run ID:", runId); + console.log("Dataset ID:", defaultDatasetId); + + let actorRunStatus = null; + let retries = 0; + const maxRetries = 30; + const delay = 5 * 1000; + + while (actorRunStatus !== "SUCCEEDED" && actorRunStatus !== "FAILED" && retries < maxRetries) { + await new Promise((resolve) => setTimeout(resolve, delay)); + const runDetails = await this.apify.getActorRun({ + $, + runId, + }); + actorRunStatus = runDetails.data.status; + console.log(`Actor run status: ${actorRunStatus} (retry: ${retries + 1}/${maxRetries})`); + retries++; + } + + if (actorRunStatus !== "SUCCEEDED") { + throw new Error(`Actor run did not succeed. Final status: ${actorRunStatus}`); + } + + const datasetResponse = await this.apify.listDatasetItems({ + $, + datasetId: defaultDatasetId, + params: { + limit: 1, + offset: 0, + }, + }); + + console.log(datasetResponse); + $.export("$summary", `Successfully scraped content from ${this.url}`); - return response; + return datasetResponse[0]; }, }; diff --git a/components/apify/apify.app.mjs b/components/apify/apify.app.mjs index 1a3e46a0df318..a5b9bd65605cc 100644 --- a/components/apify/apify.app.mjs +++ b/components/apify/apify.app.mjs @@ -195,6 +195,15 @@ export default { ...opts, }); }, + getActorRun({ + runId, ...opts + }) { + return this._makeRequest({ + method: "GET", + path: `/actor-runs/${runId}`, + ...opts, + }); + }, runActorAsynchronously({ actorId, ...opts }) { From a30a10c5076628a7b2c2f72fc1ab922308648946 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 10 Jul 2025 18:52:39 +0200 Subject: [PATCH 2/6] fix(scrape-single-url): version up --- .../apify/actions/scrape-single-url/scrape-single-url.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/apify/actions/scrape-single-url/scrape-single-url.mjs b/components/apify/actions/scrape-single-url/scrape-single-url.mjs index 3f4c91425c84a..cd3cb0f0c4c13 100644 --- a/components/apify/actions/scrape-single-url/scrape-single-url.mjs +++ b/components/apify/actions/scrape-single-url/scrape-single-url.mjs @@ -5,7 +5,7 @@ export default { key: "apify-scrape-single-url", name: "Scrape Single URL", description: "Executes a scraper on a specific website and returns its content as text. This action is perfect for extracting content from a single page.", - version: "0.0.3", + version: "0.0.4", type: "action", props: { apify, From 8736a1bbaa5fd6e10198b814801f6228fee7f2b2 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 10 Jul 2025 21:20:00 +0200 Subject: [PATCH 3/6] fix(scrape-single-url): version up --- components/apify/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/apify/package.json b/components/apify/package.json index 27476a27bb947..40e938fefcb5e 100644 --- a/components/apify/package.json +++ b/components/apify/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/apify", - "version": "0.2.1", + "version": "0.2.2", "description": "Pipedream Apify Components", "main": "apify.app.mjs", "keywords": [ From b51dc672a42fe6b61022bbd9e164ebe4a962ccde Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Mon, 21 Jul 2025 11:44:24 +0200 Subject: [PATCH 4/6] fix(scrape-single-url): version up --- .../apify/actions/get-dataset-items/get-dataset-items.mjs | 2 +- components/apify/actions/run-actor/run-actor.mjs | 2 +- .../actions/run-task-synchronously/run-task-synchronously.mjs | 2 +- .../set-key-value-store-record/set-key-value-store-record.mjs | 2 +- .../new-finished-actor-run-instant.mjs | 2 +- .../new-finished-task-run-instant.mjs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/components/apify/actions/get-dataset-items/get-dataset-items.mjs b/components/apify/actions/get-dataset-items/get-dataset-items.mjs index f596171377d99..1143996e19e32 100644 --- a/components/apify/actions/get-dataset-items/get-dataset-items.mjs +++ b/components/apify/actions/get-dataset-items/get-dataset-items.mjs @@ -5,7 +5,7 @@ export default { key: "apify-get-dataset-items", name: "Get Dataset Items", description: "Returns data stored in a dataset. [See the documentation](https://docs.apify.com/api/v2/dataset-items-get)", - version: "0.0.2", + version: "0.0.3", type: "action", props: { apify, diff --git a/components/apify/actions/run-actor/run-actor.mjs b/components/apify/actions/run-actor/run-actor.mjs index c4a7c6089c3ce..30e37e9028bde 100644 --- a/components/apify/actions/run-actor/run-actor.mjs +++ b/components/apify/actions/run-actor/run-actor.mjs @@ -7,7 +7,7 @@ export default { key: "apify-run-actor", name: "Run Actor", description: "Performs an execution of a selected actor in Apify. [See the documentation](https://docs.apify.com/api/v2#/reference/actors/run-collection/run-actor)", - version: "0.0.3", + version: "0.0.4", type: "action", props: { apify, diff --git a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs index b2153c9f219f2..b493fb31b1e51 100644 --- a/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs +++ b/components/apify/actions/run-task-synchronously/run-task-synchronously.mjs @@ -4,7 +4,7 @@ export default { key: "apify-run-task-synchronously", name: "Run Task Synchronously", description: "Run a specific task and return its dataset items. [See the documentation](https://docs.apify.com/api/v2/actor-task-run-sync-get-dataset-items-get)", - version: "0.0.2", + version: "0.0.3", type: "action", props: { apify, diff --git a/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs b/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs index 4586b75d940a3..ddee823cc47b2 100644 --- a/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs +++ b/components/apify/actions/set-key-value-store-record/set-key-value-store-record.mjs @@ -5,7 +5,7 @@ export default { key: "apify-set-key-value-store-record", name: "Set Key-Value Store Record", description: "Create or update a record in the key-value store of Apify. [See the documentation](https://docs.apify.com/api/v2#/reference/key-value-stores/record-collection/put-record)", - version: "0.0.3", + version: "0.0.4", type: "action", props: { apify, diff --git a/components/apify/sources/new-finished-actor-run-instant/new-finished-actor-run-instant.mjs b/components/apify/sources/new-finished-actor-run-instant/new-finished-actor-run-instant.mjs index 20426d00d1682..807de7a7bcd2f 100644 --- a/components/apify/sources/new-finished-actor-run-instant/new-finished-actor-run-instant.mjs +++ b/components/apify/sources/new-finished-actor-run-instant/new-finished-actor-run-instant.mjs @@ -6,7 +6,7 @@ export default { key: "apify-new-finished-actor-run-instant", name: "New Finished Actor Run (Instant)", description: "Emit new event when a selected actor is run and finishes.", - version: "0.0.3", + version: "0.0.4", type: "source", dedupe: "unique", props: { diff --git a/components/apify/sources/new-finished-task-run-instant/new-finished-task-run-instant.mjs b/components/apify/sources/new-finished-task-run-instant/new-finished-task-run-instant.mjs index 3d7f8370d7532..3b5950a55d83c 100644 --- a/components/apify/sources/new-finished-task-run-instant/new-finished-task-run-instant.mjs +++ b/components/apify/sources/new-finished-task-run-instant/new-finished-task-run-instant.mjs @@ -6,7 +6,7 @@ export default { key: "apify-new-finished-task-run-instant", name: "New Finished Task Run (Instant)", description: "Emit new event when a selected task is run and finishes.", - version: "0.0.3", + version: "0.0.4", type: "source", dedupe: "unique", props: { From 26a52b15de459b99daca17d11fc72b911fbdfcc4 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Wed, 23 Jul 2025 09:57:07 +0200 Subject: [PATCH 5/6] fix(scrape-single-url): introduce a job status constant, expand a list of terminal statuses to stop the loop --- .../scrape-single-url/scrape-single-url.mjs | 15 +++++++-------- components/apify/common/constants.mjs | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/components/apify/actions/scrape-single-url/scrape-single-url.mjs b/components/apify/actions/scrape-single-url/scrape-single-url.mjs index cd3cb0f0c4c13..b73a0b2a23a43 100644 --- a/components/apify/actions/scrape-single-url/scrape-single-url.mjs +++ b/components/apify/actions/scrape-single-url/scrape-single-url.mjs @@ -1,5 +1,7 @@ import apify from "../../apify.app.mjs"; -import { ACTOR_ID } from "../../common/constants.mjs"; +import { + ACTOR_ID, ACTOR_JOB_STATUSES, ACTOR_JOB_TERMINAL_STATUSES, +} from "../../common/constants.mjs"; export default { key: "apify-scrape-single-url", @@ -52,33 +54,30 @@ export default { }, }); - console.log("Started Actor run:", startActorResponse); - const { data: { id: runId, defaultDatasetId, }, } = startActorResponse; - console.log("Actor Run ID:", runId); - console.log("Dataset ID:", defaultDatasetId); let actorRunStatus = null; let retries = 0; const maxRetries = 30; const delay = 5 * 1000; - while (actorRunStatus !== "SUCCEEDED" && actorRunStatus !== "FAILED" && retries < maxRetries) { + while ((!actorRunStatus || !ACTOR_JOB_TERMINAL_STATUSES.includes(actorRunStatus)) + && retries < maxRetries + ) { await new Promise((resolve) => setTimeout(resolve, delay)); const runDetails = await this.apify.getActorRun({ $, runId, }); actorRunStatus = runDetails.data.status; - console.log(`Actor run status: ${actorRunStatus} (retry: ${retries + 1}/${maxRetries})`); retries++; } - if (actorRunStatus !== "SUCCEEDED") { + if (actorRunStatus !== ACTOR_JOB_STATUSES.SUCCEEDED) { throw new Error(`Actor run did not succeed. Final status: ${actorRunStatus}`); } diff --git a/components/apify/common/constants.mjs b/components/apify/common/constants.mjs index 869e38c5bd157..997b673db968a 100644 --- a/components/apify/common/constants.mjs +++ b/components/apify/common/constants.mjs @@ -9,3 +9,21 @@ export const EVENT_TYPES = [ "ACTOR.RUN.TIMED_OUT", "ACTOR.RUN.RESURRECTED", ]; + +export const ACTOR_JOB_STATUSES = { + READY: "READY", // started but not allocated to any worker yet + RUNNING: "RUNNING", // running on worker + SUCCEEDED: "SUCCEEDED", // finished and all good + FAILED: "FAILED", // run or build failed + TIMING_OUT: "TIMING-OUT", // timing out now + TIMED_OUT: "TIMED-OUT", // timed out + ABORTING: "ABORTING", // being aborted by user + ABORTED: "ABORTED", // aborted by user +}; + +export const ACTOR_JOB_TERMINAL_STATUSES = [ + ACTOR_JOB_STATUSES.SUCCEEDED, + ACTOR_JOB_STATUSES.FAILED, + ACTOR_JOB_STATUSES.TIMED_OUT, + ACTOR_JOB_STATUSES.ABORTED, +]; From 44a839e942eaeb005323c1470b44e3c6b723eba7 Mon Sep 17 00:00:00 2001 From: oleksandravalko Date: Thu, 31 Jul 2025 10:26:43 +0200 Subject: [PATCH 6/6] fix(scrape-single-url): import constants from package, decrease delay in between calls --- .../apify/actions/run-actor/run-actor.mjs | 4 +-- .../scrape-single-url/scrape-single-url.mjs | 7 ++--- components/apify/common/constants.mjs | 27 ------------------- 3 files changed, 6 insertions(+), 32 deletions(-) diff --git a/components/apify/actions/run-actor/run-actor.mjs b/components/apify/actions/run-actor/run-actor.mjs index 84febb9e9b6a8..d31e8a7aa589b 100644 --- a/components/apify/actions/run-actor/run-actor.mjs +++ b/components/apify/actions/run-actor/run-actor.mjs @@ -1,7 +1,7 @@ /* eslint-disable no-unused-vars */ import apify from "../../apify.app.mjs"; import { parseObject } from "../../common/utils.mjs"; -import { EVENT_TYPES } from "../../common/constants.mjs"; +import { WEBHOOK_EVENT_TYPES } from "@apify/consts"; export default { key: "apify-run-actor", @@ -186,7 +186,7 @@ export default { type: "string[]", label: "Event Types", description: "The types of events to send to the webhook", - options: EVENT_TYPES, + options: Object.values(WEBHOOK_EVENT_TYPES), }; } return props; diff --git a/components/apify/actions/scrape-single-url/scrape-single-url.mjs b/components/apify/actions/scrape-single-url/scrape-single-url.mjs index b73a0b2a23a43..e00d6c3d40bf6 100644 --- a/components/apify/actions/scrape-single-url/scrape-single-url.mjs +++ b/components/apify/actions/scrape-single-url/scrape-single-url.mjs @@ -1,7 +1,8 @@ import apify from "../../apify.app.mjs"; +import { ACTOR_ID } from "../../common/constants.mjs"; import { - ACTOR_ID, ACTOR_JOB_STATUSES, ACTOR_JOB_TERMINAL_STATUSES, -} from "../../common/constants.mjs"; + ACTOR_JOB_STATUSES, ACTOR_JOB_TERMINAL_STATUSES, +} from "@apify/consts"; export default { key: "apify-scrape-single-url", @@ -63,7 +64,7 @@ export default { let actorRunStatus = null; let retries = 0; const maxRetries = 30; - const delay = 5 * 1000; + const delay = 1000; while ((!actorRunStatus || !ACTOR_JOB_TERMINAL_STATUSES.includes(actorRunStatus)) && retries < maxRetries diff --git a/components/apify/common/constants.mjs b/components/apify/common/constants.mjs index 997b673db968a..a16d51a32af1a 100644 --- a/components/apify/common/constants.mjs +++ b/components/apify/common/constants.mjs @@ -1,29 +1,2 @@ export const ACTOR_ID = "aYG0l9s7dbB7j3gbS"; export const LIMIT = 100; - -export const EVENT_TYPES = [ - "ACTOR.RUN.CREATED", - "ACTOR.RUN.SUCCEEDED", - "ACTOR.RUN.FAILED", - "ACTOR.RUN.ABORTED", - "ACTOR.RUN.TIMED_OUT", - "ACTOR.RUN.RESURRECTED", -]; - -export const ACTOR_JOB_STATUSES = { - READY: "READY", // started but not allocated to any worker yet - RUNNING: "RUNNING", // running on worker - SUCCEEDED: "SUCCEEDED", // finished and all good - FAILED: "FAILED", // run or build failed - TIMING_OUT: "TIMING-OUT", // timing out now - TIMED_OUT: "TIMED-OUT", // timed out - ABORTING: "ABORTING", // being aborted by user - ABORTED: "ABORTED", // aborted by user -}; - -export const ACTOR_JOB_TERMINAL_STATUSES = [ - ACTOR_JOB_STATUSES.SUCCEEDED, - ACTOR_JOB_STATUSES.FAILED, - ACTOR_JOB_STATUSES.TIMED_OUT, - ACTOR_JOB_STATUSES.ABORTED, -];