Skip to content

Commit

Permalink
feat: enable migration testing (#1583)
Browse files Browse the repository at this point in the history
  • Loading branch information
szmarczak committed Oct 21, 2022
1 parent 66ad416 commit ee3a68f
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 0 deletions.
6 changes: 6 additions & 0 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,12 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
await this._init();
await this.stats.startCapturing();

process.once('SIGINT', async () => {
this.log.warning('Pausing... Press CTRL+C again to force exit. To resume, do: CRAWLEE_PURGE_ON_START=0 npm run start');
await this._pauseOnMigration();
await this.autoscaledPool!.abort();
});

try {
this.log.info('Starting the crawl');
await this.autoscaledPool!.run();
Expand Down
7 changes: 7 additions & 0 deletions test/e2e/migration/actor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# IDE and OS metadata
.idea
.DS_Store
# Installed dependencies and lockfile (deps are installed inside the Docker build)
node_modules
package-lock.json
# Local storage directories — presumably created by Apify/Crawlee runs; keep out of VCS
apify_storage
crawlee_storage
storage
16 changes: 16 additions & 0 deletions test/e2e/migration/actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Apify Node.js 16 base image. NOTE(review): pinned to a beta tag — confirm this is intentional.
FROM apify/actor-node:16-beta

# Copy the local monorepo packages first so the "file:./packages/*" dependencies
# declared in package.json resolve during install.
COPY packages ./packages
COPY package*.json ./

# Install production dependencies only; keep output quiet, then log the
# resolved dependency tree and tool versions for build debugging.
# `|| true` on `npm list` prevents peer-dependency warnings from failing the build.
RUN npm --quiet set progress=false \
    && npm install --only=prod --no-optional \
    && npm update \
    && echo "Installed NPM packages:" \
    && (npm list --only=prod --no-optional --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Copy the rest of the actor source last so source edits don't invalidate
# the cached dependency-install layer above.
COPY . ./
6 changes: 6 additions & 0 deletions test/e2e/migration/actor/apify.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "test-migration",
"version": "0.0",
"buildTag": "latest",
"env": null
}
35 changes: 35 additions & 0 deletions test/e2e/migration/actor/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// E2E fixture: simulates an actor migration by emitting SIGINT from inside the
// request handler (which the crawler's SIGINT handler treats as a pause/abort),
// then running the whole actor a second time to verify the crawl resumes from
// persisted state instead of starting over.
import { Actor } from 'apify';
import { CheerioCrawler, Configuration, Dataset } from '@crawlee/cheerio';
import { ApifyStorageLocal } from '@apify/storage-local';

// Presumably disables purging of local storages on start so the second run()
// below sees the state left behind by the first — TODO confirm against Crawlee docs.
process.env.CRAWLEE_PURGE_ON_START = '0';

const mainOptions = {
    // Only let Actor.main() call process.exit() on the Apify platform;
    // locally we need the process to survive so the second run() executes.
    exit: Actor.isAtHome(),
    storage: process.env.STORAGE_IMPLEMENTATION === 'LOCAL' ? new ApifyStorageLocal() : undefined,
};

const run = async () => {
    await Actor.main(async () => {
        const crawler = new CheerioCrawler({
            maxConcurrency: 1,
            maxRequestsPerCrawl: 5,
            async requestHandler({ enqueueLinks, request }) {
                const { url } = request;
                await enqueueLinks({ pseudoUrls: ['https://apify.com[(/[\\w-]+)?]'] });

                await Dataset.pushData({ url });

                // Fire the crawler's SIGINT listener in-process after every page,
                // forcing the "migration" pause path without killing the process.
                process.emit('SIGINT');
            },
        });

        // eslint-disable-next-line no-underscore-dangle
        // NOTE(review): resets the storage client's internal "already purged" flag —
        // presumably so purge bookkeeping behaves as on a fresh start; verify.
        Configuration.getGlobalConfig().getStorageClient().__purged = false;

        await crawler.run(['https://apify.com']);
    }, mainOptions);
};

// First run is interrupted by the emitted SIGINT; the second run resumes the
// same request queue and should finish the crawl.
await run();
await run();
28 changes: 28 additions & 0 deletions test/e2e/migration/actor/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"name": "test-migration",
"version": "0.0.1",
"description": "Migration Test",
"dependencies": {
"apify": "next",
"@apify/storage-local": "^2.1.0",
"@crawlee/basic": "file:./packages/basic-crawler",
"@crawlee/browser-pool": "file:./packages/browser-pool",
"@crawlee/http": "file:./packages/http-crawler",
"@crawlee/cheerio": "file:./packages/cheerio-crawler",
"@crawlee/core": "file:./packages/core",
"@crawlee/memory-storage": "file:./packages/memory-storage",
"@crawlee/types": "file:./packages/types",
"@crawlee/utils": "file:./packages/utils"
},
"overrides": {
"apify": {
"@crawlee/core": "file:./packages/core",
"@crawlee/utils": "file:./packages/utils"
}
},
"scripts": {
"start": "node main.js"
},
"type": "module",
"license": "ISC"
}
10 changes: 10 additions & 0 deletions test/e2e/migration/test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Migration e2e test: runs the actor fixture (which interrupts and resumes
// itself) and checks the dataset produced across both runs.
import { initialize, getActorTestDir, runActor, expect, validateDataset } from '../tools.mjs';

const actorDir = getActorTestDir(import.meta.url);
await initialize(actorDir);

const { datasetItems } = await runActor(actorDir);

// Exactly one item per run, each with a `url` field, and the two runs must
// have processed different pages (i.e. the second run resumed, not restarted).
const itemCountOk = datasetItems.length === 2;
await expect(itemCountOk, 'Number of dataset items');
await expect(validateDataset(datasetItems, ['url']), 'Dataset items validation');

const [first, second] = datasetItems;
await expect(first.url !== second.url, 'Dataset items unique');

0 comments on commit ee3a68f

Please sign in to comment.