Skip to content

Commit

Permalink
fix: support DELETE requests in HttpCrawler (#2039)
Browse files Browse the repository at this point in the history
This commit addresses an issue in the @crawlee/http (HttpCrawler) and
@crawlee/cheerio (CheerioCrawler) packages related to setting the DELETE
method for requests. The problem caused requests to fail with a timeout,
rendering the functionality unusable.

Closes #1658
  • Loading branch information
HamzaAlwan committed Aug 21, 2023
1 parent 8b3694f commit 7ea5c41
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 4 deletions.
5 changes: 5 additions & 0 deletions packages/http-crawler/src/internals/http-crawler.ts
Expand Up @@ -814,6 +814,11 @@ export class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, H
}
});

// We need to end the stream for DELETE requests, otherwise it will hang.
if (options.method && ['DELETE', 'delete'].includes(options.method)) {
stream.end();
}

stream.on('error', reject);
stream.on('response', () => {
resolve(addResponsePropertiesToStream(stream));
Expand Down
32 changes: 30 additions & 2 deletions test/core/crawlers/cheerio_crawler.test.ts
Expand Up @@ -507,7 +507,7 @@ describe('CheerioCrawler', () => {
});
});

test('should ignore non http error status codes set by user', async () => {
test('should ignore http error status codes set by user', async () => {
const requestList = await getRequestListForMock({
headers: {
'content-type': 'text/plain',
Expand All @@ -534,7 +534,7 @@ describe('CheerioCrawler', () => {
expect(failed).toHaveLength(0);
});

test('should throw and error on http error status codes set by user', async () => {
test('should throw an error on http error status codes set by user', async () => {
const requestList = await getRequestListForMirror();
const failed: Request[] = [];

Expand Down Expand Up @@ -1290,6 +1290,34 @@ describe('CheerioCrawler', () => {
expect(cheerioCrawler.requestHandler).toBeUndefined();
});
});

// Verifies that CheerioCrawler can process requests sent with the DELETE method:
// the run must complete without any request reaching `failedRequestHandler`.
test('should work with delete requests', async () => {
    // Requests that exhausted their retries end up here.
    const failed: Request[] = [];

    // Four distinct mock URLs, each requested with the DELETE method.
    const sources: Source[] = [];
    for (const num of [1, 2, 3, 4]) {
        sources.push({
            url: `${serverAddress}/special/mock?a=${num}`,
            method: 'DELETE',
        });
    }

    const crawler = new CheerioCrawler({
        requestList: await RequestList.open(null, sources),
        maxConcurrency: 1,
        // No retries: a single failed attempt is reported immediately.
        maxRequestRetries: 0,
        navigationTimeoutSecs: 5,
        requestHandlerTimeoutSecs: 5,
        requestHandler: async () => {},
        failedRequestHandler: async ({ request }) => {
            failed.push(request);
        },
    });

    await crawler.run();

    expect(failed).toHaveLength(0);
});
});

async function getRequestListForMock(mockData: Dictionary, pathName = 'special/mock') {
Expand Down
26 changes: 24 additions & 2 deletions test/core/crawlers/http_crawler.test.ts
Expand Up @@ -279,7 +279,7 @@ test('POST with undefined (empty) payload', async () => {
expect(results).toStrictEqual(['']);
});

test('should ignore non http error status codes set by user', async () => {
test('should ignore http error status codes set by user', async () => {
const failed: any[] = [];

const crawler = new HttpCrawler({
Expand All @@ -298,7 +298,7 @@ test('should ignore non http error status codes set by user', async () => {
expect(failed).toHaveLength(0);
});

test('should throw and error on http error status codes set by user', async () => {
test('should throw an error on http error status codes set by user', async () => {
const failed: any[] = [];

const crawler = new HttpCrawler({
Expand All @@ -316,3 +316,25 @@ test('should throw and error on http error status codes set by user', async () =
expect(crawler.autoscaledPool.minConcurrency).toBe(2);
expect(failed).toHaveLength(1);
});

// Verifies that HttpCrawler can process a request sent with the DELETE method:
// the run must complete without the request reaching `failedRequestHandler`.
test('should work with delete requests', async () => {
    // Requests that exhausted their retries end up here.
    const failed: any[] = [];

    // Named `crawler` (not `cheerioCrawler`) because this is an HttpCrawler,
    // matching the naming used by the other tests in this file.
    const crawler = new HttpCrawler({
        maxConcurrency: 1,
        // No retries: a single failed attempt is reported immediately.
        maxRequestRetries: 0,
        navigationTimeoutSecs: 5,
        requestHandlerTimeoutSecs: 5,
        requestHandler: async () => {},
        failedRequestHandler: async ({ request }) => {
            failed.push(request);
        },
    });

    await crawler.run([{
        url: `${url}`,
        method: 'DELETE',
    }]);

    expect(failed).toHaveLength(0);
});

0 comments on commit 7ea5c41

Please sign in to comment.