Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: support DELETE requests in HttpCrawler #2039

Merged
merged 1 commit into from Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/http-crawler/src/internals/http-crawler.ts
Expand Up @@ -814,6 +814,11 @@ export class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, H
}
});

// We need to end the stream for DELETE requests, otherwise it will hang.
if (options.method && ['DELETE', 'delete'].includes(options.method)) {
stream.end();
}

stream.on('error', reject);
stream.on('response', () => {
resolve(addResponsePropertiesToStream(stream));
Expand Down
32 changes: 30 additions & 2 deletions test/core/crawlers/cheerio_crawler.test.ts
Expand Up @@ -507,7 +507,7 @@ describe('CheerioCrawler', () => {
});
});

test('should ignore non http error status codes set by user', async () => {
test('should ignore http error status codes set by user', async () => {
const requestList = await getRequestListForMock({
headers: {
'content-type': 'text/plain',
Expand All @@ -534,7 +534,7 @@ describe('CheerioCrawler', () => {
expect(failed).toHaveLength(0);
});

test('should throw and error on http error status codes set by user', async () => {
test('should throw an error on http error status codes set by user', async () => {
const requestList = await getRequestListForMirror();
const failed: Request[] = [];

Expand Down Expand Up @@ -1290,6 +1290,34 @@ describe('CheerioCrawler', () => {
expect(cheerioCrawler.requestHandler).toBeUndefined();
});
});

// Runs four DELETE requests against the mock endpoint and expects that none of
// them is handed to the failed-request handler.
test('should work with delete requests', async () => {
    const sources: Source[] = [];
    for (const num of [1, 2, 3, 4]) {
        sources.push({
            url: `${serverAddress}/special/mock?a=${num}`,
            method: 'DELETE',
        });
    }

    const failedRequests: Request[] = [];
    const requestList = await RequestList.open(null, sources);

    const cheerioCrawler = new CheerioCrawler({
        requestList,
        maxConcurrency: 1,
        maxRequestRetries: 0,
        navigationTimeoutSecs: 5,
        requestHandlerTimeoutSecs: 5,
        // Nothing to do with the response; only completion matters here.
        requestHandler: async () => {},
        failedRequestHandler: async (context) => {
            failedRequests.push(context.request);
        },
    });

    await cheerioCrawler.run();

    expect(failedRequests).toHaveLength(0);
});
});

async function getRequestListForMock(mockData: Dictionary, pathName = 'special/mock') {
Expand Down
26 changes: 24 additions & 2 deletions test/core/crawlers/http_crawler.test.ts
Expand Up @@ -279,7 +279,7 @@ test('POST with undefined (empty) payload', async () => {
expect(results).toStrictEqual(['']);
});

test('should ignore non http error status codes set by user', async () => {
test('should ignore http error status codes set by user', async () => {
const failed: any[] = [];

const crawler = new HttpCrawler({
Expand All @@ -298,7 +298,7 @@ test('should ignore non http error status codes set by user', async () => {
expect(failed).toHaveLength(0);
});

test('should throw and error on http error status codes set by user', async () => {
test('should throw an error on http error status codes set by user', async () => {
const failed: any[] = [];

const crawler = new HttpCrawler({
Expand All @@ -316,3 +316,25 @@ test('should throw and error on http error status codes set by user', async () =
expect(crawler.autoscaledPool.minConcurrency).toBe(2);
expect(failed).toHaveLength(1);
});

/**
 * Sends a single DELETE request through `HttpCrawler` and expects it to
 * complete without being passed to the failed-request handler.
 */
test('should work with delete requests', async () => {
    const failed: any[] = [];

    const crawler = new HttpCrawler({
        maxConcurrency: 1,
        // Retries are disabled and both timeouts are set to 5 seconds.
        maxRequestRetries: 0,
        navigationTimeoutSecs: 5,
        requestHandlerTimeoutSecs: 5,
        // The response body is irrelevant; the request only has to finish.
        requestHandler: async () => {},
        failedRequestHandler: async ({ request }) => {
            failed.push(request);
        },
    });

    await crawler.run([{
        url: `${url}`,
        method: 'DELETE',
    }]);

    expect(failed).toHaveLength(0);
});