Skip to content

Commit

Permalink
fix: sendRequest types (#1445)
Browse files: browse the repository at this point in the history
  • Loading branch information
szmarczak committed Aug 8, 2022
1 parent 6b4c967 commit 751563b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 14 deletions.
22 changes: 12 additions & 10 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import {
purgeDefaultStorages,
validators,
} from '@crawlee/core';
import type { GotOptionsInit, OptionsOfTextResponseBody, Response as GotResponse } from 'got-scraping';
import type { Method, OptionsInit, Response as GotResponse } from 'got-scraping';
import { gotScraping } from 'got-scraping';
import type { ProcessedRequest, Dictionary, Awaitable, BatchAddRequestsResult } from '@crawlee/types';
import { chunk, sleep } from '@crawlee/utils';
Expand All @@ -43,7 +43,7 @@ import ow, { ArgumentError } from 'ow';
export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<UserData> {
crawler: BasicCrawler;
enqueueLinks: (options: BasicCrawlerEnqueueLinksOptions) => Promise<BatchAddRequestsResult>;
sendRequest: (overrideOptions?: Partial<GotOptionsInit>) => Promise<GotResponse<string>>;
sendRequest: (overrideOptions?: Partial<OptionsInit>) => Promise<GotResponse<string>>;
}

/** @internal */
Expand Down Expand Up @@ -829,10 +829,16 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
requestQueue: await this.getRequestQueue(),
});
},
sendRequest: async (overrideOptions?: GotOptionsInit) => {
sendRequest: async (overrideOptions?: OptionsInit) => {
const cookieJar = session ? {
getCookieString: async (url: string) => session!.getCookieString(url),
setCookie: async (rawCookie: string, url: string) => session!.setCookie(rawCookie, url),
...overrideOptions?.cookieJar,
} : overrideOptions?.cookieJar;

return gotScraping({
url: request!.url,
method: request!.method,
method: request!.method as Method, // Narrow type to omit CONNECT
body: request!.payload,
headers: request!.headers,
proxyUrl: crawlingContext.proxyInfo?.url,
Expand All @@ -843,12 +849,8 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
limit: 0,
...overrideOptions?.retry,
},
cookieJar: {
getCookieString: (url: string) => session!.getCookieString(url),
setCookie: (rawCookie: string, url: string) => session!.setCookie(rawCookie, url),
...overrideOptions?.cookieJar,
},
} as OptionsOfTextResponseBody);
cookieJar,
});
},
};

Expand Down
61 changes: 57 additions & 4 deletions test/core/crawlers/basic_crawler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1051,9 +1051,11 @@ describe('BasicCrawler', () => {
});

describe('sendRequest', () => {
const html = `<!DOCTYPE html><html><head><title>foobar</title></head><body><p>Hello, world!</p></body></html>`;

const httpServer = http.createServer((request, response) => {
response.setHeader('content-type', 'text/html');
response.end(`<!DOCTYPE html><html><head><title>foobar</title></head><body><p>Hello, world!</p></body></html>`);
response.end(html);
});

let url: string;
Expand All @@ -1071,21 +1073,72 @@ describe('BasicCrawler', () => {
});

test('works', async () => {
expect.assertions(2);
const responses: { statusCode: number; body: string }[] = [];

const requestList = await RequestList.open(null, [url]);

const crawler = new BasicCrawler({
useSessionPool: true,
requestList,
async requestHandler({ sendRequest }) {
const response = await sendRequest();

responses.push({
statusCode: response.statusCode,
body: response.body,
});
},
});

await crawler.run();

expect(responses).toStrictEqual([
{
statusCode: 200,
body: html,
},
]);
});

test('works without session', async () => {
const requestList = await RequestList.open(null, [url]);

const responses: { statusCode: number; body: string }[] = [];

const crawler = new BasicCrawler({
useSessionPool: false,
requestList,
async requestHandler({ sendRequest }) {
const response = await sendRequest();

expect(response.statusCode).toBe(200);
expect(response.body.includes('Hello, world!')).toBe(true);
responses.push({
statusCode: response.statusCode,
body: response.body,
});
},
});

await crawler.run();

expect(responses).toStrictEqual([
{
statusCode: 200,
body: html,
},
]);
});

test('proxyUrl TypeScript support', async () => {
const crawler = new BasicCrawler({
useSessionPool: true,
async requestHandler({ sendRequest }) {
await sendRequest({
proxyUrl: 'http://example.com',
});
},
});

expect(crawler).toBeTruthy();
});
});
});

0 comments on commit 751563b

Please sign in to comment.