Skip to content

Commit

Permalink
fix(http-crawler): replace IncomingMessage with PlainResponse for…
Browse files Browse the repository at this point in the history
… context's `response` (#1973)

Closes #1964
  • Loading branch information
foxt451 committed Jul 12, 2023
1 parent eeefab6 commit 2a1cc7f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
3 changes: 3 additions & 0 deletions packages/core/src/request.ts
Expand Up @@ -51,6 +51,9 @@ export enum RequestState {
* are considered as pointing to the same web resource. This behavior applies to all Crawlee classes,
* such as {@apilink RequestList}, {@apilink RequestQueue}, {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler}.
*
* > To examine the actual request sent over HTTP, including all auto-filled headers, access the
* `response.request` object in the request handler.
*
* Example use:
*
* ```javascript
Expand Down
16 changes: 8 additions & 8 deletions packages/http-crawler/src/internals/http-crawler.ts
Expand Up @@ -27,7 +27,7 @@ import type { RequestLike, ResponseLike } from 'content-type';
import * as cheerio from 'cheerio';
import contentTypeParser from 'content-type';
import mime from 'mime-types';
import type { OptionsInit, Method, Request as GotRequest, Options } from 'got-scraping';
import type { OptionsInit, Method, Request as GotRequest, Options, PlainResponse } from 'got-scraping';
import { gotScraping, TimeoutError } from 'got-scraping';
import type { JsonValue } from 'type-fest';
import { extname } from 'node:path';
Expand Down Expand Up @@ -191,7 +191,7 @@ export interface InternalHttpCrawlingContext<
* Parsed `Content-Type` header: `{ type, encoding }`.
*/
contentType: { type: string; encoding: BufferEncoding };
response: IncomingMessage;
response: PlainResponse;

parseWithCheerio(): Promise<cheerio.CheerioAPI>;
}
Expand Down Expand Up @@ -569,15 +569,15 @@ export class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, H
* on the request such as only downloading the request body if the
* received content type matches text/html, application/xml, application/xhtml+xml.
*/
protected async _requestFunction({ request, session, proxyUrl, gotOptions }: RequestFunctionOptions): Promise<IncomingMessage> {
protected async _requestFunction({ request, session, proxyUrl, gotOptions }: RequestFunctionOptions): Promise<PlainResponse> {
const opts = this._getRequestOptions(request, session, proxyUrl, gotOptions);

try {
return await this._requestAsBrowser(opts, session);
} catch (e) {
if (e instanceof TimeoutError) {
this._handleRequestTimeout(session);
return undefined as unknown as IncomingMessage;
return undefined as unknown as PlainResponse;
}

throw e;
Expand Down Expand Up @@ -752,10 +752,10 @@ export class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, H
* @internal wraps public utility for mocking purposes
*/
private _requestAsBrowser = (options: OptionsInit & { isStream: true }, session?: Session) => {
return new Promise<IncomingMessage>((resolve, reject) => {
return new Promise<PlainResponse>((resolve, reject) => {
const stream = gotScraping(options);

stream.on('redirect', (updatedOptions: Options, redirectResponse: IncomingMessage) => {
stream.on('redirect', (updatedOptions: Options, redirectResponse: PlainResponse) => {
if (this.persistCookiesPerSession) {
session!.setCookiesFromResponse(redirectResponse);

Expand Down Expand Up @@ -812,11 +812,11 @@ function addResponsePropertiesToStream(stream: GotRequest) {
for (const prop of properties) {
if (!(prop in stream)) {
// @ts-expect-error
stream[prop] = response[prop as keyof IncomingMessage];
stream[prop] = response[prop as keyof PlainResponse];
}
}

return stream as unknown as IncomingMessage;
return stream as unknown as PlainResponse;
}

/**
Expand Down
2 changes: 0 additions & 2 deletions test/core/crawlers/cheerio_crawler.test.ts
Expand Up @@ -380,8 +380,6 @@ describe('CheerioCrawler', () => {
const crawler = new CheerioCrawler({
requestList,
requestHandler: ({ response }) => {
// TODO: this accesses IncomingMessage#request, which doesn't exist according to types
// @ts-expect-error
headers.push(response.request.options.headers);
},
});
Expand Down

0 comments on commit 2a1cc7f

Please sign in to comment.