diff --git a/packages/core/src/request.ts b/packages/core/src/request.ts index 7fc500be96b..4b5c67940e5 100644 --- a/packages/core/src/request.ts +++ b/packages/core/src/request.ts @@ -51,6 +51,9 @@ export enum RequestState { * are considered as pointing to the same web resource. This behavior applies to all Crawlee classes, * such as {@apilink RequestList}, {@apilink RequestQueue}, {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler}. * + * > To access and examine the actual request sent over HTTP, with all autofilled headers, you can access the + * `response.request` object from the request handler. + * * Example use: * * ```javascript diff --git a/packages/http-crawler/src/internals/http-crawler.ts b/packages/http-crawler/src/internals/http-crawler.ts index 9c8cbf914ad..d797a5d0227 100644 --- a/packages/http-crawler/src/internals/http-crawler.ts +++ b/packages/http-crawler/src/internals/http-crawler.ts @@ -27,7 +27,7 @@ import type { RequestLike, ResponseLike } from 'content-type'; import * as cheerio from 'cheerio'; import contentTypeParser from 'content-type'; import mime from 'mime-types'; -import type { OptionsInit, Method, Request as GotRequest, Options } from 'got-scraping'; +import type { OptionsInit, Method, Request as GotRequest, Options, PlainResponse } from 'got-scraping'; import { gotScraping, TimeoutError } from 'got-scraping'; import type { JsonValue } from 'type-fest'; import { extname } from 'node:path'; @@ -191,7 +191,7 @@ export interface InternalHttpCrawlingContext< * Parsed `Content-Type header: { type, encoding }`. 
*/ contentType: { type: string; encoding: BufferEncoding }; - response: IncomingMessage; + response: PlainResponse; parseWithCheerio(): Promise; } @@ -569,7 +569,7 @@ export class HttpCrawler { + protected async _requestFunction({ request, session, proxyUrl, gotOptions }: RequestFunctionOptions): Promise { const opts = this._getRequestOptions(request, session, proxyUrl, gotOptions); try { @@ -577,7 +577,7 @@ export class HttpCrawler { - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { const stream = gotScraping(options); - stream.on('redirect', (updatedOptions: Options, redirectResponse: IncomingMessage) => { + stream.on('redirect', (updatedOptions: Options, redirectResponse: PlainResponse) => { if (this.persistCookiesPerSession) { session!.setCookiesFromResponse(redirectResponse); @@ -812,11 +812,11 @@ function addResponsePropertiesToStream(stream: GotRequest) { for (const prop of properties) { if (!(prop in stream)) { // @ts-expect-error - stream[prop] = response[prop as keyof IncomingMessage]; + stream[prop] = response[prop as keyof PlainResponse]; } } - return stream as unknown as IncomingMessage; + return stream as unknown as PlainResponse; } /** diff --git a/test/core/crawlers/cheerio_crawler.test.ts b/test/core/crawlers/cheerio_crawler.test.ts index d459c8865fa..6021f56f713 100644 --- a/test/core/crawlers/cheerio_crawler.test.ts +++ b/test/core/crawlers/cheerio_crawler.test.ts @@ -380,8 +380,6 @@ describe('CheerioCrawler', () => { const crawler = new CheerioCrawler({ requestList, requestHandler: ({ response }) => { - // TODO: this accesses IncomingMessage#request, which doesn't exist according to types - // @ts-expect-error headers.push(response.request.options.headers); }, });