diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index 7d170d812e3a..0b69968558ef 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -684,7 +684,6 @@ export class BasicCrawler { - // eslint-disable-next-line max-len this.log.deprecated( "The 'error' property of the crawling context is deprecated, and it is now passed as the second parameter in 'errorHandler' and 'failedRequestHandler'. Please update your code, as this property will be removed in a future version.", ); diff --git a/packages/browser-crawler/src/internals/browser-crawler.ts b/packages/browser-crawler/src/internals/browser-crawler.ts index 2427a7a9f0b7..2e86bd43c155 100644 --- a/packages/browser-crawler/src/internals/browser-crawler.ts +++ b/packages/browser-crawler/src/internals/browser-crawler.ts @@ -457,8 +457,8 @@ export abstract class BrowserCrawler< protected override async isRequestBlocked(crawlingContext: Context): Promise { const { page, response } = crawlingContext; - // eslint-disable-next-line dot-notation const blockedStatusCodes = + // eslint-disable-next-line dot-notation (this.sessionPool?.['blockedStatusCodes'].length ?? 0) > 0 ? // eslint-disable-next-line dot-notation this.sessionPool!['blockedStatusCodes'] @@ -803,8 +803,8 @@ export async function browserCrawlerEnqueueLinks({ * Extracts URLs from a given page. * @ignore */ -// eslint-disable-next-line @typescript-eslint/ban-types export async function extractUrlsFromPage( + // eslint-disable-next-line @typescript-eslint/ban-types page: { $$eval: Function }, selector: string, baseUrl: string, diff --git a/packages/browser-pool/src/browser-pool.ts b/packages/browser-pool/src/browser-pool.ts index 4ed51853a211..b50982a8c142 100644 --- a/packages/browser-pool/src/browser-pool.ts +++ b/packages/browser-pool/src/browser-pool.ts @@ -369,7 +369,6 @@ export class BrowserPool< const firstPluginName = firstPluginConstructor.name; const providedPluginName = (providedPlugin as BrowserPlugin).constructor.name; - // eslint-disable-next-line max-len throw new Error( `Browser plugin at index ${i} (${providedPluginName}) is not an instance of the same plugin as the first plugin provided (${firstPluginName}).`, ); diff --git a/packages/core/src/crawlers/crawler_commons.ts b/packages/core/src/crawlers/crawler_commons.ts index 38597c2f525b..f6457e7aa78a 100644 --- a/packages/core/src/crawlers/crawler_commons.ts +++ b/packages/core/src/crawlers/crawler_commons.ts @@ -12,9 +12,9 @@ import type { Session } from '../session_pool/session'; import type { RequestQueueOperationOptions, Dataset, RecordOptions } from '../storages'; import { KeyValueStore } from '../storages'; -// we need `Record` here, otherwise `Omit` is resolved badly -// eslint-disable-next-line export interface RestrictedCrawlingContext + // we need `Record` here, otherwise `Omit` is resolved badly + // eslint-disable-next-line extends Record { /** * The original {@apilink Request} object. @@ -160,8 +160,11 @@ export interface CrawlingContext> = {}; + private pushDataCalls: Parameters[] = []; + private addRequestsCalls: Parameters[] = []; + private enqueueLinksCalls: Parameters[] = []; constructor( diff --git a/packages/core/src/enqueue_links/shared.ts b/packages/core/src/enqueue_links/shared.ts index 505b694b26bf..970e62ad9f80 100644 --- a/packages/core/src/enqueue_links/shared.ts +++ b/packages/core/src/enqueue_links/shared.ts @@ -171,10 +171,7 @@ export function createRequests( .filter(({ url }) => { return !excludePatternObjects.some((excludePatternObject) => { const { regexp, glob } = excludePatternObject; - return ( - (regexp && url.match(regexp)) || // eslint-disable-line - (glob && minimatch(url, glob, { nocase: true })) - ); + return (regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true })); }); }) .map(({ url, opts }) => { @@ -184,10 +181,7 @@ export function createRequests( for (const urlPatternObject of urlPatternObjects) { const { regexp, glob, ...requestRegExpOptions } = urlPatternObject; - if ( - (regexp && url.match(regexp)) || // eslint-disable-line - (glob && minimatch(url, glob, { nocase: true })) - ) { + if ((regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true }))) { const request = typeof opts === 'string' ? { url: opts, ...requestRegExpOptions, enqueueStrategy: strategy } @@ -214,10 +208,7 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt for (const urlPatternObject of patterns) { const { regexp, glob } = urlPatternObject; - if ( - (regexp && request.url.match(regexp)) || // eslint-disable-line - (glob && minimatch(request.url, glob, { nocase: true })) - ) { + if ((regexp && request.url.match(regexp)) || (glob && minimatch(request.url, glob, { nocase: true }))) { filtered.push(request); // Break the pattern loop, as we already matched this request once break; diff --git a/packages/core/src/session_pool/session_pool.ts b/packages/core/src/session_pool/session_pool.ts index 2bc048c53c4e..271957231298 100644 --- a/packages/core/src/session_pool/session_pool.ts +++ b/packages/core/src/session_pool/session_pool.ts @@ -240,7 +240,6 @@ export class SessionPool extends EventEmitter { } if (!this.persistStateKeyValueStoreId) { - // eslint-disable-next-line max-len this.log.debug( `No 'persistStateKeyValueStoreId' options specified, this session pool's data has been saved in the KeyValueStore with the id: ${this.keyValueStore.id}`, ); diff --git a/packages/core/src/storages/request_provider.ts b/packages/core/src/storages/request_provider.ts index 99503884ede5..ced4ad36207e 100644 --- a/packages/core/src/storages/request_provider.ts +++ b/packages/core/src/storages/request_provider.ts @@ -741,6 +741,7 @@ declare class BuiltRequestProvider extends RequestProvider { override fetchNextRequest( options?: RequestOptions | undefined, ): Promise | null>; + protected override ensureHeadIsNonEmpty(): Promise; } diff --git a/packages/http-crawler/src/internals/http-crawler.ts b/packages/http-crawler/src/internals/http-crawler.ts index 03394518cbd7..773955372efa 100644 --- a/packages/http-crawler/src/internals/http-crawler.ts +++ b/packages/http-crawler/src/internals/http-crawler.ts @@ -32,7 +32,6 @@ import { RETRY_CSS_SELECTORS, gotScraping } from '@crawlee/utils'; import * as cheerio from 'cheerio'; import type { RequestLike, ResponseLike } from 'content-type'; import contentTypeParser from 'content-type'; -// @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood import type { OptionsInit, Method, @@ -40,6 +39,7 @@ import type { Options, PlainResponse, TimeoutError as TimeoutErrorClass, + // @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood } from 'got-scraping'; import iconv from 'iconv-lite'; import mime from 'mime-types'; @@ -599,7 +599,6 @@ export class HttpCrawler< if (gotOptions.headers?.Cookie && gotOptions.headers?.cookie) { const { Cookie: upperCaseHeader, cookie: lowerCaseHeader } = gotOptions.headers; - // eslint-disable-next-line max-len this.log.warning( `Encountered mixed casing for the cookie headers in the got options for request ${request.url} (${request.id}). Their values will be merged`, ); diff --git a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts index 91057f564990..dc04d20b7028 100644 --- a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts +++ b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts @@ -806,7 +806,6 @@ export function registerUtilsToContext(context: PlaywrightCrawlingContext): void context.infiniteScroll = async (options?: InfiniteScrollOptions) => infiniteScroll(context.page, options); context.saveSnapshot = async (options?: SaveSnapshotOptions) => saveSnapshot(context.page, { ...options, config: context.crawler.config }); - // eslint-disable-next-line max-len context.enqueueLinksByClickingElements = async ( options: Omit, ) => diff --git a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts index cf0603d35e9f..daf3409293f4 100644 --- a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts +++ b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts @@ -275,7 +275,6 @@ export async function sendCDPCommand( const jsonPath = require.resolve('puppeteer/package.json'); const parsed = JSON.parse(await readFile(jsonPath, 'utf-8')); - // eslint-disable-next-line max-len throw new Error( `Cannot detect CDP client for Puppeteer ${parsed.version}. You should report this to Crawlee, mentioning the puppeteer version you are using.`, ); @@ -1007,7 +1006,6 @@ export function registerUtilsToContext(context: PuppeteerCrawlingContext): void await injectJQuery(context.page, { surviveNavigations: false }); }; context.parseWithCheerio = async () => parseWithCheerio(context.page); - // eslint-disable-next-line max-len context.enqueueLinksByClickingElements = async ( options: Omit, ) => diff --git a/packages/utils/src/internals/general.ts b/packages/utils/src/internals/general.ts index 4bfec532a3a5..2bb7bcb2d3ab 100644 --- a/packages/utils/src/internals/general.ts +++ b/packages/utils/src/internals/general.ts @@ -5,19 +5,21 @@ import { setTimeout } from 'node:timers/promises'; * Default regular expression to match URLs in a string that may be plain text, JSON, CSV or other. It supports common URL characters * and does not support URLs containing commas or spaces. The URLs also may contain Unicode letters (not symbols). */ +// eslint-disable-next-line export const URL_NO_COMMAS_REGEX = RegExp( 'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?', 'giu', -); // eslint-disable-line +); /** * Regular expression that, in addition to the default regular expression `URL_NO_COMMAS_REGEX`, supports matching commas in URL path and query. * Note, however, that this may prevent parsing URLs from comma delimited lists, or the URLs may become malformed. */ +// eslint-disable-next-line export const URL_WITH_COMMAS_REGEX = RegExp( 'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?', 'giu', -); // eslint-disable-line +); let isDockerPromiseCache: Promise | undefined; diff --git a/packages/utils/src/internals/social.ts b/packages/utils/src/internals/social.ts index 34a19b6af9a2..3d415527ed6f 100644 --- a/packages/utils/src/internals/social.ts +++ b/packages/utils/src/internals/social.ts @@ -3,7 +3,8 @@ import cheerio from 'cheerio'; import { htmlToText } from './cheerio'; // Regex inspired by https://zapier.com/blog/extract-links-email-phone-regex/ -const EMAIL_REGEX_STRING = '(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\\])'; +const EMAIL_REGEX_STRING = + '(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\\])'; /** * Regular expression to exactly match a single email address. diff --git a/test/core/crawlers/browser_crawler.test.ts b/test/core/crawlers/browser_crawler.test.ts index 3ec2540db2d8..e70f590c59ff 100644 --- a/test/core/crawlers/browser_crawler.test.ts +++ b/test/core/crawlers/browser_crawler.test.ts @@ -158,7 +158,6 @@ describe('BrowserCrawler', () => { let isEvaluated = false; const browserCrawler = new (class extends BrowserCrawlerTest { - // eslint-disable-next-line max-len protected override async _navigationHandler( ctx: PuppeteerCrawlingContext, gotoOptions: PuppeteerGoToOptions, @@ -292,7 +291,6 @@ describe('BrowserCrawler', () => { }); let optionsGoto: PuppeteerGoToOptions; const browserCrawler = new (class extends BrowserCrawlerTest { - // eslint-disable-next-line max-len protected override async _navigationHandler( ctx: PuppeteerCrawlingContext, gotoOptions: PuppeteerGoToOptions, @@ -941,7 +939,6 @@ describe('BrowserCrawler', () => { await crawler.run([serverAddress]); expect(spy).toBeCalled(); - // eslint-disable-next-line max-len expect(spy.mock.calls[0][0]).toEqual( 'When using RequestList and RequestQueue at the same time, you should instantiate both explicitly and provide them in the crawler options, to ensure correctly handled restarts of the crawler.', ); diff --git a/test/core/crawlers/cheerio_crawler.test.ts b/test/core/crawlers/cheerio_crawler.test.ts index 43158e78f66e..7696711c7040 100644 --- a/test/core/crawlers/cheerio_crawler.test.ts +++ b/test/core/crawlers/cheerio_crawler.test.ts @@ -191,7 +191,6 @@ describe('CheerioCrawler', () => { maxConcurrency: 2, }); - // eslint-disable-next-line max-len await expect(cheerioCrawler.run()).rejects.toThrow( "Route not found for label 'undefined'. You must set up a route for this label or a default route. Use `requestHandler`, `router.addHandler` or `router.addDefaultHandler`.", ); diff --git a/test/core/crawlers/puppeteer_crawler.test.ts b/test/core/crawlers/puppeteer_crawler.test.ts index 7de9d2aa6779..140215ff6c80 100644 --- a/test/core/crawlers/puppeteer_crawler.test.ts +++ b/test/core/crawlers/puppeteer_crawler.test.ts @@ -154,8 +154,8 @@ describe('PuppeteerCrawler', () => { test('should throw if launchOptions.proxyUrl is supplied', async () => { try { + // eslint-disable-next-line new PuppeteerCrawler({ - //eslint-disable-line requestList, maxRequestRetries: 0, maxConcurrency: 1, diff --git a/test/core/error_tracker.test.ts b/test/core/error_tracker.test.ts index ade00956e7f0..f15cb00acbdb 100644 --- a/test/core/error_tracker.test.ts +++ b/test/core/error_tracker.test.ts @@ -1,6 +1,3 @@ -/* eslint-disable no-multi-spaces */ -import exp from 'node:constants'; - import { ErrorTracker } from '../../packages/utils/src/internals/error_tracker'; const random = () => Math.random().toString(36).slice(2); diff --git a/test/core/puppeteer_request_interception.test.ts b/test/core/puppeteer_request_interception.test.ts index 380483f71c5f..ee9abfeeea4d 100644 --- a/test/core/puppeteer_request_interception.test.ts +++ b/test/core/puppeteer_request_interception.test.ts @@ -173,7 +173,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl // Override headers const headers = { ...request.headers(), - accept: 'text/html', + 'accept': 'text/html', 'accept-language': 'en-GB', 'upgrade-insecure-requests': '2', }; diff --git a/test/shared/data/html_to_text_test_data.ts b/test/shared/data/html_to_text_test_data.ts index c7832f97e380..ad123764f957 100644 --- a/test/shared/data/html_to_text_test_data.ts +++ b/test/shared/data/html_to_text_test_data.ts @@ -1,5 +1,3 @@ -/* eslint-disable */ - // The whitespace in the text is important. Don't change it. // We're keeping this text as a JS string, because git and other // tools do magic with line endings and it can break tests. diff --git a/test/utils/general.test.ts b/test/utils/general.test.ts index a6760fe93c00..f7dc788e8d43 100644 --- a/test/utils/general.test.ts +++ b/test/utils/general.test.ts @@ -84,10 +84,10 @@ describe('sleep()', () => { describe('snakeCaseToCamelCase()', () => { test('should camel case all sneaky cases of snake case', () => { const tests = { - aaa_bbb_: 'aaaBbb', + 'aaa_bbb_': 'aaaBbb', '': '', - AaA_bBb_cCc: 'aaaBbbCcc', - a_1_b_1a: 'a1B1a', + 'AaA_bBb_cCc': 'aaaBbbCcc', + 'a_1_b_1a': 'a1B1a', }; Object.entries(tests).forEach(([snakeCase, camelCase]) => {