diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index 7d170d812e3a..a5125f9b33f7 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -684,7 +684,6 @@ export class BasicCrawler { - // eslint-disable-next-line max-len this.log.deprecated( + // eslint-disable-next-line max-len "The 'error' property of the crawling context is deprecated, and it is now passed as the second parameter in 'errorHandler' and 'failedRequestHandler'. Please update your code, as this property will be removed in a future version.", ); diff --git a/packages/browser-crawler/src/internals/browser-crawler.ts b/packages/browser-crawler/src/internals/browser-crawler.ts index 9ec8debc000d..d5749d65fe9d 100644 --- a/packages/browser-crawler/src/internals/browser-crawler.ts +++ b/packages/browser-crawler/src/internals/browser-crawler.ts @@ -457,8 +457,8 @@ export abstract class BrowserCrawler< protected override async isRequestBlocked(crawlingContext: Context): Promise { const { page, response } = crawlingContext; - // eslint-disable-next-line dot-notation const blockedStatusCodes = + // eslint-disable-next-line dot-notation (this.sessionPool?.['blockedStatusCodes'].length ?? 0) > 0 ? // eslint-disable-next-line dot-notation this.sessionPool!['blockedStatusCodes'] @@ -803,8 +803,8 @@ export async function browserCrawlerEnqueueLinks({ * Extracts URLs from a given page. * @ignore */ -// eslint-disable-next-line @typescript-eslint/ban-types export async function extractUrlsFromPage( + // eslint-disable-next-line @typescript-eslint/ban-types page: { $$eval: Function }, selector: string, baseUrl: string, diff --git a/packages/browser-pool/src/browser-pool.ts b/packages/browser-pool/src/browser-pool.ts index 4ed51853a211..baed33967d8c 100644 --- a/packages/browser-pool/src/browser-pool.ts +++ b/packages/browser-pool/src/browser-pool.ts @@ -369,8 +369,8 @@ export class BrowserPool< const firstPluginName = firstPluginConstructor.name; const providedPluginName = (providedPlugin as BrowserPlugin).constructor.name; - // eslint-disable-next-line max-len throw new Error( + // eslint-disable-next-line max-len `Browser plugin at index ${i} (${providedPluginName}) is not an instance of the same plugin as the first plugin provided (${firstPluginName}).`, ); } diff --git a/packages/core/src/crawlers/crawler_commons.ts b/packages/core/src/crawlers/crawler_commons.ts index 38597c2f525b..f6457e7aa78a 100644 --- a/packages/core/src/crawlers/crawler_commons.ts +++ b/packages/core/src/crawlers/crawler_commons.ts @@ -12,9 +12,9 @@ import type { Session } from '../session_pool/session'; import type { RequestQueueOperationOptions, Dataset, RecordOptions } from '../storages'; import { KeyValueStore } from '../storages'; -// we need `Record` here, otherwise `Omit` is resolved badly -// eslint-disable-next-line export interface RestrictedCrawlingContext + // we need `Record` here, otherwise `Omit` is resolved badly + // eslint-disable-next-line extends Record { /** * The original {@apilink Request} object. @@ -160,8 +160,11 @@ export interface CrawlingContext> = {}; + private pushDataCalls: Parameters[] = []; + private addRequestsCalls: Parameters[] = []; + private enqueueLinksCalls: Parameters[] = []; constructor( diff --git a/packages/core/src/enqueue_links/shared.ts b/packages/core/src/enqueue_links/shared.ts index 505b694b26bf..970e62ad9f80 100644 --- a/packages/core/src/enqueue_links/shared.ts +++ b/packages/core/src/enqueue_links/shared.ts @@ -171,10 +171,7 @@ export function createRequests( .filter(({ url }) => { return !excludePatternObjects.some((excludePatternObject) => { const { regexp, glob } = excludePatternObject; - return ( - (regexp && url.match(regexp)) || // eslint-disable-line - (glob && minimatch(url, glob, { nocase: true })) - ); + return (regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true })); }); }) .map(({ url, opts }) => { @@ -184,10 +181,7 @@ export function createRequests( for (const urlPatternObject of urlPatternObjects) { const { regexp, glob, ...requestRegExpOptions } = urlPatternObject; - if ( - (regexp && url.match(regexp)) || // eslint-disable-line - (glob && minimatch(url, glob, { nocase: true })) - ) { + if ((regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true }))) { const request = typeof opts === 'string' ? { url: opts, ...requestRegExpOptions, enqueueStrategy: strategy } @@ -214,10 +208,7 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt for (const urlPatternObject of patterns) { const { regexp, glob } = urlPatternObject; - if ( - (regexp && request.url.match(regexp)) || // eslint-disable-line - (glob && minimatch(request.url, glob, { nocase: true })) - ) { + if ((regexp && request.url.match(regexp)) || (glob && minimatch(request.url, glob, { nocase: true }))) { filtered.push(request); // Break the pattern loop, as we already matched this request once break; diff --git a/packages/core/src/session_pool/session_pool.ts b/packages/core/src/session_pool/session_pool.ts index 2bc048c53c4e..32d48672f8bb 100644 --- a/packages/core/src/session_pool/session_pool.ts +++ b/packages/core/src/session_pool/session_pool.ts @@ -240,8 +240,8 @@ export class SessionPool extends EventEmitter { } if (!this.persistStateKeyValueStoreId) { - // eslint-disable-next-line max-len this.log.debug( + // eslint-disable-next-line max-len `No 'persistStateKeyValueStoreId' options specified, this session pool's data has been saved in the KeyValueStore with the id: ${this.keyValueStore.id}`, ); } diff --git a/packages/core/src/storages/request_provider.ts b/packages/core/src/storages/request_provider.ts index 99503884ede5..ced4ad36207e 100644 --- a/packages/core/src/storages/request_provider.ts +++ b/packages/core/src/storages/request_provider.ts @@ -741,6 +741,7 @@ declare class BuiltRequestProvider extends RequestProvider { override fetchNextRequest( options?: RequestOptions | undefined, ): Promise | null>; + protected override ensureHeadIsNonEmpty(): Promise; } diff --git a/packages/http-crawler/src/internals/http-crawler.ts b/packages/http-crawler/src/internals/http-crawler.ts index d019cbbb60ab..ef473cef9b89 100644 --- a/packages/http-crawler/src/internals/http-crawler.ts +++ b/packages/http-crawler/src/internals/http-crawler.ts @@ -32,7 +32,6 @@ import { RETRY_CSS_SELECTORS, gotScraping } from '@crawlee/utils'; import * as cheerio from 'cheerio'; import type { RequestLike, ResponseLike } from 'content-type'; import contentTypeParser from 'content-type'; -// @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood import type { OptionsInit, Method, @@ -40,6 +39,7 @@ import type { Options, PlainResponse, TimeoutError as TimeoutErrorClass, + // @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood } from 'got-scraping'; import iconv from 'iconv-lite'; import mime from 'mime-types'; @@ -599,7 +599,6 @@ export class HttpCrawler< if (gotOptions.headers?.Cookie && gotOptions.headers?.cookie) { const { Cookie: upperCaseHeader, cookie: lowerCaseHeader } = gotOptions.headers; - // eslint-disable-next-line max-len this.log.warning( `Encountered mixed casing for the cookie headers in the got options for request ${request.url} (${request.id}). Their values will be merged`, ); diff --git a/packages/memory-storage/test/fs-fallback.test.ts b/packages/memory-storage/test/fs-fallback.test.ts index 1f014d936bc1..ed1039ff3bdb 100644 --- a/packages/memory-storage/test/fs-fallback.test.ts +++ b/packages/memory-storage/test/fs-fallback.test.ts @@ -69,16 +69,20 @@ describe('fallback to fs for reading', () => { }); }); - test('attempting to read "other" key value store with no "__metadata__" present should read from fs, even if accessed without generating id first', async () => { - const otherStore = storage.keyValueStore('other'); - - const input = await otherStore.getRecord('INPUT'); - expect(input).toStrictEqual({ - key: 'INPUT', - value: { foo: 'bar but from fs' }, - contentType: 'application/json; charset=utf-8', - }); - }); + test( + 'attempting to read "other" key value store with no "__metadata__" present should read from fs, ' + + 'even if accessed without generating id first', + async () => { + const otherStore = storage.keyValueStore('other'); + + const input = await otherStore.getRecord('INPUT'); + expect(input).toStrictEqual({ + key: 'INPUT', + value: { foo: 'bar but from fs' }, + contentType: 'application/json; charset=utf-8', + }); + }, + ); test('attempting to read non-existent "default_2" key value store should return undefined', async () => { await expect(storage.keyValueStore('default_2').get()).resolves.toBeUndefined(); diff --git a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts index 91057f564990..dc04d20b7028 100644 --- a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts +++ b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts @@ -806,7 +806,6 @@ export function registerUtilsToContext(context: PlaywrightCrawlingContext): void context.infiniteScroll = async (options?: InfiniteScrollOptions) => infiniteScroll(context.page, options); context.saveSnapshot = async (options?: SaveSnapshotOptions) => saveSnapshot(context.page, { ...options, config: context.crawler.config }); - // eslint-disable-next-line max-len context.enqueueLinksByClickingElements = async ( options: Omit, ) => diff --git a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts index cf0603d35e9f..daf3409293f4 100644 --- a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts +++ b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts @@ -275,7 +275,6 @@ export async function sendCDPCommand( const jsonPath = require.resolve('puppeteer/package.json'); const parsed = JSON.parse(await readFile(jsonPath, 'utf-8')); - // eslint-disable-next-line max-len throw new Error( `Cannot detect CDP client for Puppeteer ${parsed.version}. You should report this to Crawlee, mentioning the puppeteer version you are using.`, ); @@ -1007,7 +1006,6 @@ export function registerUtilsToContext(context: PuppeteerCrawlingContext): void await injectJQuery(context.page, { surviveNavigations: false }); }; context.parseWithCheerio = async () => parseWithCheerio(context.page); - // eslint-disable-next-line max-len context.enqueueLinksByClickingElements = async ( options: Omit, ) => diff --git a/packages/utils/src/internals/general.ts b/packages/utils/src/internals/general.ts index 4bfec532a3a5..2bb7bcb2d3ab 100644 --- a/packages/utils/src/internals/general.ts +++ b/packages/utils/src/internals/general.ts @@ -5,19 +5,21 @@ import { setTimeout } from 'node:timers/promises'; * Default regular expression to match URLs in a string that may be plain text, JSON, CSV or other. It supports common URL characters * and does not support URLs containing commas or spaces. The URLs also may contain Unicode letters (not symbols). */ +// eslint-disable-next-line export const URL_NO_COMMAS_REGEX = RegExp( 'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?', 'giu', -); // eslint-disable-line +); /** * Regular expression that, in addition to the default regular expression `URL_NO_COMMAS_REGEX`, supports matching commas in URL path and query. * Note, however, that this may prevent parsing URLs from comma delimited lists, or the URLs may become malformed. */ +// eslint-disable-next-line export const URL_WITH_COMMAS_REGEX = RegExp( 'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?', 'giu', -); // eslint-disable-line +); let isDockerPromiseCache: Promise | undefined; diff --git a/packages/utils/src/internals/social.ts b/packages/utils/src/internals/social.ts index 6260b945e6ad..8de2f73d3221 100644 --- a/packages/utils/src/internals/social.ts +++ b/packages/utils/src/internals/social.ts @@ -3,8 +3,8 @@ import cheerio from 'cheerio'; import { htmlToText } from './cheerio'; // Regex inspired by https://zapier.com/blog/extract-links-email-phone-regex/ -// eslint-disable-next-line max-len const EMAIL_REGEX_STRING = + // eslint-disable-next-line max-len '(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\\])'; /** @@ -167,11 +167,9 @@ export function phonesFromUrls(urls: string[]): string[] { // - They use a negative lookbehind and lookahead assertions, which are only supported in Node 8+. // They are used to prevent matching URLs in strings like "blahttps://www.example.com" -// eslint-disable-next-line max-len const LINKEDIN_REGEX_STRING = '(? { let isEvaluated = false; const browserCrawler = new (class extends BrowserCrawlerTest { - // eslint-disable-next-line max-len protected override async _navigationHandler( ctx: PuppeteerCrawlingContext, gotoOptions: PuppeteerGoToOptions, @@ -292,7 +291,6 @@ describe('BrowserCrawler', () => { }); let optionsGoto: PuppeteerGoToOptions; const browserCrawler = new (class extends BrowserCrawlerTest { - // eslint-disable-next-line max-len protected override async _navigationHandler( ctx: PuppeteerCrawlingContext, gotoOptions: PuppeteerGoToOptions, @@ -941,9 +939,10 @@ describe('BrowserCrawler', () => { await crawler.run([serverAddress]); expect(spy).toBeCalled(); - // eslint-disable-next-line max-len expect(spy.mock.calls[0][0]).toEqual( - 'When using RequestList and RequestQueue at the same time, you should instantiate both explicitly and provide them in the crawler options, to ensure correctly handled restarts of the crawler.', + 'When using RequestList and RequestQueue at the same time, ' + + 'you should instantiate both explicitly and provide them in the crawler options, ' + + 'to ensure correctly handled restarts of the crawler.', ); expect(spy.mock.calls[1][0]).toEqual(expect.stringContaining(proxyError)); }); diff --git a/test/core/crawlers/cheerio_crawler.test.ts b/test/core/crawlers/cheerio_crawler.test.ts index 43158e78f66e..d55c5b5d6212 100644 --- a/test/core/crawlers/cheerio_crawler.test.ts +++ b/test/core/crawlers/cheerio_crawler.test.ts @@ -191,9 +191,9 @@ describe('CheerioCrawler', () => { maxConcurrency: 2, }); - // eslint-disable-next-line max-len await expect(cheerioCrawler.run()).rejects.toThrow( - "Route not found for label 'undefined'. You must set up a route for this label or a default route. Use `requestHandler`, `router.addHandler` or `router.addDefaultHandler`.", + "Route not found for label 'undefined'. You must set up a route for this label or a default route. " + + 'Use `requestHandler`, `router.addHandler` or `router.addDefaultHandler`.', ); }); diff --git a/test/core/crawlers/puppeteer_crawler.test.ts b/test/core/crawlers/puppeteer_crawler.test.ts index 7de9d2aa6779..140215ff6c80 100644 --- a/test/core/crawlers/puppeteer_crawler.test.ts +++ b/test/core/crawlers/puppeteer_crawler.test.ts @@ -154,8 +154,8 @@ describe('PuppeteerCrawler', () => { test('should throw if launchOptions.proxyUrl is supplied', async () => { try { + // eslint-disable-next-line new PuppeteerCrawler({ - //eslint-disable-line requestList, maxRequestRetries: 0, maxConcurrency: 1, diff --git a/test/core/error_tracker.test.ts b/test/core/error_tracker.test.ts index ade00956e7f0..f15cb00acbdb 100644 --- a/test/core/error_tracker.test.ts +++ b/test/core/error_tracker.test.ts @@ -1,6 +1,3 @@ -/* eslint-disable no-multi-spaces */ -import exp from 'node:constants'; - import { ErrorTracker } from '../../packages/utils/src/internals/error_tracker'; const random = () => Math.random().toString(36).slice(2); diff --git a/test/core/puppeteer_request_interception.test.ts b/test/core/puppeteer_request_interception.test.ts index 380483f71c5f..ee9abfeeea4d 100644 --- a/test/core/puppeteer_request_interception.test.ts +++ b/test/core/puppeteer_request_interception.test.ts @@ -173,7 +173,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl // Override headers const headers = { ...request.headers(), - accept: 'text/html', + 'accept': 'text/html', 'accept-language': 'en-GB', 'upgrade-insecure-requests': '2', }; diff --git a/test/shared/data/html_to_text_test_data.ts b/test/shared/data/html_to_text_test_data.ts index c7832f97e380..ad123764f957 100644 --- a/test/shared/data/html_to_text_test_data.ts +++ b/test/shared/data/html_to_text_test_data.ts @@ -1,5 +1,3 @@ -/* eslint-disable */ - // The whitespace in the text is important. Don't change it. // We're keeping this text as a JS string, because git and other // tools do magic with line endings and it can break tests. diff --git a/test/utils/general.test.ts b/test/utils/general.test.ts index a6760fe93c00..f7dc788e8d43 100644 --- a/test/utils/general.test.ts +++ b/test/utils/general.test.ts @@ -84,10 +84,10 @@ describe('sleep()', () => { describe('snakeCaseToCamelCase()', () => { test('should camel case all sneaky cases of snake case', () => { const tests = { - aaa_bbb_: 'aaaBbb', + 'aaa_bbb_': 'aaaBbb', '': '', - AaA_bBb_cCc: 'aaaBbbCcc', - a_1_b_1a: 'a1B1a', + 'AaA_bBb_cCc': 'aaaBbbCcc', + 'a_1_b_1a': 'a1B1a', }; Object.entries(tests).forEach(([snakeCase, camelCase]) => {