Skip to content

Commit

Permalink
fix(enqueueLinks): filter out empty/nullish globs (#2286)
Browse files Browse the repository at this point in the history
This was noticed as an issue when running through Apify, where we'd get an
empty element that throws a very non-descriptive error.

Reported here:
https://console.apify.com/actors/aYG0l9s7dbB7j3gbS/issues/Wd0Ahfk9Vd2OPk4Uf
  • Loading branch information
vladfrangu committed Jan 17, 2024
1 parent 3d2c149 commit 84319b3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
17 changes: 16 additions & 1 deletion packages/core/src/enqueue_links/shared.ts
Expand Up @@ -78,7 +78,22 @@ export function constructRegExpObjectsFromPseudoUrls(pseudoUrls: PseudoUrlInput[
*/
export function constructGlobObjectsFromGlobs(globs: GlobInput[]): GlobObject[] {
return globs
.filter((glob) => ((glob as GlobObject).glob || (glob as string)).trim().length > 0)
.filter((glob) => {
// Skip possibly nullish, empty strings
if (!glob) {
return false;
}

if (typeof glob === 'string') {
return glob.trim().length > 0;
}

if (glob.glob) {
return glob.glob.trim().length > 0;
}

return false;
})
.map((item) => {
// Get glob object from cache.
let globObject = enqueueLinksPatternCache.get(item);
Expand Down
2 changes: 2 additions & 0 deletions test/core/enqueue_links/enqueue_links.test.ts
Expand Up @@ -157,6 +157,8 @@ describe('enqueueLinks()', () => {
'https://example.com/**/*',
'',
{ glob: ' ' },
// Empty string used to throw an error (https://console.apify.com/actors/aYG0l9s7dbB7j3gbS/issues/Wd0Ahfk9Vd2OPk4Uf)
{ glob: '' },
{ glob: '?(http|https)://cool.com/', method: 'POST' as const },
];

Expand Down

0 comments on commit 84319b3

Please sign in to comment.