Skip to content

Commit ca5712c

Browse files
committed
fix: more accurate filtering for include, exclude
1 parent 9dacf52 commit ca5712c

File tree

2 files changed

+46
-10
lines changed

2 files changed

+46
-10
lines changed

packages/core/src/puppeteer/worker.ts

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ UnlighthouseWorkerStats,
1414
import { ReportArtifacts, asRegExp, createTaskReportFromRoute } from '../util'
1515
import { useUnlighthouse } from '../unlighthouse'
1616
import { useLogger } from '../logger'
17+
import { createFilter } from '../util/filter'
1718
import {
1819
launchPuppeteerCluster,
1920
} from './cluster'
@@ -91,16 +92,12 @@ export async function createUnlighthouseWorker(tasks: Record<UnlighthouseTask, T
9192
if (ignoredRoutes.has(id))
9293
return
9394

94-
if (resolvedConfig.scanner.include) {
95-
// must match
96-
if (resolvedConfig.scanner.include.filter(rule => asRegExp(rule).test(path)).length === 0)
97-
return
98-
}
99-
100-
if (resolvedConfig.scanner.exclude) {
101-
// must not match
102-
if (resolvedConfig.scanner.exclude.filter(rule => asRegExp(rule).test(path)).length > 0)
95+
if (resolvedConfig.scanner.include || resolvedConfig.scanner.exclude) {
96+
const filter = createFilter(resolvedConfig.scanner)
97+
if (filter(path)) {
98+
logger.debug('Skipping route based on include / exclude rules', { path })
10399
return
100+
}
104101
}
105102

106103
/*
@@ -109,7 +106,7 @@ export async function createUnlighthouseWorker(tasks: Record<UnlighthouseTask, T
109106
* Note: this is somewhat similar to the logic in discovery/routes.ts, that's because we need to sample the routes
110107
* from the sitemap or as provided. This logic is for ensuring crawled URLs don't exceed the group limit.
111108
*/
112-
if (resolvedConfig.scanner.dynamicSampling > 0) {
109+
if (resolvedConfig.scanner.dynamicSampling && resolvedConfig.scanner.dynamicSampling > 0) {
113110
const routeGroup = get(route, resolvedConfig.client.groupRoutesKey.replace('route.', ''))
114111
// group all urls by their route definition path name
115112
const routesInGroup = [...routeReports.values()].filter(

packages/core/src/util/filter.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { createRouter, toRouteMatcher } from 'radix3'
2+
3+
interface CreateFilterOptions {
  /** Rules a path must match to be kept. When empty or omitted, every non-excluded path is kept. */
  include?: (string | RegExp)[]
  /** Rules that reject a path. Checked before `include`, so an exclude match always wins. */
  exclude?: (string | RegExp)[]
}

/** One rule list, pre-split by rule kind so that matching a path does no setup work. */
interface CompiledRules {
  regexes: RegExp[]
  literals: Set<string>
  /** radix3-backed matcher for the string rules; undefined when there are no string rules. */
  matchesPattern?: (path: string) => boolean
}

/** Splits a rule list into regexes, exact-match strings and a radix3 matcher built once. */
function compileRules(rules: (string | RegExp)[]): CompiledRules {
  const regexes = rules.filter((rule): rule is RegExp => rule instanceof RegExp)
  const strings = rules.filter((rule): rule is string => typeof rule === 'string')
  const compiled: CompiledRules = { regexes, literals: new Set(strings) }
  if (strings.length > 0) {
    // radix3 takes a `{ [route]: data }` map; the `true` value is arbitrary, only the keys matter
    const routes: Record<string, boolean> = {}
    for (const rule of strings)
      routes[rule] = true
    const matcher = toRouteMatcher(createRouter({ routes, strictTrailingSlash: false }))
    compiled.matchesPattern = (path: string) => matcher.matchAll(path).length > 0
  }
  return compiled
}

/** Returns true when `path` matches any regex, exact string, or radix3 route rule in `compiled`. */
function matchesRules(compiled: CompiledRules, path: string): boolean {
  for (const regex of compiled.regexes) {
    // `test()` resumes from `lastIndex` on global / sticky regexes; reset it so
    // repeated calls with the same rule give the same answer
    regex.lastIndex = 0
    if (regex.test(path))
      return true
  }
  // quick scan of literal string matches before falling back to the router
  if (compiled.literals.has(path))
    return true
  return compiled.matchesPattern !== undefined && compiled.matchesPattern(path)
}

/**
 * Builds a predicate deciding whether a path passes the include / exclude rules.
 *
 * Evaluation order for each path:
 * 1. any `exclude` rule matches -> `false`
 * 2. any `include` rule matches -> `true`
 * 3. otherwise `true` only when no `include` rules were given
 *
 * RegExp rules are tested against the path directly. String rules match either
 * exactly or as radix3 route patterns.
 *
 * The rules are compiled once, when the filter is created; mutating the option
 * arrays afterwards does not affect an existing filter.
 *
 * @param options - include / exclude rule lists; both are optional
 * @returns a predicate returning `true` for paths that should be kept
 */
export function createFilter(options: CreateFilterOptions = {}): (path: string) => boolean {
  const include = options.include || []
  const exclude = options.exclude || []
  if (include.length === 0 && exclude.length === 0)
    return () => true

  const excludeRules = compileRules(exclude)
  const includeRules = compileRules(include)
  const allowUnmatched = include.length === 0

  return function (path: string): boolean {
    if (matchesRules(excludeRules, path))
      return false
    if (matchesRules(includeRules, path))
      return true
    return allowUnmatched
  }
}

0 commit comments

Comments
 (0)