Skip to content

Commit acfc455

Browse files
authored
perf(gatsby): Enable fast filters for $regex/$glob (#24188)
1 parent 6d93fb8 commit acfc455

File tree

3 files changed

+123
-10
lines changed

3 files changed

+123
-10
lines changed

packages/gatsby/src/redux/nodes.ts

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,32 @@ import { createPageDependency } from "./actions/add-page-dependency"
44
import { IDbQueryElemMatch } from "../db/common/query"
55

66
// Only list supported ops here. "CacheableFilterOp"
7-
type FilterOp = "$eq" | "$ne" | "$lt" | "$lte" | "$gt" | "$gte" | "$in" | "$nin"
7+
type FilterOp =
8+
| "$eq"
9+
| "$ne"
10+
| "$lt"
11+
| "$lte"
12+
| "$gt"
13+
| "$gte"
14+
| "$in"
15+
| "$nin"
16+
| "$regex" // Note: this includes $glob
817
// Note: `undefined` is an encoding for a property that does not exist
918
type FilterValueNullable =
1019
| string
1120
| number
1221
| boolean
1322
| null
1423
| undefined
24+
| RegExp // Only valid for $regex
1525
| Array<string | number | boolean | null | undefined>
1626
// This is filter value in most cases
17-
type FilterValue = string | number | boolean | Array<string | number | boolean>
27+
type FilterValue =
28+
| string
29+
| number
30+
| boolean
31+
| RegExp // Only valid for $regex
32+
| Array<string | number | boolean>
1833
export type FilterCacheKey = string
1934
export interface IFilterCache {
2035
op: FilterOp
@@ -697,6 +712,33 @@ export const getNodesFromCacheByValue = (
697712
return set
698713
}
699714

715+
if (op === `$regex`) {
716+
// Note: $glob is converted to $regex so $glob filters go through here, too
717+
// Aside from the input pattern format, further behavior is exactly the same.
718+
719+
// The input to the filter must be a string (including leading/trailing slash and regex flags)
720+
// By the time the filter reaches this point, the filterValue has to be a regex.
721+
722+
if (!(filterValue instanceof RegExp)) {
723+
throw new Error(
724+
`The value for the $regex comparator must be an instance of RegExp`
725+
)
726+
}
727+
const regex = filterValue
728+
729+
const result = new Set<IGatsbyNode>()
730+
filterCache.byValue.forEach((nodes, value) => {
731+
// TODO: does the value have to be a string for $regex? Can we auto-ignore any non-strings? Or does it coerce.
732+
// Note: partial paths should also be included for regex (matching Sift behavior)
733+
if (value !== undefined && regex.test(String(value))) {
734+
nodes.forEach(node => result.add(node))
735+
}
736+
})
737+
738+
// TODO: we _can_ cache this set as well. Might make sense if it turns out that $regex is mostly used with literals
739+
return result
740+
}
741+
700742
if (filterValue == null) {
701743
if (op === `$lt` || op === `$gt`) {
702744
// Nothing is lt/gt null
@@ -714,6 +756,14 @@ export const getNodesFromCacheByValue = (
714756
)
715757
}
716758

759+
if (filterValue instanceof RegExp) {
760+
// This is most likely an internal error, although it is possible for
761+
// users to talk to this API more directly.
762+
throw new Error(
763+
`A RegExp instance is only valid for $regex and $glob comparators`
764+
)
765+
}
766+
717767
if (op === `$lt`) {
718768
// First try a direct approach. If a value is queried that also exists then
719769
// we can prevent a binary search through the whole set, O(1) vs O(log n)

packages/gatsby/src/redux/run-sift.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,17 @@ const {
1919
getNode: siftGetNode,
2020
} = require(`./nodes`)
2121

22-
const FAST_OPS = [`$eq`, `$ne`, `$lt`, `$lte`, `$gt`, `$gte`, `$in`, `$nin`]
22+
const FAST_OPS = [
23+
`$eq`,
24+
`$ne`,
25+
`$lt`,
26+
`$lte`,
27+
`$gt`,
28+
`$gte`,
29+
`$in`,
30+
`$nin`,
31+
`$regex`, // Note: this includes $glob
32+
]
2333

2434
// More of a testing mechanic, to verify whether last runSift call used Sift
2535
let lastFilterUsedSift = false

packages/gatsby/src/schema/__tests__/run-query.js

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,7 +1119,7 @@ it(`should use the cache argument`, async () => {
11191119
it(`handles the regex operator without flags`, async () => {
11201120
const needleStr = `/^The.*Wax/`
11211121
const needleRex = /^The.*Wax/
1122-
const [result, allNodes] = await runSlowFilter({
1122+
const [result, allNodes] = await runFastFilter({
11231123
name: { regex: needleStr },
11241124
})
11251125

@@ -1136,7 +1136,46 @@ it(`should use the cache argument`, async () => {
11361136
// Note: needle is different from checked because `new RegExp('/a/i')` does _not_ work
11371137
const needleRex = /^the.*wax/i
11381138
const needleStr = `/^the.*wax/i`
1139-
const [result, allNodes] = await runSlowFilter({
1139+
const [result, allNodes] = await runFastFilter({
1140+
name: { regex: needleStr },
1141+
})
1142+
1143+
expect(result?.length).toEqual(
1144+
allNodes.filter(node => needleRex.test(node.name)).length
1145+
)
1146+
expect(result?.length).toBeGreaterThan(0) // Make sure there _are_ results, don't let this be zero
1147+
result.forEach(node =>
1148+
expect(needleRex.test(node.name)).toEqual(true)
1149+
)
1150+
})
1151+
1152+
it(`rejects strings without forward slashes`, async () => {
1153+
await expect(
1154+
runFastFilter({
1155+
name: { regex: `^The.*Wax` },
1156+
})
1157+
).rejects.toThrow()
1158+
})
1159+
1160+
it(`rejects strings without trailing slash`, async () => {
1161+
await expect(
1162+
runFastFilter({
1163+
name: { regex: `/^The.*Wax` },
1164+
})
1165+
).rejects.toThrow()
1166+
})
1167+
1168+
it(`accepts strings without leading slash`, async () => {
1169+
// If this test starts failing, that might be okay and it should be dropped.
1170+
// At the time of writing it, this was a status quo edge case for Sift
1171+
1172+
// Passes because the requirement is mostly about a .split() failing
1173+
// for the other fail cases. As long as it passes a regex ultimately
1174+
// we don't really have to care / validate.
1175+
1176+
const needleRex = /(?:)/i // This was what it turned into
1177+
const needleStr = `^the.*wax/i`
1178+
const [result, allNodes] = await runFastFilter({
11401179
name: { regex: needleStr },
11411180
})
11421181

@@ -1149,22 +1188,34 @@ it(`should use the cache argument`, async () => {
11491188
)
11501189
})
11511190

1152-
it(`handles the nested regex operator`, async () => {
1191+
it(`rejects actual regex`, async () => {
1192+
await expect(
1193+
runFastFilter({
1194+
name: { regex: /^The.*Wax/ },
1195+
})
1196+
).rejects.toThrow()
1197+
})
1198+
1199+
it(`handles the nested regex operator and ignores partial paths`, async () => {
11531200
const needleStr = `/.*/`
11541201
const needleRex = /.*/
1155-
const [result, allNodes] = await runSlowFilter({
1202+
const [result, allNodes] = await runFastFilter({
11561203
nestedRegex: { field: { regex: needleStr } },
11571204
})
11581205

11591206
expect(result?.length).toEqual(
11601207
allNodes.filter(
1161-
node => node.nestedRegex && needleRex.test(node.nestedRegex.field)
1208+
node =>
1209+
node.nestedRegex !== undefined &&
1210+
node.nestedRegex.field !== undefined &&
1211+
needleRex.test(node.nestedRegex.field)
11621212
).length
11631213
)
11641214
expect(result?.length).toBeGreaterThan(0) // Make sure there _are_ results, don't let this be zero
11651215
result.forEach(node =>
11661216
expect(
1167-
node.nestedRegex && needleRex.test(node.nestedRegex.field)
1217+
node.nestedRegex === undefined ||
1218+
needleRex.test(node.nestedRegex.field)
11681219
).toEqual(true)
11691220
)
11701221
})
@@ -1726,8 +1777,10 @@ it(`should use the cache argument`, async () => {
17261777
})
17271778

17281779
describe(`$glob`, () => {
1780+
// Note: glob internally uses $regex
1781+
17291782
it(`handles the glob operator`, async () => {
1730-
const [result] = await runSlowFilter({ name: { glob: `*Wax` } })
1783+
const [result] = await runFastFilter({ name: { glob: `*Wax` } })
17311784

17321785
expect(result.length).toEqual(2)
17331786
expect(result[0].name).toEqual(`The Mad Wax`)

0 commit comments

Comments
 (0)