diff --git a/src/config/normalize/lib/rule.js b/src/config/normalize/lib/rule.js index 07477f77e..6fd0704c4 100644 --- a/src/config/normalize/lib/rule.js +++ b/src/config/normalize/lib/rule.js @@ -9,9 +9,9 @@ export const normalizeRules = function (rules) { } const parseName = function ({ name, ...rule }) { - const nameQuery = serializeQuery(name) const namePath = normalizeQuery(name) - return { ...rule, nameQuery, namePath } + const nameQuery = serializeQuery(name) + return { ...rule, namePath, nameQuery } } const normalizeRule = function ( diff --git a/src/config/normalize/lib/wild_wild_parser/compare.js b/src/config/normalize/lib/wild_wild_parser/compare.js index b1a981fc5..c18742284 100644 --- a/src/config/normalize/lib/wild_wild_parser/compare.js +++ b/src/config/normalize/lib/wild_wild_parser/compare.js @@ -1,5 +1,4 @@ -import { validatePath } from './normalize.js' -import { normalizeQuery } from './parse.js' +import { normalizeQuery, normalizePath } from './normalize.js' import { getObjectTokenType } from './tokens/main.js' // Check if two queries are equal. 
@@ -38,22 +37,24 @@ const isSameQueryArray = function (queryArrayA, queryArrayB) { // Check if two paths are equal export const isSamePath = function (pathA, pathB) { - validatePath(pathA) - validatePath(pathB) + const pathC = normalizePath(pathA) + const pathD = normalizePath(pathB) return ( - pathA.length === pathB.length && - pathA.every((prop, index) => isSameToken(pathB[index], prop)) + pathC.length === pathD.length && + pathC.every((prop, index) => isSameToken(pathD[index], prop)) ) } // Check if a path is a parent to another export const isParentPath = function (parentPath, childPath) { + const parentPathA = normalizePath(parentPath) + const childPathA = normalizePath(childPath) return ( - childPath.length > parentPath.length && - childPath.every( + childPathA.length > parentPathA.length && + childPathA.every( (childToken, index) => - index >= parentPath.length || - isSameToken(childToken, parentPath[index]), + index >= parentPathA.length || + isSameToken(childToken, parentPathA[index]), ) ) } diff --git a/src/config/normalize/lib/wild_wild_parser/main.js b/src/config/normalize/lib/wild_wild_parser/main.js index fdf65d798..8be966f9d 100644 --- a/src/config/normalize/lib/wild_wild_parser/main.js +++ b/src/config/normalize/lib/wild_wild_parser/main.js @@ -4,6 +4,7 @@ export { isSameToken, isParentPath, } from './compare.js' -export { normalizeQuery, normalizePath } from './parse.js' +export { normalizeQuery, normalizePath } from './normalize.js' +export { parseQuery, parsePath } from './parse.js' export { serializeQuery, serializePath } from './serialize.js' export { getTokenType } from './tokens/main.js' diff --git a/src/config/normalize/lib/wild_wild_parser/normalize.js b/src/config/normalize/lib/wild_wild_parser/normalize.js index 65ec63034..5769888e0 100644 --- a/src/config/normalize/lib/wild_wild_parser/normalize.js +++ b/src/config/normalize/lib/wild_wild_parser/normalize.js @@ -1,111 +1,95 @@ -import { inspect } from 'util' - -import { 
getObjectTokenType, getPathObjectTokenType } from './tokens/main.js' - -// Most methods accept both query and array syntaxes. -// This checks which one is used. -export const isQueryString = function (query) { - return typeof query === 'string' -} - -// Transform a queryArrays into a path, if possible -export const normalizePathShape = function (queryArrays) { - if (queryArrays.length !== 1) { - throwQueryArraysError(queryArrays, 'It must not be a union.') - } - - const [queryArray] = queryArrays - validatePath(queryArray) - return queryArray -} - -// Paths are a subset of query strings|arrays which use: -// - No unions -// - Only prop tokens, and array tokens (positive only) -// Those are the ones exposed in output, as opposed to query arrays which are -// exposed in input. -export const validatePath = function (path) { - if (!Array.isArray(path)) { - throwQueryArraysError(path, 'It must be an array.') - } - - path.forEach((prop) => { - validateProp(prop, path) - }) -} - -const validateProp = function (prop, path) { - if (getPathObjectTokenType(prop) === undefined) { - throwTokenError( - path, - prop, - 'It must be a property name (string) or an array index (positive integer).', - ) - } -} - -// Normalize query arrays -export const normalizeQueryArrays = function (queryArrays) { - validateQueryArrays(queryArrays) - const queryArraysA = - queryArrays.every(Array.isArray) && queryArrays.length !== 0 - ? 
queryArrays - : [queryArrays] - return queryArraysA.map(normalizeQueryArray) -} - -const validateQueryArrays = function (queryArrays) { - if (!Array.isArray(queryArrays)) { - throwQueryArraysError(queryArrays, 'It must be an array.') - } -} - -const normalizeQueryArray = function (queryArray) { - return queryArray.map((token) => normalizeToken(token, queryArray)) -} - -const normalizeToken = function (token, queryArray) { - const tokenType = getObjectTokenType(token) - validateToken(tokenType, token, queryArray) - return tokenType.normalize(token) -} - -const validateToken = function (tokenType, token, queryArray) { - if (tokenType === undefined) { - throwTokenError( - queryArray, - token, - `It must be one of the following: - - a property name string - - an array index integer, positive or negative - - a property name regular expression - - { type: "any" } - - { type: "slice", from: integer, to: integer }`, - ) - } -} - -const throwQueryArraysError = function (queryArray, message) { - throw new Error(`Invalid query: ${inspect(queryArray)}\n${message}`) -} - -const throwTokenError = function (queryArray, token, message) { - throwQueryArraysError( - queryArray, - `Invalid token: ${inspect(token)}\n${message}`, - ) -} - -// Validate query string is a string -export const validateQueryString = function (queryString) { - if (!isQueryString(queryString)) { - throw new Error('it must be a string.') - } +import { parsePath, parseQuery } from './parse.js' +import { + normalizeQueryArrays, + isQueryString, + normalizeArrayPath, +} from './validate.js' + +// There are two formats: +// - Query string +// - Tokens are dot-separated +// - Unions are space-separated +// - This is more convenient wherever a string is better, including in CLI +// flags, in URLs, in files, etc. +// - \ must escape the following characters: . 
\ space +// - If a token is meant as a property name but could be interpreted as a +// different type, it must start with \ +// - A leading dot can be optionally used, e.g. `.one`. It is ignored. +// - A lone dot targets the root. +// - Property names that are empty strings can be specified, e.g. `..a..b.` +// parses as `["", "a", "", "b", ""]` +// - Array[s] of tokens +// - Tokens are elements of the inner arrays +// - Unions use optional outer arrays +// - An empty inner array targets the root. +// - This does not need any escaping, making it better with dynamic input +// - This is faster as it does not perform any parsing +// Unions must not have 0 elements: +// - Empty arrays are interpreted as a single array of tokens targeting the +// root +// - Empty query strings throw an error +// - This is because: +// - Empty union semantics might be confusing +// - Empty arrays are ambiguous with root queries +// - Which are a much more common use case +// - Also, this allows paths to be a strict subset of query arrays +// - Otherwise, root queries would need to be wrapped in an outer +// array +// - Downside: if a union of query arrays is computed dynamically by the +// consumer logic, it might need to test whether the array is empty +// Each object property is matched by a token among the following types: +// - Property name +// - String format: "propName" +// - Array format: "propName" +// - Empty keys are supported with empty strings +// - Array index +// - String format: "1" +// - Array format: 1 +// - We distinguish between property names and array indices that are +// integers +// - Negative indices can be used to get elements at the end, e.g. 
-2 +// - Including -0 which can be used to append elements +// - Array slices +// - String format: "0:2" +// - Array format: { type: "slice", from: 0, to: 2 } +// - Matches multiple indices of an array +// - Negative indices like the array indices format +// - `from` is included, `to` is excluded (like `Array.slice()`) +// - `from` defaults to 0 and `to` to -0 +// - Wildcard +// - String format: "*" +// - Array format: { type: "any" } +// - We use objects instead of strings or symbols as both are valid as +// object properties which creates a risk for injections +// - Matches any object property or array item +// - Regular expression +// - String format: "/regexp/" or "/regexp/flags" +// - Array format: RegExp instance +// - Matches any object property with a matching name +// - ^ and $ must be used if the RegExp needs to match from the beginning +// or until the end +// Symbols are always ignored: +// - Both in the query string|array and in the target value +// - This is because symbols cannot be serialized in a query string +// - This would remove the guarantee that both string|array syntaxes are +// equivalent and interchangeable +// - We do not use `symbol.description` as this should not be used for +// identity purposes +// Exceptions are thrown on syntax errors: +// - I.e. query or path syntax errors, or wrong arguments +// - But queries matching nothing do not throw: instead they return nothing + +// Parse a path string into an array of tokens. +// If the query is already an array of tokens, only validate and normalize it. +export const normalizePath = function (query) { + return isQueryString(query) + ? 
parsePath(query) + : normalizeArrayPath(query, query) } -// Empty query strings are ambiguous and not allowed -export const validateEmptyQuery = function ({ arrays }) { - if (arrays.length === 0) { - throw new Error('it must not be an empty string.') - } +// Same as `normalizePath()` but for any query +export const normalizeQuery = function (query) { + return isQueryString(query) + ? parseQuery(query) + : normalizeQueryArrays(query, query) } diff --git a/src/config/normalize/lib/wild_wild_parser/parse.js b/src/config/normalize/lib/wild_wild_parser/parse.js index 081bc444f..6ac7746f5 100644 --- a/src/config/normalize/lib/wild_wild_parser/parse.js +++ b/src/config/normalize/lib/wild_wild_parser/parse.js @@ -1,103 +1,146 @@ import { - normalizeQueryArrays, - isQueryString, - normalizePathShape, -} from './normalize.js' -import { parseQueryString } from './query.js' + ESCAPE, + ARRAY_SEPARATOR, + ARRAY_SEPARATOR_NAME, + TOKEN_SEPARATOR, + SPECIAL_CHARS, +} from './tokens/escape.js' +import { getStringTokenType } from './tokens/main.js' +import { + normalizeArraysPath, + validateEmptyQuery, + validateQueryString, + throwQueryError, +} from './validate.js' // Parse a query string into an array of tokens. // Also validate and normalize it. -// This is inspired by JSON paths. -// There are two formats: -// - Query string -// - Tokens are dot-separated -// - Unions are space-separated -// - This is more convenient wherever a string is better, including in CLI -// flags, in URLs, in files, etc. -// - \ must escape the following characters: . \ space -// - If a token is meant as a property name but could be interpreted as a -// different type, it must be start with \ -// - A leading dot can be optionally used, e.g. `.one`. It is ignored. -// - A lone dot targets the root. -// - Property names that are empty strings can be specified, e.g. 
`..a..b.` -// parses as `["", "a", "", "b", ""]` -// - Array[s] of tokens -// - Tokens are elements of the inner arrays -// - Unions use optional outer arrays -// - An empty inner array targets the root. -// - This does not need any escaping, making it better with dynamic input -// - This is faster as it does not perform any parsing -// Unions must not have 0 elements: -// - Empty arrays are interpreted as a single array of tokens targetting the -// root -// - Empty query strings throw an error -// - This is because: -// - Empty unions semantics might be confusing -// - Empty arrays are ambiguous with root queries -// - Which are a much more common use case -// - Also, this allows paths to be a strict subset of query arrays -// - Otherwise, root queries would need to be wrapped in an outer -// array -// - Downside: if a union of query arrays is computed dynamically by the -// consumer logic, it might need to test whether the array is empty -// Each object property is matched by a token among the following types: -// - Property name -// - String format: "propName" -// - Array format: "propName" -// - Empty keys are supported with empty strings -// - Array index -// - String format: "1" -// - Array format: 1 -// - We distinguish between property names and array indices that are -// integers -// - Negatives indices can be used to get elements at the end, e.g. 
-2 -// - Including -0 which can be used to append elements -// - Array slices -// - String format: "0:2" -// - Array format: { type: "slice", from: 0, end: 2 } -// - Matches multiple indices of an array -// - Negatives indices like the array indices format -// - `from` is included, `to` is excluded (like `Array.slice()`) -// - `from` defaults to 0 and `to` to -0 -// - Wildcard -// - String format: "*" -// - Array format: { type: "any" } -// - We use objects instead of strings or symbols as both are valid as -// object properties which creates a risk for injections -// - Matches any object property or array item -// - Regular expression -// - String format: "/regexp/" or "/regexp/flags" -// - Array format: RegExp instance -// - Matches any object property with a matching name -// - ^ and $ must be used if the RegExp needs to match from the beginning -// or until the end -// Symbols are always ignored: -// - Both in the query string|array and in the target value -// - This is because symbols cannot be serialized in a query string -// - This would remove the guarantee that both string|array syntaxes are -// equivalent and interchangeable -// - We do not use `symbol.description` as this should not be used for -// identity purpose -// Exceptions are thrown on syntax errors: -// - I.e. 
query or path syntax errors, or wrong arguments -// - But queries matching nothing do not throw: instead they return nothing -// Parse a path -export const normalizePath = function (query) { - const queryArrays = normalizeQuery(query) - return normalizePathShape(queryArrays) +export const parsePath = function (pathString) { + const queryArrays = parseQuery(pathString) + return normalizeArraysPath(queryArrays, pathString) +} + +// Same as `parsePath()` but for any query +export const parseQuery = function (queryString) { + validateQueryString(queryString) + const queryArrays = parseQueryString(queryString) + validateEmptyQuery(queryArrays, queryString) + return queryArrays +} + +// Use imperative logic for performance +// eslint-disable-next-line complexity +const parseQueryString = function (queryString) { + const state = getInitialState() + + // eslint-disable-next-line fp/no-loops + for (; state.index <= queryString.length; state.index += 1) { + const char = queryString[state.index] + + // eslint-disable-next-line max-depth + if (char === ESCAPE) { + parseEscape(state, queryString) + } else if (char === TOKEN_SEPARATOR) { + addToken(state) + } else if (char === ARRAY_SEPARATOR || state.index === queryString.length) { + addQueryArray(state) + } else { + state.chars += char + } + } + + return state.arrays } -// Parse a query string or array -export const normalizeQuery = function (query) { - return isQueryString(query) - ? 
safeParseQueryString(query) - : normalizeQueryArrays(query) +const getInitialState = function () { + const state = { arrays: [], index: 0 } + resetQueryArrayState(state) + resetTokenState(state) + return state } -const safeParseQueryString = function (queryString) { - try { - return parseQueryString(queryString) - } catch (error) { - throw new Error(`Invalid query "${queryString}": ${error.message}`) +const parseEscape = function (state, queryString) { + const nextChar = queryString[state.index + 1] + + if (SPECIAL_CHARS.has(nextChar)) { + state.index += 1 + state.chars += nextChar + return + } + + if (state.chars.length !== 0) { + throwQueryError( + queryString, + `Character "${ESCAPE}" must either be at the start of a token, or be followed by ${ARRAY_SEPARATOR_NAME} or ${TOKEN_SEPARATOR} or ${ESCAPE}`, + ) + } + + state.isProp = true +} + +const addQueryArray = function (state) { + if (hasNoQueryArray(state)) { + return + } + + if (!hasOnlyDots(state)) { + addToken(state) } + + // eslint-disable-next-line fp/no-mutating-methods + state.arrays.push(state.array) + resetQueryArrayState(state) +} + +// When the query is an empty string or when two spaces are consecutive +const hasNoQueryArray = function (state) { + return ( + state.firstToken && state.chars.length === 0 && state.array.length === 0 + ) +} + +const resetQueryArrayState = function (state) { + state.array = [] + state.firstToken = true + state.onlyDots = true +} + +const addToken = function (state) { + if (handleLeadingDot(state)) { + return + } + + state.onlyDots = hasOnlyDots(state) + const tokenType = getStringTokenType(state.chars, state.isProp) + const token = tokenType.normalize(tokenType.parse(state.chars)) + // eslint-disable-next-line fp/no-mutating-methods + state.array.push(token) + resetTokenState(state) +} + +// In principle, the root query should be an empty string. 
+// But we use a lone dot instead because: +// - It distinguishes it from an absence of query +// - It allows parsing it in the middle of a space-separated list (as opposed +// to an empty string) +// However, we create ambiguities for queries with only dots (including a +// lone dot), where the last dot should not create an additional token. +const hasOnlyDots = function (state) { + return state.onlyDots && state.chars.length === 0 +} + +// We ignore leading dots, because they are used to represent the root. +// We do not require them for simplicity. +const handleLeadingDot = function (state) { + if (!state.firstToken) { + return false + } + + state.firstToken = false + return state.chars.length === 0 +} + +const resetTokenState = function (state) { + state.isProp = false + state.chars = '' } diff --git a/src/config/normalize/lib/wild_wild_parser/query.js b/src/config/normalize/lib/wild_wild_parser/query.js deleted file mode 100644 index 918b9f79a..000000000 --- a/src/config/normalize/lib/wild_wild_parser/query.js +++ /dev/null @@ -1,129 +0,0 @@ -import { validateEmptyQuery, validateQueryString } from './normalize.js' -import { - ESCAPE, - ARRAY_SEPARATOR, - ARRAY_SEPARATOR_NAME, - TOKEN_SEPARATOR, - SPECIAL_CHARS, -} from './tokens/escape.js' -import { getStringTokenType } from './tokens/main.js' - -// Parse a query string to query arrays of tokens -// Use imperative logic for performance -// eslint-disable-next-line complexity -export const parseQueryString = function (queryString) { - validateQueryString(queryString) - - const state = getInitialState() - - // eslint-disable-next-line fp/no-loops - for (; state.index <= queryString.length; state.index += 1) { - const char = queryString[state.index] - - // eslint-disable-next-line max-depth - if (char === ESCAPE) { - parseEscape(state, queryString) - } else if (char === TOKEN_SEPARATOR) { - addToken(state) - } else if (char === ARRAY_SEPARATOR || state.index === queryString.length) { - addQueryArray(state) - } 
else { - state.chars += char - } - } - - validateEmptyQuery(state) - return state.arrays -} - -const getInitialState = function () { - const state = { arrays: [], index: 0 } - resetQueryArrayState(state) - resetTokenState(state) - return state -} - -const parseEscape = function (state, queryString) { - const nextChar = queryString[state.index + 1] - - if (SPECIAL_CHARS.has(nextChar)) { - state.index += 1 - state.chars += nextChar - return - } - - if (state.chars.length !== 0) { - throw new Error( - `character "${ESCAPE}" must either be at the start of a token, or be followed by ${ARRAY_SEPARATOR_NAME} or ${TOKEN_SEPARATOR} or ${ESCAPE}`, - ) - } - - state.isProp = true -} - -const addQueryArray = function (state) { - if (hasNoQueryArray(state)) { - return - } - - if (!hasOnlyDots(state)) { - addToken(state) - } - - // eslint-disable-next-line fp/no-mutating-methods - state.arrays.push(state.array) - resetQueryArrayState(state) -} - -// When the query is an empty string or when two spaces are consecutive -const hasNoQueryArray = function (state) { - return ( - state.firstToken && state.chars.length === 0 && state.array.length === 0 - ) -} - -const resetQueryArrayState = function (state) { - state.array = [] - state.firstToken = true - state.onlyDots = true -} - -const addToken = function (state) { - if (handleLeadingDot(state)) { - return - } - - state.onlyDots = hasOnlyDots(state) - const tokenType = getStringTokenType(state.chars, state.isProp) - const token = tokenType.normalize(tokenType.parse(state.chars)) - // eslint-disable-next-line fp/no-mutating-methods - state.array.push(token) - resetTokenState(state) -} - -// In principle, the root query should be an empty string. 
-// But we use a lone dot instead because: -// - It distinguishes it from an absence of query -// - It allows parsing it in the middle of a space-separated list (as opposed -// to an empty string) -// However, we create ambiguities for queries with only dots (including a -// lone dot), where the last dot should not create an additional token. -const hasOnlyDots = function (state) { - return state.onlyDots && state.chars.length === 0 -} - -// We ignore leading dots, because they are used to represent the root. -// We do not require them for simplicity. -const handleLeadingDot = function (state) { - if (!state.firstToken) { - return false - } - - state.firstToken = false - return state.chars.length === 0 -} - -const resetTokenState = function (state) { - state.isProp = false - state.chars = '' -} diff --git a/src/config/normalize/lib/wild_wild_parser/serialize.js b/src/config/normalize/lib/wild_wild_parser/serialize.js index caf654542..941f6488d 100644 --- a/src/config/normalize/lib/wild_wild_parser/serialize.js +++ b/src/config/normalize/lib/wild_wild_parser/serialize.js @@ -1,4 +1,4 @@ -import { normalizeQuery, normalizePath } from './parse.js' +import { normalizeQuery, normalizePath } from './normalize.js' import { TOKEN_SEPARATOR, ARRAY_SEPARATOR } from './tokens/escape.js' import { getObjectTokenType } from './tokens/main.js' diff --git a/src/config/normalize/lib/wild_wild_parser/validate.js b/src/config/normalize/lib/wild_wild_parser/validate.js new file mode 100644 index 000000000..a931c2520 --- /dev/null +++ b/src/config/normalize/lib/wild_wild_parser/validate.js @@ -0,0 +1,116 @@ +import { inspect } from 'util' + +import { getObjectTokenType, getPathObjectTokenType } from './tokens/main.js' + +// Validate query string is a string +export const validateQueryString = function (queryString) { + if (!isQueryString(queryString)) { + throwQueryError(queryString, 'It must be a string.') + } +} + +// Most methods accept both query and array syntaxes. 
+// This checks which one is used. +export const isQueryString = function (query) { + return typeof query === 'string' +} + +// Empty query strings are ambiguous and not allowed +export const validateEmptyQuery = function (queryArrays, queryString) { + if (queryArrays.length === 0) { + throwQueryError(queryString, 'It must not be an empty string.') + } +} + +// Transform a queryArrays into a path, if possible +// Paths are a subset of query strings|arrays which use: +// - No unions +// - Only prop tokens, and array tokens (positive only) +// Those are the ones exposed in output, as opposed to query arrays which are +// exposed in input. +export const normalizeArraysPath = function (queryArrays, query) { + if (queryArrays.length !== 1) { + throwQueryError(query, 'It must not be a union.') + } + + const [path] = queryArrays + return normalizeArrayPath(path, query) +} + +// Ensure a queryArray is a path +export const normalizeArrayPath = function (path, query) { + if (!Array.isArray(path)) { + throwQueryError(query, 'It must be an array.') + } + + if (path.some(Array.isArray)) { + throwQueryError(query, 'It must not be a union.') + } + + path.forEach((prop) => { + validateProp(prop, query) + }) + return path +} + +const validateProp = function (prop, query) { + if (getPathObjectTokenType(prop) === undefined) { + throwTokenError( + query, + prop, + 'It must be a property name (string) or an array index (positive integer).', + ) + } +} + +// Normalize query arrays +export const normalizeQueryArrays = function (queryArrays, query) { + validateQueryArrays(queryArrays, query) + const queryArraysA = + queryArrays.every(Array.isArray) && queryArrays.length !== 0 + ? 
queryArrays + : [queryArrays] + return queryArraysA.map((queryArray) => + normalizeQueryArray(queryArray, query), + ) +} + +const validateQueryArrays = function (queryArrays, query) { + if (!Array.isArray(queryArrays)) { + throwQueryError(query, 'It must be an array.') + } +} + +const normalizeQueryArray = function (queryArray, query) { + return queryArray.map((token) => normalizeToken(token, query)) +} + +const normalizeToken = function (token, query) { + const tokenType = getObjectTokenType(token) + validateToken(tokenType, token, query) + return tokenType.normalize(token) +} + +const validateToken = function (tokenType, token, query) { + if (tokenType === undefined) { + throwTokenError( + query, + token, + `It must be one of the following: + - a property name string + - an array index integer, positive or negative + - a property name regular expression + - { type: "any" } + - { type: "slice", from: integer, to: integer }`, + ) + } +} + +const throwTokenError = function (queryArray, token, message) { + throwQueryError(queryArray, `Invalid token: ${inspect(token)}\n${message}`) +} + +// Throw an error when the query is invalid +export const throwQueryError = function (query, message) { + throw new Error(`Invalid query: ${inspect(query)}\n${message}`) +}