Start improving escaping

ehmicky · Mar 6, 2022 · 5025f1a · 5025f1a
1 parent aae6aeb
commit 5025f1a
Show file tree

Hide file tree

Showing 7 changed files with 46 additions and 111 deletions.
diff --git a/src/config/normalize/lib/wild_wild_path/parsing/parse.js b/src/config/normalize/lib/wild_wild_path/parsing/parse.js
@@ -1,13 +1,6 @@
-import { parseEscapedChar } from '../tokens/escape.js'
+import { SPECIAL_CHARS } from '../tokens/escape.js'
 import { getStringTokenType } from '../tokens/main.js'
-import {
-  ESCAPE,
-  SEPARATOR,
-  ANY,
-  MINUS,
-  REGEXP_DELIM,
-  SLICE,
-} from '../tokens/special.js'
+import { ESCAPE, SEPARATOR } from '../tokens/special.js'
 
 import { normalizePath } from './normalize.js'
 import { isQueryString } from './validate.js'
@@ -19,7 +12,9 @@ import { isQueryString } from './validate.js'
 //  - "Query": a dot-separated string
 //     - This is more convenient wherever a string is better, including in CLI
 //       flags, in URLs, in files, etc.
-//     - Special characters must be escaped with \: . * \ /
+//     - \ must escape the following characters: . \
+//     - If a token is meant as a property name but could be interpreted as a
+//       different type, it must start with \
 //     - A leading dot can be optionally used, e.g. `.one`. It is ignored.
 //  - "Tokens": an array of values of diverse types
 //     - This is sometimes convenient
@@ -32,7 +27,9 @@ import { isQueryString } from './validate.js'
 //     - Empty keys are supported with empty strings
 //  - Array index
 //     - Query format: "1"
-//     - Tokens format: 1 (must be an integer, not a string)
+//     - Tokens format: 1
+//     - We distinguish between property names and array indices that are
+//       integers
 //     - Negatives indices can be used to get elements at the end, e.g. -2
 //        - Including -0 which can be used to append elements
 //  - Array slices
@@ -95,11 +92,11 @@ const parseQuery = function (query) {
 
     // eslint-disable-next-line max-depth
     if (char === ESCAPE) {
-      addEscapedChar(state, query)
+      parseEscape(state, query)
     } else if (char === SEPARATOR || state.index === query.length) {
       addToken(state)
     } else {
-      addChar(state, char)
+      state.chars += char
     }
   }
 
@@ -113,35 +110,33 @@ const getInitialState = function (query) {
   return state
 }
 
-const addEscapedChar = function (state, query) {
-  state.index += 1
-  state.chars += parseEscapedChar(query[state.index])
+const parseEscape = function (state, query) {
+  const nextChar = query[state.index + 1]
+
+  if (SPECIAL_CHARS.has(nextChar)) {
+    state.index += 1
+    state.chars += nextChar
+    return
+  }
+
+  if (state.chars.length !== 0) {
+    throw new Error(
+      `character "${ESCAPE}" must either be at the start of a token, or be followed by ${SEPARATOR} or ${ESCAPE}`,
+    )
+  }
+
+  state.isProp = true
 }
 
 const addToken = function (state) {
-  const tokenType = getStringTokenType(state)
+  const tokenType = getStringTokenType(state.chars, state.isProp)
   const token = tokenType.parse(state.chars)
   // eslint-disable-next-line fp/no-mutating-methods
   state.path.push(token)
   resetState(state)
 }
 
 const resetState = function (state) {
-  state.hasAny = false
-  state.hasMinus = false
-  state.hasRegExp = false
-  state.hasSlice = false
+  state.isProp = false
   state.chars = ''
 }
-
-// eslint-disable-next-line complexity
-const addChar = function (state, char) {
-  if (state.chars.length === 0) {
-    state.hasAny = state.hasAny || char === ANY
-    state.hasMinus = state.hasMinus || char === MINUS
-    state.hasRegExp = state.hasRegExp || char === REGEXP_DELIM
-  }
-
-  state.hasSlice = state.hasSlice || char === SLICE
-  state.chars += char
-}
diff --git a/src/config/normalize/lib/wild_wild_path/tokens/any.js b/src/config/normalize/lib/wild_wild_path/tokens/any.js
@@ -1,7 +1,7 @@
 import isPlainObj from 'is-plain-obj'
 
 import { objectProps } from './common.js'
-import { ANY, ESCAPE, SEPARATOR } from './special.js'
+import { ANY } from './special.js'
 
 // Check the type of a parsed token
 const testObject = function (token) {
@@ -16,20 +16,12 @@ const serialize = function () {
 }
 
 // Check the type of a serialized token
-const testString = function ({ hasAny }) {
-  return hasAny
+const testString = function (chars) {
+  return chars === ANY
 }
 
 // Parse a string into a token
-const parse = function (chars) {
-  if (chars !== ANY) {
-    throw new Error(
-      `character "${ANY}" must not be preceded or followed by other characters except "${SEPARATOR}"
-If you intend "${ANY}" as a wildcard character, please use a regular expression instead.
-Otherwise, please escape it with a "${ESCAPE}".`,
-    )
-  }
-
+const parse = function () {
   return { type: ANY_TYPE }
 }
 

diff --git a/src/config/normalize/lib/wild_wild_path/tokens/array.js b/src/config/normalize/lib/wild_wild_path/tokens/array.js
@@ -1,5 +1,4 @@
 import { arrayProps } from './common.js'
-import { MINUS } from './special.js'
 
 // Check the type of a parsed token.
 // Integers specified as string tokens are assumed to be property names, not
@@ -14,15 +13,7 @@ const serialize = function (token) {
 }
 
 // Check the type of a serialized token
-const testString = function ({ chars, hasMinus }) {
-  return !hasEscapedMinus(chars, hasMinus) && isIndexString(chars)
-}
-
-export const hasEscapedMinus = function (chars, hasMinus) {
-  return chars[0] === MINUS && !hasMinus
-}
-
-export const isIndexString = function (chars) {
+const testString = function (chars) {
   return INTEGER_REGEXP.test(chars)
 }
 

diff --git a/src/config/normalize/lib/wild_wild_path/tokens/escape.js b/src/config/normalize/lib/wild_wild_path/tokens/escape.js
@@ -1,34 +1,7 @@
-import {
-  ESCAPE,
-  SEPARATOR,
-  ANY,
-  MINUS,
-  REGEXP_DELIM,
-  SLICE,
-} from './special.js'
+import { ESCAPE, SEPARATOR } from './special.js'
 
-// Parse an escaped character in a query string
-export const parseEscapedChar = function (escapedChar) {
-  validateEscape(escapedChar)
-  return escapedChar
-}
-
-const validateEscape = function (escapedChar) {
-  if (!SPECIAL_CHARS.has(escapedChar)) {
-    throw new Error(
-      `character "${ESCAPE}" must only be followed by ${SEPARATOR} ${ANY} ${MINUS} ${REGEXP_DELIM} ${SLICE} or ${ESCAPE}`,
-    )
-  }
-}
-
-const SPECIAL_CHARS = new Set([
-  ESCAPE,
-  SEPARATOR,
-  ANY,
-  MINUS,
-  REGEXP_DELIM,
-  SLICE,
-])
+// Special characters to escape
+export const SPECIAL_CHARS = new Set([ESCAPE, SEPARATOR])
 
 // Escape special characters
 export const escapeSpecialChars = function (string) {

diff --git a/src/config/normalize/lib/wild_wild_path/tokens/main.js b/src/config/normalize/lib/wild_wild_path/tokens/main.js
@@ -19,6 +19,8 @@ export const getObjectTokenType = function (token) {
 }
 
 // Retrieve the type of a given token serialized string
-export const getStringTokenType = function (token) {
-  return TOKEN_TYPES.find((tokenType) => tokenType.testString(token))
+export const getStringTokenType = function (chars, isProp) {
+  return isProp
+    ? PROP_TOKEN
+    : TOKEN_TYPES.find((tokenType) => tokenType.testString(chars))
 }
diff --git a/src/config/normalize/lib/wild_wild_path/tokens/regexp.js b/src/config/normalize/lib/wild_wild_path/tokens/regexp.js
@@ -14,21 +14,14 @@ const serialize = function (token) {
 }
 
 // Check the type of a serialized token
-const testString = function ({ hasRegExp }) {
-  return hasRegExp
+const testString = function (chars) {
+  return chars[0] === REGEXP_DELIM && chars.lastIndexOf(REGEXP_DELIM) > 1
 }
 
 // Parse a string into a token
 // This might throw if the RegExp is invalid.
 const parse = function (chars) {
   const endIndex = chars.lastIndexOf(REGEXP_DELIM)
-
-  if (endIndex === 0) {
-    throw new Error(
-      `regular expression "${chars}" is missing a "${REGEXP_DELIM}" at the end.`,
-    )
-  }
-
   const regExpString = chars.slice(1, endIndex)
   const regExpFlags = chars.slice(endIndex + 1)
   return new RegExp(regExpString, regExpFlags)

diff --git a/src/config/normalize/lib/wild_wild_path/tokens/slice.js b/src/config/normalize/lib/wild_wild_path/tokens/slice.js
@@ -1,11 +1,6 @@
 import isPlainObj from 'is-plain-obj'
 
-import {
-  ARRAY_TOKEN,
-  getArrayIndex,
-  hasEscapedMinus,
-  isIndexString,
-} from './array.js'
+import { ARRAY_TOKEN, getArrayIndex } from './array.js'
 import { arrayProps } from './common.js'
 import { SLICE } from './special.js'
 
@@ -33,17 +28,11 @@ const serializeEdge = function (edge) {
 }
 
 // Check the type of a serialized token
-const testString = function ({ chars, hasSlice, hasMinus }) {
-  return (
-    hasSlice &&
-    !hasEscapedMinus(chars, hasMinus) &&
-    chars.split(SLICE).every(isEdgeString)
-  )
+const testString = function (chars) {
+  return SLICE_REGEXP.test(chars)
 }
 
-const isEdgeString = function (chars) {
-  return chars === DEFAULT_EDGE_STRING || isIndexString(chars)
-}
+const SLICE_REGEXP = /^(-?\d+)?:(-?\d+)?$/u
 
 // Parse a string into a token
 const parse = function (chars) {