Skip to content

Commit

Permalink
✅ Add some more checks on stringMatching
Browse files Browse the repository at this point in the history
  • Loading branch information
dubzzz committed May 30, 2023
1 parent f917e5c commit 08ed633
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 70 deletions.
@@ -1,5 +1,8 @@
import { safeIndexOf } from '../../../utils/globals';
import { TokenizerBlockMode, readFrom } from './ReadRegex';

const safeStringFromCodePoint = String.fromCodePoint;

/**
* Pop the last pushed token and return it,
* Throw if unable to pop it.
Expand Down Expand Up @@ -148,7 +151,7 @@ function blockToCharToken(block: string): CharRegexToken {
case 'x': {
const allDigits = block.substring(2);
const codePoint = Number.parseInt(allDigits, 16);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'hex', symbol, value: block, codePoint };
}
case 'u': {
Expand All @@ -157,7 +160,7 @@ function blockToCharToken(block: string): CharRegexToken {
}
const allDigits = block[2] === '{' ? block.substring(3, block.length - 1) : block.substring(2);
const codePoint = Number.parseInt(allDigits, 16);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'unicode', symbol, value: block, codePoint };
}

Expand Down Expand Up @@ -193,7 +196,7 @@ function blockToCharToken(block: string): CharRegexToken {
if (isDigit(next)) {
const allDigits = block.substring(1);
const codePoint = Number(allDigits);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'decimal', symbol, value: block, codePoint };
}
const char = block.substring(1); // TODO - Properly handle unicode
Expand Down Expand Up @@ -381,7 +384,7 @@ function pushTokens(tokens: RegexToken[], regexSource: string, unicodeMode: bool
* Build the AST corresponding to the passed instance of RegExp
*/
export function tokenizeRegex(regex: RegExp): RegexToken {
const unicodeMode = regex.flags.includes('u');
const unicodeMode = safeIndexOf([...regex.flags], 'u') !== -1;
const regexSource = regex.source;
const tokens: RegexToken[] = [];
pushTokens(tokens, regexSource, unicodeMode);
Expand Down
37 changes: 22 additions & 15 deletions packages/fast-check/src/arbitrary/stringMatching.ts
@@ -1,4 +1,7 @@
import { Arbitrary } from '../check/arbitrary/definition/Arbitrary';
import { safeEvery, safeJoin } from '../utils/globals';
import { Error, safeIndexOf, safeMap } from '../utils/globals';
import { stringify } from '../utils/stringify';
import { SizeForArbitrary } from './_internals/helpers/MaxLengthFromMinLength';
import { tokenizeRegex, RegexToken } from './_internals/helpers/TokenizeRegex';
import { char } from './char';
Expand All @@ -9,6 +12,8 @@ import { oneof } from './oneof';
import { stringOf } from './stringOf';
import { tuple } from './tuple';

const safeStringFromCodePoint = String.fromCodePoint;

/**
* Constraints to be applied on the arbitrary {@link stringMatching}
* @remarks Since 3.10.0
Expand All @@ -24,15 +29,15 @@ export type StringMatchingConstraints = {

// Some predefined chars or groups of chars
// https://www.w3schools.com/jsref/jsref_regexp_whitespace.asp
const wordChars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
const digitChars = '0123456789';
const spaceChars = ' \t\r\n\v\f';
const newLineAndTerminatorChars = '\r\n\x1E\x15';
const wordChars = [...'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'];
const digitChars = [...'0123456789'];
const spaceChars = [...' \t\r\n\v\f'];
const newLineAndTerminatorChars = [...'\r\n\x1E\x15'];

const defaultChar = char();

/**
 * Build the error describing an AST node that could not be handled.
 * The error is returned, not thrown: raising it is left to the caller.
 * NOTE(review): the `never` parameter type presumably serves as an exhaustiveness check — confirm at call sites.
 * @param astNode - Node that could not be handled; a serialized form of it is embedded in the message
 * @returns The error instance carrying the "Unsupported AST node!" message
 */
function raiseUnsupportedASTNode(astNode: never): Error {
return new Error(`Unsupported AST node! Received: ${JSON.stringify(astNode)}`);
return new Error(`Unsupported AST node! Received: ${stringify(astNode)}`);
}

/**
Expand All @@ -48,32 +53,32 @@ function toMatchingArbitrary(astNode: RegexToken, constraints: StringMatchingCon
return constantFrom(...wordChars);
}
case '\\W': {
return defaultChar.filter((c) => !wordChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(wordChars, c) === -1);
}
case '\\d': {
return constantFrom(...digitChars);
}
case '\\D': {
return defaultChar.filter((c) => !digitChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(digitChars, c) === -1);
}
case '\\s': {
return constantFrom(...spaceChars);
}

case '\\S': {
return defaultChar.filter((c) => !spaceChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(spaceChars, c) === -1);
}
case '\\b':
case '\\B': {
throw new Error(`Meta character ${astNode.value} not implemented yet!`);
}
case '.': {
return defaultChar.filter((c) => !newLineAndTerminatorChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(newLineAndTerminatorChars, c) === -1);
}
}
}
if (astNode.symbol === undefined) {
throw new Error(`Unexpected undefined symbol received for non-meta Char! Received: ${JSON.stringify(astNode)}`);
throw new Error(`Unexpected undefined symbol received for non-meta Char! Received: ${stringify(astNode)}`);
}
return constant(astNode.symbol);
}
Expand Down Expand Up @@ -106,19 +111,21 @@ function toMatchingArbitrary(astNode: RegexToken, constraints: StringMatchingCon
}
case 'Alternative': {
// TODO - No unmap implemented yet!
return tuple(...astNode.expressions.map((n) => toMatchingArbitrary(n, constraints))).map((vs) => vs.join(''));
return tuple(...safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints))).map((vs) =>
safeJoin(vs, '')
);
}
case 'CharacterClass':
if (astNode.negative) {
const childrenArbitraries = astNode.expressions.map((n) => toMatchingArbitrary(n, constraints));
return defaultChar.filter((c) => childrenArbitraries.every((arb) => !arb.canShrinkWithoutContext(c)));
const childrenArbitraries = safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints));
return defaultChar.filter((c) => safeEvery(childrenArbitraries, (arb) => !arb.canShrinkWithoutContext(c)));
}
return oneof(...astNode.expressions.map((n) => toMatchingArbitrary(n, constraints)));
return oneof(...safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints)));
case 'ClassRange': {
const min = astNode.from.codePoint;
const max = astNode.to.codePoint;
return integer({ min, max }).map(
(n) => String.fromCodePoint(n),
(n) => safeStringFromCodePoint(n),
(c) => {
if (typeof c !== 'string') throw new Error('Invalid type');
if ([...c].length !== 1) throw new Error('Invalid length');
Expand Down
14 changes: 14 additions & 0 deletions packages/fast-check/src/utils/globals.ts
Expand Up @@ -83,6 +83,7 @@ const untouchedPop = Array.prototype.pop;
const untouchedSplice: (start: number, deleteCount?: number | undefined) => any[] = Array.prototype.splice;
const untouchedSlice = Array.prototype.slice;
const untouchedSort = Array.prototype.sort;
const untouchedEvery = Array.prototype.every;
function extractForEach(instance: unknown[]) {
try {
return instance.forEach;
Expand Down Expand Up @@ -153,6 +154,13 @@ function extractSort(instance: unknown[]) {
return undefined;
}
}
/**
 * Read the `every` property currently exposed by the given array instance.
 * Yields `undefined` when the mere property access throws.
 */
function extractEvery(instance: unknown[]) {
  let resolved: typeof instance.every | undefined;
  try {
    resolved = instance.every;
  } catch (err) {
    // Accessing the property failed: report the absence of a usable method
    resolved = undefined;
  }
  return resolved;
}
export function safeForEach<T>(instance: T[], fn: (value: T, index: number, array: T[]) => void): void {
if (extractForEach(instance) === untouchedForEach) {
return instance.forEach(fn);
Expand Down Expand Up @@ -219,6 +227,12 @@ export function safeSort<T>(instance: T[], ...args: [compareFn?: ((a: T, b: T) =
}
return safeApply(untouchedSort, instance, args);
}
/**
 * Run `every` on an array: the instance's own method is called when it is still the one
 * captured in `untouchedEvery`, otherwise the captured method is invoked through `safeApply`.
 */
export function safeEvery<T>(instance: T[], ...args: [predicate: (value: T) => boolean]): boolean {
  const isUntouched = extractEvery(instance) === untouchedEvery;
  return isUntouched ? instance.every(...args) : safeApply(untouchedEvery, instance, args);
}

// Date

Expand Down
8 changes: 8 additions & 0 deletions packages/fast-check/test/e2e/NoRegression.spec.ts
Expand Up @@ -148,6 +148,14 @@ describe(`NoRegression`, () => {
)
).toThrowErrorMatchingSnapshot();
});
// Regression check for fc.stringMatching: running the assertion is expected to throw,
// and the thrown error is compared against the stored snapshot.
it('stringMatching', () => {
expect(() =>
fc.assert(
fc.property(fc.stringMatching(/(^|\s)a+[^a][b-eB-E]+[^b-eB-E](\s|$)/), (v) => testFunc(v)),
settings
)
).toThrowErrorMatchingSnapshot();
});
it('unicodeString', () => {
expect(() =>
fc.assert(
Expand Down
4 changes: 4 additions & 0 deletions packages/fast-check/test/e2e/Poisoning.spec.ts
Expand Up @@ -2,6 +2,9 @@ import { restoreGlobals } from '@fast-check/poisoning';
import * as fc from '../../src/fast-check';
import { seed } from './seed';

// Building the matcher in a polluted context does not work for now, so it is built ahead of time here
const preBuiltStringMatching = fc.stringMatching(/(^|\s)[0-9a-f]{8}-(\w{4})[^abc][^a-u]\D+(\s|$)/);

describe(`Poisoning (seed: ${seed})`, () => {
it.each<{
name: string;
Expand Down Expand Up @@ -44,6 +47,7 @@ describe(`Poisoning (seed: ${seed})`, () => {
{ name: 'string16bits', arbitraryBuilder: () => fc.string16bits() },
{ name: 'fullUnicodeString', arbitraryBuilder: () => fc.fullUnicodeString() },
{ name: 'stringOf', arbitraryBuilder: () => fc.stringOf(fc.char()) },
{ name: 'stringMatching', arbitraryBuilder: () => preBuiltStringMatching },
// : More specific strings
// related to fc.double: pure-rand is not resilient to prototype poisoning occurring on Array
//{ name: 'json', arbitraryBuilder: () => fc.json() },
Expand Down
102 changes: 61 additions & 41 deletions packages/fast-check/test/e2e/__snapshots__/NoRegression.spec.ts.snap
Expand Up @@ -982,47 +982,6 @@ Execution summary:
. . . . . . . . √ ["~"]"
`;
exports[`NoRegression gen 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0", endOnFailure: true }
Counterexample: [[0,-1]]
Shrunk 28 time(s)
Got error: Property failed by returning false
Execution summary:
× [[9,-132539150]]
. × [[0,-132539150]]
. . √ [[0,0]]
. . × [[0,-66269575]]
. . . × [[0,-33134788]]
. . . . × [[0,-16567394]]
. . . . . × [[0,-8283697]]
. . . . . . × [[0,-4141849]]
. . . . . . . × [[0,-2070925]]
. . . . . . . . × [[0,-1035463]]
. . . . . . . . . × [[0,-517732]]
. . . . . . . . . . × [[0,-258866]]
. . . . . . . . . . . × [[0,-129433]]
. . . . . . . . . . . . × [[0,-64717]]
. . . . . . . . . . . . . × [[0,-32359]]
. . . . . . . . . . . . . . × [[0,-16180]]
. . . . . . . . . . . . . . . × [[0,-8090]]
. . . . . . . . . . . . . . . . × [[0,-4045]]
. . . . . . . . . . . . . . . . . × [[0,-2023]]
. . . . . . . . . . . . . . . . . . × [[0,-1012]]
. . . . . . . . . . . . . . . . . . . × [[0,-506]]
. . . . . . . . . . . . . . . . . . . . × [[0,-253]]
. . . . . . . . . . . . . . . . . . . . . × [[0,-127]]
. . . . . . . . . . . . . . . . . . . . . . × [[0,-64]]
. . . . . . . . . . . . . . . . . . . . . . . × [[0,-32]]
. . . . . . . . . . . . . . . . . . . . . . . . × [[0,-16]]
. . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-8]]
. . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-4]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-2]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-1]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . √ [[0,0]]"
`;
exports[`NoRegression base64String 1`] = `
"Property failed after 3 tests
{ seed: 42, path: "2:3:5:5", endOnFailure: true }
Expand Down Expand Up @@ -1957,6 +1916,47 @@ Execution summary:
}]"
`;
exports[`NoRegression gen 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0", endOnFailure: true }
Counterexample: [[0,-1]]
Shrunk 28 time(s)
Got error: Property failed by returning false
Execution summary:
× [[9,-132539150]]
. × [[0,-132539150]]
. . √ [[0,0]]
. . × [[0,-66269575]]
. . . × [[0,-33134788]]
. . . . × [[0,-16567394]]
. . . . . × [[0,-8283697]]
. . . . . . × [[0,-4141849]]
. . . . . . . × [[0,-2070925]]
. . . . . . . . × [[0,-1035463]]
. . . . . . . . . × [[0,-517732]]
. . . . . . . . . . × [[0,-258866]]
. . . . . . . . . . . × [[0,-129433]]
. . . . . . . . . . . . × [[0,-64717]]
. . . . . . . . . . . . . × [[0,-32359]]
. . . . . . . . . . . . . . × [[0,-16180]]
. . . . . . . . . . . . . . . × [[0,-8090]]
. . . . . . . . . . . . . . . . × [[0,-4045]]
. . . . . . . . . . . . . . . . . × [[0,-2023]]
. . . . . . . . . . . . . . . . . . × [[0,-1012]]
. . . . . . . . . . . . . . . . . . . × [[0,-506]]
. . . . . . . . . . . . . . . . . . . . × [[0,-253]]
. . . . . . . . . . . . . . . . . . . . . × [[0,-127]]
. . . . . . . . . . . . . . . . . . . . . . × [[0,-64]]
. . . . . . . . . . . . . . . . . . . . . . . × [[0,-32]]
. . . . . . . . . . . . . . . . . . . . . . . . × [[0,-16]]
. . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-8]]
. . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-4]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-2]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-1]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . √ [[0,0]]"
`;
exports[`NoRegression hexaString 1`] = `
"Property failed after 6 tests
{ seed: 42, path: "5:1:1:10:1:5:0", endOnFailure: true }
Expand Down Expand Up @@ -3892,6 +3892,26 @@ Execution summary:
. . √ ["CB"]"
`;
exports[`NoRegression stringMatching 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:1:1:1:1:0", endOnFailure: true }
Counterexample: ["a B "]
Shrunk 5 time(s)
Got error: Property failed by returning false
Execution summary:
× ["aa!cBcBx\\t"]
. √ ["a!cBcBx\\t"]
. × ["aa cBcBx\\t"]
. . √ ["a cBcBx\\t"]
. . × ["aa Bx\\t"]
. . . √ ["a Bx\\t"]
. . . × ["aa B \\t"]
. . . . √ ["a B \\t"]
. . . . × ["aa B "]
. . . . . × ["a B "]"
`;
exports[`NoRegression stringOf 1`] = `
"Property failed after 2 tests
{ seed: 42, path: "1:1:0", endOnFailure: true }
Expand Down
26 changes: 16 additions & 10 deletions website/docs/core-blocks/arbitraries/combiners/string.md
Expand Up @@ -65,19 +65,25 @@ String matching the passed regex.
**Usages:**

```js
fc.stringMatching(/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/);
// Note: Buggy IPv4 regex also matching 999.999.999.999
// Examples of generated values: "422.299.995.55", "2.1.3.6", "0.47.62.53", "45.34.48.3", "768.2.602.560"…
fc.stringMatching(/html|php|css|java(script)?/);
// Note: The regex does not contain ^ or $ assertions, so extra text could be added before and after the match
// Examples of generated values: "css", "html", "java", "php", "javascript"…

fc.stringMatching(/[0-9a-f]{8}-[0-9a-f]{4}-[12345][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/);
// Note: regex matching UUID
fc.stringMatching(/^[0-9a-f]{8}-[0-9a-f]{4}-[12345][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/);
// Note: Regex matching UUID
// Examples of generated values:
// • "93a1ec6d-7f8c-3ace-8ea7-ac73d142269b"
// • "11b300d1-dd9e-3ae1-abe9-bdfd61a2e1d0"
// • "dde3a7b7-3afb-5c23-bacb-7cc7b80ba93a"
// • "eb1c3b5f-d8b9-5907-a4bc-aacda161bdeb"
// • "6f3dcbaa-de1c-56ac-9eac-0d1ebf77ba13"
// • "fd606aa1-b53b-1c7b-9e2f-1e2c1ff1b8e9"
// • "e74cec0b-bd5a-4dba-96a9-edbfa9c1a198"
// • "fcccdcf3-908e-5179-adce-7ebae72c12dc"
// • "0eab1fab-5bc2-336c-9ccb-a3fecbe72ee2"
// • "bb3073ee-2283-2538-ba0c-1b976ebb9610"
// • …

fc.stringMatching(
/^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/
);
// Note: Regex matching IPv4 addresses; we recommend relying on `fc.ipV4()` instead
// Examples of generated values: "003.2.210.06", "253.17.60.12", "250.19.229.08", "3.250.26.253", "200.00.0.254"…
```

Resources: [API reference](https://fast-check.dev/api-reference/functions/stringMatching.html).
Expand Down

0 comments on commit 08ed633

Please sign in to comment.