Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✅ Add some more checks on stringMatching #3931

Merged
merged 1 commit into from May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1,5 +1,8 @@
import { safeIndexOf } from '../../../utils/globals';
import { TokenizerBlockMode, readFrom } from './ReadRegex';

const safeStringFromCodePoint = String.fromCodePoint;

/**
* Pop the last pushed token and return it,
* Throw if unable to pop it.
Expand Down Expand Up @@ -148,7 +151,7 @@ function blockToCharToken(block: string): CharRegexToken {
case 'x': {
const allDigits = block.substring(2);
const codePoint = Number.parseInt(allDigits, 16);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'hex', symbol, value: block, codePoint };
}
case 'u': {
Expand All @@ -157,7 +160,7 @@ function blockToCharToken(block: string): CharRegexToken {
}
const allDigits = block[2] === '{' ? block.substring(3, block.length - 1) : block.substring(2);
const codePoint = Number.parseInt(allDigits, 16);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'unicode', symbol, value: block, codePoint };
}

Expand Down Expand Up @@ -193,7 +196,7 @@ function blockToCharToken(block: string): CharRegexToken {
if (isDigit(next)) {
const allDigits = block.substring(1);
const codePoint = Number(allDigits);
const symbol = String.fromCodePoint(codePoint);
const symbol = safeStringFromCodePoint(codePoint);
return { type: 'Char', kind: 'decimal', symbol, value: block, codePoint };
}
const char = block.substring(1); // TODO - Properly handle unicode
Expand Down Expand Up @@ -381,7 +384,7 @@ function pushTokens(tokens: RegexToken[], regexSource: string, unicodeMode: bool
* Build the AST corresponding to the passed instance of RegExp
*/
export function tokenizeRegex(regex: RegExp): RegexToken {
const unicodeMode = regex.flags.includes('u');
const unicodeMode = safeIndexOf([...regex.flags], 'u') !== -1;
const regexSource = regex.source;
const tokens: RegexToken[] = [];
pushTokens(tokens, regexSource, unicodeMode);
Expand Down
37 changes: 22 additions & 15 deletions packages/fast-check/src/arbitrary/stringMatching.ts
@@ -1,4 +1,7 @@
import { Arbitrary } from '../check/arbitrary/definition/Arbitrary';
import { safeEvery, safeJoin } from '../utils/globals';
import { Error, safeIndexOf, safeMap } from '../utils/globals';
import { stringify } from '../utils/stringify';
import { SizeForArbitrary } from './_internals/helpers/MaxLengthFromMinLength';
import { tokenizeRegex, RegexToken } from './_internals/helpers/TokenizeRegex';
import { char } from './char';
Expand All @@ -9,6 +12,8 @@ import { oneof } from './oneof';
import { stringOf } from './stringOf';
import { tuple } from './tuple';

const safeStringFromCodePoint = String.fromCodePoint;

/**
* Constraints to be applied on the arbitrary {@link stringMatching}
* @remarks Since 3.10.0
Expand All @@ -24,15 +29,15 @@ export type StringMatchingConstraints = {

// Some predefined chars or groups of chars
// https://www.w3schools.com/jsref/jsref_regexp_whitespace.asp
// Each set is stored as an array holding one entry per character, so it can be
// both spread into constantFrom(...) and searched with safeIndexOf(...).

// Characters backing the \w and \W meta characters: ASCII letters, digits and underscore
const wordChars = [...'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'];
// Characters backing the \d and \D meta characters
const digitChars = [...'0123456789'];
// Characters backing the \s and \S meta characters
const spaceChars = [...' \t\r\n\v\f'];
// Characters rejected when generating a value for the `.` meta character
// NOTE(review): without the `s` flag, JavaScript's `.` excludes \n, \r, \u2028 and \u2029;
// \x1E and \x15 are not line terminators - confirm these two entries are intended.
const newLineAndTerminatorChars = [...'\r\n\x1E\x15'];

const defaultChar = char();

/**
 * Build the error describing an AST node that the caller does not know how to handle.
 *
 * The function only creates the error, it does not throw it.
 * The parameter is typed `never`, so a call only type-checks where the compiler
 * has already narrowed the node down to no remaining possibility.
 *
 * @param astNode - The node that could not be handled, rendered into the message via `stringify`
 * @returns The error instance carrying the rendered node
 */
function raiseUnsupportedASTNode(astNode: never): Error {
  return new Error(`Unsupported AST node! Received: ${stringify(astNode)}`);
}

/**
Expand All @@ -48,32 +53,32 @@ function toMatchingArbitrary(astNode: RegexToken, constraints: StringMatchingCon
return constantFrom(...wordChars);
}
case '\\W': {
return defaultChar.filter((c) => !wordChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(wordChars, c) === -1);
}
case '\\d': {
return constantFrom(...digitChars);
}
case '\\D': {
return defaultChar.filter((c) => !digitChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(digitChars, c) === -1);
}
case '\\s': {
return constantFrom(...spaceChars);
}

case '\\S': {
return defaultChar.filter((c) => !spaceChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(spaceChars, c) === -1);
}
case '\\b':
case '\\B': {
throw new Error(`Meta character ${astNode.value} not implemented yet!`);
}
case '.': {
return defaultChar.filter((c) => !newLineAndTerminatorChars.includes(c));
return defaultChar.filter((c) => safeIndexOf(newLineAndTerminatorChars, c) === -1);
}
}
}
if (astNode.symbol === undefined) {
throw new Error(`Unexpected undefined symbol received for non-meta Char! Received: ${JSON.stringify(astNode)}`);
throw new Error(`Unexpected undefined symbol received for non-meta Char! Received: ${stringify(astNode)}`);
}
return constant(astNode.symbol);
}
Expand Down Expand Up @@ -106,19 +111,21 @@ function toMatchingArbitrary(astNode: RegexToken, constraints: StringMatchingCon
}
case 'Alternative': {
// TODO - No unmap implemented yet!
return tuple(...astNode.expressions.map((n) => toMatchingArbitrary(n, constraints))).map((vs) => vs.join(''));
return tuple(...safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints))).map((vs) =>
safeJoin(vs, '')
);
}
case 'CharacterClass':
if (astNode.negative) {
const childrenArbitraries = astNode.expressions.map((n) => toMatchingArbitrary(n, constraints));
return defaultChar.filter((c) => childrenArbitraries.every((arb) => !arb.canShrinkWithoutContext(c)));
const childrenArbitraries = safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints));
return defaultChar.filter((c) => safeEvery(childrenArbitraries, (arb) => !arb.canShrinkWithoutContext(c)));
}
return oneof(...astNode.expressions.map((n) => toMatchingArbitrary(n, constraints)));
return oneof(...safeMap(astNode.expressions, (n) => toMatchingArbitrary(n, constraints)));
case 'ClassRange': {
const min = astNode.from.codePoint;
const max = astNode.to.codePoint;
return integer({ min, max }).map(
(n) => String.fromCodePoint(n),
(n) => safeStringFromCodePoint(n),
(c) => {
if (typeof c !== 'string') throw new Error('Invalid type');
if ([...c].length !== 1) throw new Error('Invalid length');
Expand Down
14 changes: 14 additions & 0 deletions packages/fast-check/src/utils/globals.ts
Expand Up @@ -83,6 +83,7 @@ const untouchedPop = Array.prototype.pop;
const untouchedSplice: (start: number, deleteCount?: number | undefined) => any[] = Array.prototype.splice;
const untouchedSlice = Array.prototype.slice;
const untouchedSort = Array.prototype.sort;
const untouchedEvery = Array.prototype.every;
function extractForEach(instance: unknown[]) {
try {
return instance.forEach;
Expand Down Expand Up @@ -153,6 +154,13 @@ function extractSort(instance: unknown[]) {
return undefined;
}
}
/**
 * Look up the `every` member reachable from `instance`.
 *
 * An exception raised by the property access itself is swallowed and
 * reported as `undefined` instead of being propagated.
 */
function extractEvery(instance: unknown[]) {
  let every: typeof instance.every | undefined = undefined;
  try {
    every = instance.every;
  } catch (err) {
    // The access itself threw: report the member as unavailable
    every = undefined;
  }
  return every;
}
export function safeForEach<T>(instance: T[], fn: (value: T, index: number, array: T[]) => void): void {
if (extractForEach(instance) === untouchedForEach) {
return instance.forEach(fn);
Expand Down Expand Up @@ -219,6 +227,12 @@ export function safeSort<T>(instance: T[], ...args: [compareFn?: ((a: T, b: T) =
}
return safeApply(untouchedSort, instance, args);
}
/**
 * Evaluate `every` on `instance` using the `Array.prototype.every` captured in `untouchedEvery`.
 *
 * When the `every` reachable from `instance` is still that captured function, it is called
 * directly on the instance. Otherwise (it was replaced, or reading it threw) the captured
 * function is invoked through `safeApply`.
 *
 * @param instance - The array to test
 * @param args - The predicate to evaluate against the entries
 * @returns The result of the `every` evaluation
 */
export function safeEvery<T>(instance: T[], ...args: [predicate: (value: T) => boolean]): boolean {
  const everyIsUntouched = extractEvery(instance) === untouchedEvery;
  return everyIsUntouched ? instance.every(...args) : safeApply(untouchedEvery, instance, args);
}

// Date

Expand Down
8 changes: 8 additions & 0 deletions packages/fast-check/test/e2e/NoRegression.spec.ts
Expand Up @@ -148,6 +148,14 @@ describe(`NoRegression`, () => {
)
).toThrowErrorMatchingSnapshot();
});
// Regression check for fc.stringMatching: fc.assert is expected to throw,
// and the thrown error is compared against the stored snapshot.
it('stringMatching', () => {
expect(() =>
fc.assert(
fc.property(fc.stringMatching(/(^|\s)a+[^a][b-eB-E]+[^b-eB-E](\s|$)/), (v) => testFunc(v)),
settings
)
).toThrowErrorMatchingSnapshot();
});
it('unicodeString', () => {
expect(() =>
fc.assert(
Expand Down
4 changes: 4 additions & 0 deletions packages/fast-check/test/e2e/Poisoning.spec.ts
Expand Up @@ -2,6 +2,9 @@ import { restoreGlobals } from '@fast-check/poisoning';
import * as fc from '../../src/fast-check';
import { seed } from './seed';

// Building the matcher in a polluted context does not work for now, so it is built once at module load
const preBuiltStringMatching = fc.stringMatching(/(^|\s)[0-9a-f]{8}-(\w{4})[^abc][^a-u]\D+(\s|$)/);

describe(`Poisoning (seed: ${seed})`, () => {
it.each<{
name: string;
Expand Down Expand Up @@ -44,6 +47,7 @@ describe(`Poisoning (seed: ${seed})`, () => {
{ name: 'string16bits', arbitraryBuilder: () => fc.string16bits() },
{ name: 'fullUnicodeString', arbitraryBuilder: () => fc.fullUnicodeString() },
{ name: 'stringOf', arbitraryBuilder: () => fc.stringOf(fc.char()) },
{ name: 'stringMatching', arbitraryBuilder: () => preBuiltStringMatching },
// : More specific strings
// related to fc.double: pure-rand is not resilient to prototype poisoning occurring on Array
//{ name: 'json', arbitraryBuilder: () => fc.json() },
Expand Down
102 changes: 61 additions & 41 deletions packages/fast-check/test/e2e/__snapshots__/NoRegression.spec.ts.snap
Expand Up @@ -982,47 +982,6 @@ Execution summary:
. . . . . . . . √ ["~"]"
`;

exports[`NoRegression gen 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0", endOnFailure: true }
Counterexample: [[0,-1]]
Shrunk 28 time(s)
Got error: Property failed by returning false

Execution summary:
× [[9,-132539150]]
. × [[0,-132539150]]
. . √ [[0,0]]
. . × [[0,-66269575]]
. . . × [[0,-33134788]]
. . . . × [[0,-16567394]]
. . . . . × [[0,-8283697]]
. . . . . . × [[0,-4141849]]
. . . . . . . × [[0,-2070925]]
. . . . . . . . × [[0,-1035463]]
. . . . . . . . . × [[0,-517732]]
. . . . . . . . . . × [[0,-258866]]
. . . . . . . . . . . × [[0,-129433]]
. . . . . . . . . . . . × [[0,-64717]]
. . . . . . . . . . . . . × [[0,-32359]]
. . . . . . . . . . . . . . × [[0,-16180]]
. . . . . . . . . . . . . . . × [[0,-8090]]
. . . . . . . . . . . . . . . . × [[0,-4045]]
. . . . . . . . . . . . . . . . . × [[0,-2023]]
. . . . . . . . . . . . . . . . . . × [[0,-1012]]
. . . . . . . . . . . . . . . . . . . × [[0,-506]]
. . . . . . . . . . . . . . . . . . . . × [[0,-253]]
. . . . . . . . . . . . . . . . . . . . . × [[0,-127]]
. . . . . . . . . . . . . . . . . . . . . . × [[0,-64]]
. . . . . . . . . . . . . . . . . . . . . . . × [[0,-32]]
. . . . . . . . . . . . . . . . . . . . . . . . × [[0,-16]]
. . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-8]]
. . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-4]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-2]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-1]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . √ [[0,0]]"
`;

exports[`NoRegression base64String 1`] = `
"Property failed after 3 tests
{ seed: 42, path: "2:3:5:5", endOnFailure: true }
Expand Down Expand Up @@ -1957,6 +1916,47 @@ Execution summary:
}]"
`;

exports[`NoRegression gen 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:0:1:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0", endOnFailure: true }
Counterexample: [[0,-1]]
Shrunk 28 time(s)
Got error: Property failed by returning false

Execution summary:
× [[9,-132539150]]
. × [[0,-132539150]]
. . √ [[0,0]]
. . × [[0,-66269575]]
. . . × [[0,-33134788]]
. . . . × [[0,-16567394]]
. . . . . × [[0,-8283697]]
. . . . . . × [[0,-4141849]]
. . . . . . . × [[0,-2070925]]
. . . . . . . . × [[0,-1035463]]
. . . . . . . . . × [[0,-517732]]
. . . . . . . . . . × [[0,-258866]]
. . . . . . . . . . . × [[0,-129433]]
. . . . . . . . . . . . × [[0,-64717]]
. . . . . . . . . . . . . × [[0,-32359]]
. . . . . . . . . . . . . . × [[0,-16180]]
. . . . . . . . . . . . . . . × [[0,-8090]]
. . . . . . . . . . . . . . . . × [[0,-4045]]
. . . . . . . . . . . . . . . . . × [[0,-2023]]
. . . . . . . . . . . . . . . . . . × [[0,-1012]]
. . . . . . . . . . . . . . . . . . . × [[0,-506]]
. . . . . . . . . . . . . . . . . . . . × [[0,-253]]
. . . . . . . . . . . . . . . . . . . . . × [[0,-127]]
. . . . . . . . . . . . . . . . . . . . . . × [[0,-64]]
. . . . . . . . . . . . . . . . . . . . . . . × [[0,-32]]
. . . . . . . . . . . . . . . . . . . . . . . . × [[0,-16]]
. . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-8]]
. . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-4]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-2]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . × [[0,-1]]
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . √ [[0,0]]"
`;

exports[`NoRegression hexaString 1`] = `
"Property failed after 6 tests
{ seed: 42, path: "5:1:1:10:1:5:0", endOnFailure: true }
Expand Down Expand Up @@ -3892,6 +3892,26 @@ Execution summary:
. . √ ["CB"]"
`;

exports[`NoRegression stringMatching 1`] = `
"Property failed after 1 tests
{ seed: 42, path: "0:1:1:1:1:0", endOnFailure: true }
Counterexample: ["a B "]
Shrunk 5 time(s)
Got error: Property failed by returning false

Execution summary:
× ["aa!cBcBx\\t"]
. √ ["a!cBcBx\\t"]
. × ["aa cBcBx\\t"]
. . √ ["a cBcBx\\t"]
. . × ["aa Bx\\t"]
. . . √ ["a Bx\\t"]
. . . × ["aa B \\t"]
. . . . √ ["a B \\t"]
. . . . × ["aa B "]
. . . . . × ["a B "]"
`;

exports[`NoRegression stringOf 1`] = `
"Property failed after 2 tests
{ seed: 42, path: "1:1:0", endOnFailure: true }
Expand Down
26 changes: 16 additions & 10 deletions website/docs/core-blocks/arbitraries/combiners/string.md
Expand Up @@ -65,19 +65,25 @@ String matching the passed regex.
**Usages:**

```js
fc.stringMatching(/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/);
// Note: Buggy IPv4 regex also matching 999.999.999.999
// Examples of generated values: "422.299.995.55", "2.1.3.6", "0.47.62.53", "45.34.48.3", "768.2.602.560"…
fc.stringMatching(/html|php|css|java(script)?/);
// Note: The regex does not contain ^ or $ assertions, so extra text could be added before and after the match
// Examples of generated values: "css", "html", "java", "php", "javascript"…

fc.stringMatching(/[0-9a-f]{8}-[0-9a-f]{4}-[12345][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/);
// Note: regex matching UUID
fc.stringMatching(/^[0-9a-f]{8}-[0-9a-f]{4}-[12345][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/);
// Note: Regex matching UUID
// Examples of generated values:
// • "93a1ec6d-7f8c-3ace-8ea7-ac73d142269b"
// • "11b300d1-dd9e-3ae1-abe9-bdfd61a2e1d0"
// • "dde3a7b7-3afb-5c23-bacb-7cc7b80ba93a"
// • "eb1c3b5f-d8b9-5907-a4bc-aacda161bdeb"
// • "6f3dcbaa-de1c-56ac-9eac-0d1ebf77ba13"
// • "fd606aa1-b53b-1c7b-9e2f-1e2c1ff1b8e9"
// • "e74cec0b-bd5a-4dba-96a9-edbfa9c1a198"
// • "fcccdcf3-908e-5179-adce-7ebae72c12dc"
// • "0eab1fab-5bc2-336c-9ccb-a3fecbe72ee2"
// • "bb3073ee-2283-2538-ba0c-1b976ebb9610"
// • …

fc.stringMatching(
/^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/
);
// Note: Regex matching IPv4 addresses; we recommend relying on `fc.ipV4()` instead
// Examples of generated values: "003.2.210.06", "253.17.60.12", "250.19.229.08", "3.250.26.253", "200.00.0.254"…
```

Resources: [API reference](https://fast-check.dev/api-reference/functions/stringMatching.html).
Expand Down