From 7298411830200cf7ed14051a5b72a67e2f766b90 Mon Sep 17 00:00:00 2001 From: Nicolas DUBIEN Date: Thu, 11 Apr 2019 00:28:30 +0200 Subject: [PATCH] Implement Url arbitraries Fixes #302 --- documentation/Arbitraries.md | 6 + src/check/arbitrary/HostArbitrary.ts | 41 ++++++ src/check/arbitrary/WebArbitrary.ts | 125 ++++++++++++++++++ .../helpers/SpecificCharacterRange.ts | 43 ++++++ src/fast-check-default.ts | 23 +++- test/e2e/arbitraries/WebArbitrary.spec.ts | 64 +++++++++ .../check/arbitrary/HostArbitrary.spec.ts | 30 +++++ .../unit/check/arbitrary/WebArbitrary.spec.ts | 43 ++++++ 8 files changed, 373 insertions(+), 2 deletions(-) create mode 100644 src/check/arbitrary/HostArbitrary.ts create mode 100644 src/check/arbitrary/WebArbitrary.ts create mode 100644 src/check/arbitrary/helpers/SpecificCharacterRange.ts create mode 100644 test/e2e/arbitraries/WebArbitrary.spec.ts create mode 100644 test/unit/check/arbitrary/HostArbitrary.spec.ts create mode 100644 test/unit/check/arbitrary/WebArbitrary.spec.ts diff --git a/documentation/Arbitraries.md b/documentation/Arbitraries.md index c80a6afb956..abbda42fc39 100644 --- a/documentation/Arbitraries.md +++ b/documentation/Arbitraries.md @@ -70,6 +70,12 @@ More specific strings: - `fc.lorem()`, `fc.lorem(maxWordsCount: number)` or `fc.lorem(maxWordsCount: number, sentencesMode: boolean)` lorem ipsum strings. Generator can be configured by giving it a maximum number of characters by using `maxWordsCount` or switching the mode to sentences by setting `sentencesMode` to `true` in which case `maxWordsCount` is used to cap the number of sentences allowed - `fc.ipV4()` IP v4 strings - `fc.ipV6()` IP v6 strings +- `fc.domain()` Domain name with extension following RFC 1034, RFC 1123 and WHATWG URL Standard +- `fc.webAuthority()` Web authority following RFC 3986 +- `fc.webFragments()` Fragments to build an URI. Fragment is the optional part right after the # in an URI +- `fc.webQueryParameters()` Query parameters to build an URI. 
Query parameters are the optional part right after the ? in an URI +- `fc.webSegment()` Web URL path segment +- `fc.webUrl()` Web URL following the specs specified by RFC 3986 and WHATWG URL Standard ## Combinors of arbitraries (:T) diff --git a/src/check/arbitrary/HostArbitrary.ts b/src/check/arbitrary/HostArbitrary.ts new file mode 100644 index 00000000000..f0424f23e0b --- /dev/null +++ b/src/check/arbitrary/HostArbitrary.ts @@ -0,0 +1,41 @@ +import { array } from './ArrayArbitrary'; +import { + buildAlphaNumericPercentArb, + buildLowerAlphaArb, + buildLowerAlphaNumericArb +} from './helpers/SpecificCharacterRange'; +import { option } from './OptionArbitrary'; +import { stringOf } from './StringArbitrary'; +import { tuple } from './TupleArbitrary'; + +/** @hidden */ +function subdomain() { + const alphaNumericArb = buildLowerAlphaNumericArb([]); + const alphaNumericHyphenArb = buildLowerAlphaNumericArb(['-']); + return tuple(alphaNumericArb, option(tuple(stringOf(alphaNumericHyphenArb), alphaNumericArb))) + .map(([f, d]) => (d === null ? 
f : `${f}${d[0]}${d[1]}`)) + .filter(d => d.length <= 63); +} + +/** + * For domains + * having an extension with at least two lowercase characters + * + * According to RFC 1034, RFC 1123 and WHATWG URL Standard + * - https://www.ietf.org/rfc/rfc1034.txt + * - https://www.ietf.org/rfc/rfc1123.txt + * - https://url.spec.whatwg.org/ + */ +export function domain() { + const alphaNumericArb = buildLowerAlphaArb([]); + const extensionArb = stringOf(alphaNumericArb, 2, 10); + return tuple(array(subdomain(), 1, 5), extensionArb) + .map(([mid, ext]) => `${mid.join('.')}.${ext}`) + .filter(d => d.length <= 255); +} + +/** @hidden */ +export function hostUserInfo() { + const others = ['-', '.', '_', '~', '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=', ':']; + return stringOf(buildAlphaNumericPercentArb(others)); +} diff --git a/src/check/arbitrary/WebArbitrary.ts b/src/check/arbitrary/WebArbitrary.ts new file mode 100644 index 00000000000..aaa01ec9c92 --- /dev/null +++ b/src/check/arbitrary/WebArbitrary.ts @@ -0,0 +1,125 @@ +import { constant } from '../../fast-check-default'; +import { array } from './ArrayArbitrary'; +import { constantFrom } from './ConstantArbitrary'; +import { buildAlphaNumericPercentArb } from './helpers/SpecificCharacterRange'; +import { domain, hostUserInfo } from './HostArbitrary'; +import { nat } from './IntegerArbitrary'; +import { ipV4, ipV6 } from './IpArbitrary'; +import { oneof } from './OneOfArbitrary'; +import { option } from './OptionArbitrary'; +import { stringOf } from './StringArbitrary'; +import { tuple } from './TupleArbitrary'; + +export interface WebAuthorityConstraints { + /** Enable IPv4 in host */ + withIPv4?: boolean; + /** Enable IPv6 in host */ + withIPv6?: boolean; + /** Enable user information prefix */ + withUserInfo?: boolean; + /** Enable port suffix */ + withPort?: boolean; +} + +/** + * For web authority + * + * According to RFC 3986 - https://www.ietf.org/rfc/rfc3986.txt - `authority = [ userinfo "@" ] host [ ":" 
port ]` + * + * @param constraints Optional switches enabling IPv4 / IPv6 hosts, user information prefix and port suffix (port within 0-65535) + */ +export function webAuthority(constraints?: WebAuthorityConstraints) { + const c = constraints || {}; + const hostnameArbs = [domain()] + .concat(c.withIPv4 === true ? [ipV4()] : []) + .concat(c.withIPv6 === true ? [ipV6().map(ip => `[${ip}]`)] : []); + return tuple( + c.withUserInfo === true ? option(hostUserInfo()) : constant(null), + oneof(...hostnameArbs), + c.withPort === true ? option(nat(65535)) : constant(null) + ).map(([u, h, p]) => (u === null ? '' : `${u}@`) + h + (p === null ? '' : `:${p}`)); +} + +/** + * For internal segment of an URI (web included) + * + * According to RFC 3986 - https://www.ietf.org/rfc/rfc3986.txt + * + * eg.: In the url `https://github.com/dubzzz/fast-check/`, `dubzzz` and `fast-check` are segments + */ +export function webSegment() { + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // segment = *pchar + const others = ['-', '.', '_', '~', '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=', ':', '@']; + return stringOf(buildAlphaNumericPercentArb(others)); +} + +/** @hidden */ +function uriQueryOrFragment() { + // query = *( pchar / "/" / "?" ) + // fragment = *( pchar / "/" / "?" 
) + const others = ['-', '.', '_', '~', '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=', ':', '@', '/', '?']; + return stringOf(buildAlphaNumericPercentArb(others)); +} + +/** + * For query parameters of an URI (web included) + * + * According to RFC 3986 - https://www.ietf.org/rfc/rfc3986.txt + * + * eg.: In the url `https://domain/plop/?hello=1&world=2`, `hello=1&world=2` are query parameters + */ +export function webQueryParameters() { + return uriQueryOrFragment(); +} + +/** + * For fragments of an URI (web included) + * + * According to RFC 3986 - https://www.ietf.org/rfc/rfc3986.txt + * + * eg.: In the url `https://domain/plop?page=1#hello=1&world=2`, `hello=1&world=2` is the fragment + */ +export function webFragments() { + return uriQueryOrFragment(); +} + +export interface WebUrlConstraints { + /** Enforce specific schemes, eg.: http, https */ + validSchemes?: string[]; + /** Settings for {@link webAuthority} */ + authoritySettings?: WebAuthorityConstraints; + /** Enable query parameters in the generated url */ + withQueryParameters?: boolean; + /** Enable fragments in the generated url */ + withFragments?: boolean; +} + +/** + * For web url + * + * According to RFC 3986 and WHATWG URL Standard + * - https://www.ietf.org/rfc/rfc3986.txt + * - https://url.spec.whatwg.org/ + * + * @param constraints Schemes (default: http, https), authority settings, query parameters and fragments toggles + */ +export function webUrl(constraints?: { + validSchemes?: string[]; + authoritySettings?: WebAuthorityConstraints; + withQueryParameters?: boolean; + withFragments?: boolean; +}) { + const c = constraints || {}; + const validSchemes = c.validSchemes || ['http', 'https']; + const schemeArb = constantFrom(...validSchemes); + const authorityArb = webAuthority(c.authoritySettings); + const pathArb = array(webSegment()).map(p => p.map(v => `/${v}`).join('')); + return tuple( + schemeArb, + authorityArb, + pathArb, + c.withQueryParameters === true ? option(uriQueryOrFragment()) : constant(null), + c.withFragments === true ? 
option(uriQueryOrFragment()) : constant(null) + ).map(([s, a, p, q, f]) => `${s}://${a}${p}${q === null ? '' : `?${q}`}${f === null ? '' : `#${f}`}`); +} diff --git a/src/check/arbitrary/helpers/SpecificCharacterRange.ts b/src/check/arbitrary/helpers/SpecificCharacterRange.ts new file mode 100644 index 00000000000..59d0bc3daf3 --- /dev/null +++ b/src/check/arbitrary/helpers/SpecificCharacterRange.ts @@ -0,0 +1,43 @@ +import { fullUnicode } from '../CharacterArbitrary'; +import { frequency } from '../FrequencyArbitrary'; +import { mapToConstant } from '../MapToConstantArbitrary'; + +/** @hidden */ +const lowerCaseMapper = { num: 26, build: (v: number) => String.fromCharCode(v + 0x61) }; + +/** @hidden */ +const upperCaseMapper = { num: 26, build: (v: number) => String.fromCharCode(v + 0x41) }; + +/** @hidden */ +const numericMapper = { num: 10, build: (v: number) => String.fromCharCode(v + 0x30) }; + +/** @hidden */ +const percentCharArb = fullUnicode().map(c => { + const encoded = encodeURIComponent(c); + return c !== encoded ? 
encoded : `%${c.charCodeAt(0).toString(16)}`; // always %xy / no %x or %xyz +}); + +/** @hidden */ +export const buildLowerAlphaArb = (others: string[]) => + mapToConstant(lowerCaseMapper, { num: others.length, build: v => others[v] }); + +/** @hidden */ +export const buildLowerAlphaNumericArb = (others: string[]) => + mapToConstant(lowerCaseMapper, numericMapper, { num: others.length, build: v => others[v] }); + +/** @hidden */ +export const buildAlphaNumericArb = (others: string[]) => + mapToConstant(lowerCaseMapper, upperCaseMapper, numericMapper, { num: others.length, build: v => others[v] }); + +/** @hidden */ +export const buildAlphaNumericPercentArb = (others: string[]) => + frequency( + { + weight: 10, + arbitrary: buildAlphaNumericArb(others) + }, + { + weight: 1, + arbitrary: percentCharArb + } + ); diff --git a/src/fast-check-default.ts b/src/fast-check-default.ts index e286dbd3b99..7b516786e3f 100644 --- a/src/fast-check-default.ts +++ b/src/fast-check-default.ts @@ -19,6 +19,7 @@ import { dictionary } from './check/arbitrary/DictionaryArbitrary'; import { double, float } from './check/arbitrary/FloatingPointArbitrary'; import { frequency } from './check/arbitrary/FrequencyArbitrary'; import { compareBooleanFunc, compareFunc, func } from './check/arbitrary/FunctionArbitrary'; +import { domain } from './check/arbitrary/HostArbitrary'; import { integer, maxSafeInteger, maxSafeNat, nat } from './check/arbitrary/IntegerArbitrary'; import { ipV4, ipV6 } from './check/arbitrary/IpArbitrary'; import { lorem } from './check/arbitrary/LoremArbitrary'; @@ -49,6 +50,15 @@ import { } from './check/arbitrary/StringArbitrary'; import { shuffledSubarray, subarray } from './check/arbitrary/SubarrayArbitrary'; import { genericTuple, tuple } from './check/arbitrary/TupleArbitrary'; +import { + webAuthority, + WebAuthorityConstraints, + webFragments, + webQueryParameters, + webSegment, + webUrl, + WebUrlConstraints +} from './check/arbitrary/WebArbitrary'; import { 
AsyncCommand } from './check/model/command/AsyncCommand'; import { Command } from './check/model/command/Command'; @@ -111,8 +121,6 @@ export { hexaString, base64String, lorem, - ipV4, - ipV6, constant, constantFrom, clonedConstant, @@ -140,6 +148,15 @@ export { compareFunc, func, context, + // web + ipV4, + ipV6, + domain, + webAuthority, + webSegment, + webFragments, + webQueryParameters, + webUrl, // model-based AsyncCommand, Command, @@ -158,6 +175,8 @@ export { ObjectConstraints, Parameters, RecordConstraints, + WebAuthorityConstraints, + WebUrlConstraints, RunDetails, Random, Stream, diff --git a/test/e2e/arbitraries/WebArbitrary.spec.ts b/test/e2e/arbitraries/WebArbitrary.spec.ts new file mode 100644 index 00000000000..24f486a4bfc --- /dev/null +++ b/test/e2e/arbitraries/WebArbitrary.spec.ts @@ -0,0 +1,64 @@ +import * as fc from '../../../src/fast-check'; +import { URL } from 'url'; + +const seed = Date.now(); +describe(`WebArbitrary (seed: ${seed})`, () => { + it('Should produce valid domains', () => { + fc.assert( + fc.property(fc.domain(), domain => { + const p = `http://user:pass@${domain}/path/?query#fragment`; + const u = new URL(p); + expect(u.hostname).toEqual(domain); + }), + { seed: seed } + ); + }); + it('Should produce valid authorities', () => { + fc.assert( + fc.property( + fc.webAuthority({ + withIPv4: false, + withIPv6: false, + withUserInfo: true, + withPort: true + }), + authority => { + const domain = /(^|@)([-a-z0-9\.]+)(:\d+$|$)/.exec(authority)![2]; + const p = `http://${authority}`; + const u = new URL(p); + expect(u.hostname).toEqual(domain); + } + ), + { seed: seed } + ); + }); + it('Should produce valid URL parts', () => { + fc.assert( + fc.property( + fc.webAuthority({ withIPv4: true, withIPv6: true, withUserInfo: true, withPort: true }), + fc.array(fc.webSegment()).map(p => p.map(v => `/${v}`).join('')), + fc.webQueryParameters(), + fc.webFragments(), + (authority, path, query, fragment) => { + const p = 
`http://${authority}${path}?${query}#${fragment}`; + const u = new URL(p); + expect({ search: decodeURIComponent(u.search), hash: u.hash }).toEqual({ + search: query === '' ? '' : decodeURIComponent(`?${query}`), + hash: fragment === '' ? '' : `#${fragment}` + }); + + const dotSanitizedPath = path + .replace(/\/(%2e|%2E)($|\/)/g, '/.$2') + .replace(/\/(%2e|%2E)(%2e|%2E)($|\/)/g, '/..$3'); + if (!dotSanitizedPath.includes('/..')) { + const sanitizedPath = dotSanitizedPath + .replace(/\/\.\/(\.\/)*/g, '/') // replace /./, /././, etc.. by / + .replace(/\/\.$/, '/'); // replace trailing /. by / if any + expect(u.pathname).toEqual(sanitizedPath === '' ? '/' : sanitizedPath); + } + } + ), + { seed: seed } + ); + }); +}); diff --git a/test/unit/check/arbitrary/HostArbitrary.spec.ts b/test/unit/check/arbitrary/HostArbitrary.spec.ts new file mode 100644 index 00000000000..f4552d5e6b5 --- /dev/null +++ b/test/unit/check/arbitrary/HostArbitrary.spec.ts @@ -0,0 +1,30 @@ +import { domain } from '../../../../src/check/arbitrary/HostArbitrary'; + +import * as genericHelper from './generic/GenericArbitraryHelper'; + +const isValidDomain = (t: string) => { + // According to https://www.ietf.org/rfc/rfc1034.txt + // ::= | " " + // ::=