diff --git a/README.md b/README.md index b0ba843..947f011 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This library allows users to create regular expressions in a structured way, mak const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/; // TS Regex Builder DSL -const hexDigit = charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9')); +const hexDigit = /[a-fA-F0-9]/; // or: charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9')); const hexColor = buildRegExp([ startOfString, @@ -116,33 +116,15 @@ See [Constructs API doc](https://callstack.github.io/ts-regex-builder/api/constr | Quantifier | Regex Syntax | Description | | -------------------------------- | ------------ | ------------------------------------------------- | -| `zeroOrMore(x)` | `x*` | Zero or more occurence of a pattern | -| `oneOrMore(x)` | `x+` | One or more occurence of a pattern | -| `optional(x)` | `x?` | Zero or one occurence of a pattern | +| `zeroOrMore(x)` | `x*` | Zero or more occurrences of a pattern | +| `oneOrMore(x)` | `x+` | One or more occurrences of a pattern | +| `optional(x)` | `x?` | Zero or one occurrence of a pattern | | `repeat(x, n)` | `x{n}` | Pattern repeats exact number of times | | `repeat(x, { min: n, })` | `x{n,}` | Pattern repeats at least given number of times | | `repeat(x, { min: n, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 number of times | See [Quantifiers API doc](https://callstack.github.io/ts-regex-builder/api/quantifiers) for more info. -### Character classes - -| Character class | Regex Syntax | Description | -| ---------------------- | ------------ | ------------------------------------------------- | -| `any` | `.` | Any character | -| `word` | `\w` | Word character: letter, digit, underscore | -| `digit` | `\d` | Digit character: 0 to 9 | -| `whitespace` | `\s` | Whitespace character: space, tab, line break, ... 
| -| `anyOf('abc')` | `[abc]` | Any of provided characters | -| `charRange('a', 'z')` | `[a-z]` | Character in a range | -| `charClass(...)` | `[...]` | Union of multiple character classes | -| `negated(...)` | `[^...]` | Negation of a given character class | -| `char(...)` | `\uXXXX` | Character specified given Unicode code point | -| `unicodeProperty(...)` | `\p{...}` | Characters with given Unicode property | - - -See [Character Classes API doc](https://callstack.github.io/ts-regex-builder/api/character-classes) and [Unicode API doc](https://callstack.github.io/ts-regex-builder/api/unicode) for more info. - ### Assertions | Assertion | Regex Syntax | Description | @@ -151,12 +133,30 @@ See [Character Classes API doc](https://callstack.github.io/ts-regex-builder/api | `endOfString` | `$` | Match the end of the string (or the end of a line in multiline mode) | | `wordBoundary` | `\b` | Match the start or end of a word without consuming characters | | `lookahead(...)` | `(?=...)` | Match subsequent text without consuming it | -| `negativeLookhead(...)` | `(?!...)` | Reject subsequent text without consuming it | +| `negativeLookahead(...)` | `(?!...)` | Reject subsequent text without consuming it | | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | | `negativeLookbehind(...)` | `(?<!...)` | Reject preceding text without consuming it | See [Assertions API doc](https://callstack.github.io/ts-regex-builder/api/assertions) for more info. +### Character classes + +> [!TIP] +> You may also use inline regexes for specifying character classes, as they offer a concise yet readable syntax. For example, `/[a-z0-9_]/`. + +| Character class | Regex Syntax | Description | +| --------------------- | ------------ | ------------------------------------------------- | +| `any` | `.` | Any character | +| `word` | `\w` | Word character: letter, digit, underscore | +| `digit` | `\d` | Digit character: 0 to 9 | +| `whitespace` | `\s` | Whitespace character: space, tab, line break, ... 
| +| `anyOf('abc')` | `[abc]` | Any of provided characters | +| `charRange('a', 'z')` | `[a-z]` | Character in a range | +| `charClass(...)` | `[...]` | Union of multiple character classes | +| `negated(...)` | `[^...]` | Negation of a given character class | + +See [Character Classes API doc](https://callstack.github.io/ts-regex-builder/api/character-classes) and [Unicode API doc](https://callstack.github.io/ts-regex-builder/api/unicode) for more info. + ## Examples See [Examples](https://callstack.github.io/ts-regex-builder/examples). @@ -185,8 +185,6 @@ TS Regex Builder is inspired by [Swift Regex Builder API](https://developer.appl - [Swift Evolution 351: Regex Builder DSL](https://github.com/apple/swift-evolution/blob/main/proposals/0351-regex-builder.md) - [Swift Regex Builder API docs](https://developer.apple.com/documentation/regexbuilder) - - --- Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob) diff --git a/src/constructs/anchors.ts b/src/constructs/anchors.ts index 6d61f42..df9f276 100644 --- a/src/constructs/anchors.ts +++ b/src/constructs/anchors.ts @@ -1,20 +1,32 @@ import type { EncodedRegex } from '../types'; +/** + * Start of string anchor. Matches the start of a string. In `multiline` mode, also matches immediately following a newline. + */ export const startOfString: EncodedRegex = { precedence: 'atom', pattern: '^', }; +/** + * End of string anchor. Matches the end of a string. In `multiline` mode, also matches immediately preceding a newline. + */ export const endOfString: EncodedRegex = { precedence: 'atom', pattern: '$', }; +/** + * Word boundary anchor. Matches the position where one side is a word character (alphanumeric or underscore) and the other side is a non-word character (anything else). + */ export const wordBoundary: EncodedRegex = { precedence: 'atom', pattern: '\\b', }; +/** + * Non-word boundary anchor. Matches the position where both sides are word characters or both sides are non-word characters. 
+ */ export const nonWordBoundary: EncodedRegex = { precedence: 'atom', pattern: '\\B', diff --git a/src/constructs/char-class.ts b/src/constructs/char-class.ts index 5724503..c18be71 100644 --- a/src/constructs/char-class.ts +++ b/src/constructs/char-class.ts @@ -1,6 +1,12 @@ import type { CharacterClass, CharacterEscape, EncodedRegex } from '../types'; import { ensureText } from '../utils'; +/** + * Creates a character class which matches any one of the given characters. + * + * @param elements - Member characters or character ranges. + * @returns Character class. + */ export function charClass(...elements: Array<CharacterClass | CharacterEscape>): CharacterClass { if (!elements.length) { throw new Error('Expected at least one element'); @@ -13,6 +19,13 @@ export function charClass(...elements: Array<CharacterClass | CharacterEscape>): }; } +/** + * Creates a character class which matches any one of the characters in the range. + * + * @param start - Start of the range (single character). + * @param end - End of the range (single character). + * @returns Character class. + */ export function charRange(start: string, end: string): CharacterClass { if (start.length !== 1 || end.length !== 1) { throw new Error(`Expected single characters, but received "${start}" & "${end}"`); @@ -29,6 +42,12 @@ export function charRange(start: string, end: string): CharacterClass { }; } +/** + * Creates a character class which matches any one of the given characters. + * + * @param chars - Characters to match. + * @returns Character class. + */ export function anyOf(chars: string): CharacterClass { ensureText(chars); @@ -38,6 +57,12 @@ export function anyOf(chars: string): CharacterClass { }; } +/** + * Creates a negated character class which matches any character that is not in the given character class. + * + * @param element - Character class or character escape to negate. + * @returns Negated character class. 
+ */ export function negated(element: CharacterClass | CharacterEscape): EncodedRegex { return encodeCharClass.call(element, true); } diff --git a/src/constructs/char-escape.ts b/src/constructs/char-escape.ts index 77aa2cb..70456b0 100644 --- a/src/constructs/char-escape.ts +++ b/src/constructs/char-escape.ts @@ -9,36 +9,54 @@ export const any: EncodedRegex = { pattern: '.', }; +/** + * Matches any digit (0-9). + */ export const digit: CharacterEscape = { precedence: 'atom', pattern: '\\d', chars: ['\\d'], }; +/** + * Matches any character that is not a digit (0-9). + */ export const nonDigit: CharacterEscape = { precedence: 'atom', pattern: '\\D', chars: ['\\D'], }; +/** + * Matches any word character (alphanumeric or underscore). + */ export const word: CharacterEscape = { precedence: 'atom', pattern: '\\w', chars: ['\\w'], }; +/** + * Matches any character that is not a word character (alphanumeric or underscore). + */ export const nonWord: CharacterEscape = { precedence: 'atom', pattern: '\\W', chars: ['\\W'], }; +/** + * Matches any whitespace character (space, tab, newline, etc.). + */ export const whitespace: CharacterEscape = { precedence: 'atom', pattern: '\\s', chars: ['\\s'], }; +/** + * Matches any character that is not whitespace (space, tab, newline, etc.). + */ export const nonWhitespace: CharacterEscape = { precedence: 'atom', pattern: '\\S', diff --git a/src/constructs/choice-of.ts b/src/constructs/choice-of.ts index a899c94..0434174 100644 --- a/src/constructs/choice-of.ts +++ b/src/constructs/choice-of.ts @@ -1,6 +1,12 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +/** + * Creates a disjunction (choice of) which matches any of the alternatives. + * + * @param alternatives - Alternatives to choose from. + * @returns Choice of alternatives. 
+ */ export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex { if (alternatives.length === 0) { throw new Error('Expected at least one alternative'); diff --git a/src/constructs/quantifiers.ts b/src/constructs/quantifiers.ts index 70e0869..3473a1e 100644 --- a/src/constructs/quantifiers.ts +++ b/src/constructs/quantifiers.ts @@ -6,6 +6,12 @@ export interface QuantifierOptions { greedy?: boolean; } +/** + * Creates a quantifier which matches zero or more of the given elements. + * + * @param sequence - Elements to match zero or more of. + * @param options - Quantifier options. + */ export function zeroOrMore(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex { const elements = ensureElements(sequence); return { @@ -14,6 +20,12 @@ export function zeroOrMore(sequence: RegexSequence, options?: QuantifierOptions) }; } +/** + * Creates a quantifier which matches one or more of the given elements. + * + * @param sequence - Elements to match one or more of. + * @param options - Quantifier options. + */ export function oneOrMore(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex { const elements = ensureElements(sequence); return { @@ -22,6 +34,12 @@ export function oneOrMore(sequence: RegexSequence, options?: QuantifierOptions): }; } +/** + * Creates a quantifier which matches zero or one of the given elements. + * + * @param sequence - Elements to match zero or one of. + * @param options - Quantifier options. + */ export function optional(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex { const elements = ensureElements(sequence); return { diff --git a/src/constructs/regex.ts b/src/constructs/regex.ts index 3f67ae5..d9e86b5 100644 --- a/src/constructs/regex.ts +++ b/src/constructs/regex.ts @@ -1,6 +1,11 @@ import { encode } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; +/** + * Groups the given sequence into a single element. + * + * @param sequence - Sequence to group. 
+ */ export function regex(sequence: RegexSequence): EncodedRegex { return encode(sequence); } diff --git a/src/constructs/repeat.ts b/src/constructs/repeat.ts index ddb42e3..b445ded 100644 --- a/src/constructs/repeat.ts +++ b/src/constructs/repeat.ts @@ -2,8 +2,21 @@ import { encodeAtomic } from '../encoder'; import type { EncodedRegex, RegexSequence } from '../types'; import { ensureElements } from '../utils'; +/** + * Options for the `repeat` function. + * + * @param min - Minimum number of times to match. + * @param max - Maximum number of times to match (default: unlimited). + * @param greedy - Whether to use greedy quantifiers (default: true). + */ export type RepeatOptions = number | { min: number; max?: number; greedy?: boolean }; +/** + * Creates a quantifier which matches the given sequence a specific number of times. + * + * @param sequence - Sequence to match. + * @param options - Quantifier options. + */ export function repeat(sequence: RegexSequence, options: RepeatOptions): EncodedRegex { const elements = ensureElements(sequence); diff --git a/website/docs/api/overview.md b/website/docs/api/overview.md index ab6031e..5092113 100644 --- a/website/docs/api/overview.md +++ b/website/docs/api/overview.md @@ -24,7 +24,8 @@ Most of the regex constructs accept a regex sequence as their argument. Regex constructs can be composed into a tree structure: ```ts -const currencyCode = repeat(charRange('A', 'Z'), 3); +const currencyCode = repeat(/[A-Z]/, 3); // or repeat(charRange('A', 'Z'), 3); + const currencyAmount = buildRegExp([ choiceOf('$', '€', currencyCode), // currency capture( @@ -64,32 +65,15 @@ TS Regex Builder does not have a construct for non-capturing groups. 
Such groups | Quantifier | Regex Syntax | Description | | -------------------------------- | ------------ | ------------------------------------------------- | -| `zeroOrMore(x)` | `x*` | Zero or more occurence of a pattern | -| `oneOrMore(x)` | `x+` | One or more occurence of a pattern | -| `optional(x)` | `x?` | Zero or one occurence of a pattern | +| `zeroOrMore(x)` | `x*` | Zero or more occurrences of a pattern | +| `oneOrMore(x)` | `x+` | One or more occurrences of a pattern | +| `optional(x)` | `x?` | Zero or one occurrence of a pattern | | `repeat(x, n)` | `x{n}` | Pattern repeats exact number of times | | `repeat(x, { min: n, })` | `x{n,}` | Pattern repeats at least given number of times | | `repeat(x, { min: n, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 number of times | See [Quantifiers](./api/quantifiers) for more info. -### Character classes - -| Character class | Regex Syntax | Description | -| ---------------------- | ------------ | ------------------------------------------------- | -| `any` | `.` | Any character | -| `word` | `\w` | Word character: letter, digit, underscore | -| `digit` | `\d` | Digit character: 0 to 9 | -| `whitespace` | `\s` | Whitespace character: space, tab, line break, ... | -| `anyOf('abc')` | `[abc]` | Any of provided characters | -| `charRange('a', 'z')` | `[a-z]` | Character in a range | -| `charClass(...)` | `[...]` | Union of multiple character classes | -| `negated(...)` | `[^...]` | Negation of a given character class | -| `char(...)` | `\uXXXX` | Character specified given Unicode code point | -| `unicodeProperty(...)` | `\p{...}` | Characters with given Unicode property | - -See [Character Classes](./api/character-classes) and [Unicode](./api/unicode) for more info. 
- ### Assertions | Assertion | Regex Syntax | Description | @@ -98,8 +82,29 @@ See [Character Classes](./api/character-classes) and [Unicode](./api/unicode) fo | `endOfString` | `$` | Match the end of the string (or the end of a line in multiline mode) | | `wordBoundary` | `\b` | Match the start or end of a word without consuming characters | | `lookahead(...)` | `(?=...)` | Match subsequent text without consuming it | -| `negativeLookhead(...)` | `(?!...)` | Reject subsequent text without consuming it | +| `negativeLookahead(...)` | `(?!...)` | Reject subsequent text without consuming it | | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | | `negativeLookbehind(...)` | `(?