From 891407ec76ee7f329d4adc58a8ba68709c01bdf9 Mon Sep 17 00:00:00 2001 From: Alexander KIRILOV Date: Tue, 21 Feb 2023 18:53:51 +0100 Subject: [PATCH] feat(gherkin): added custom flavor registry --- .../src/GherkinInMarkdownTokenMatcher.ts | 53 ++--- .../src/flavors/CustomFlavorRegistry.ts | 42 ++++ javascript/src/flavors/GherkinFlavor.ts | 14 ++ javascript/src/flavors/KeywordPrefixes.ts | 4 + javascript/src/generateMessages.ts | 21 +- javascript/src/makeSourceEnvelope.ts | 11 +- .../test/GherkinAsciidocTokenMatcherTest.ts | 189 ++++++++++++++++++ .../test/GherkinInAsciidocTokenMatcher.ts | 12 ++ javascript/test/ParserTest.ts | 34 +++- 9 files changed, 339 insertions(+), 41 deletions(-) create mode 100644 javascript/src/flavors/CustomFlavorRegistry.ts create mode 100644 javascript/src/flavors/GherkinFlavor.ts create mode 100644 javascript/src/flavors/KeywordPrefixes.ts create mode 100644 javascript/test/GherkinAsciidocTokenMatcherTest.ts create mode 100644 javascript/test/GherkinInAsciidocTokenMatcher.ts diff --git a/javascript/src/GherkinInMarkdownTokenMatcher.ts b/javascript/src/GherkinInMarkdownTokenMatcher.ts index 742645a0f..06fed3400 100644 --- a/javascript/src/GherkinInMarkdownTokenMatcher.ts +++ b/javascript/src/GherkinInMarkdownTokenMatcher.ts @@ -1,13 +1,14 @@ import ITokenMatcher from './ITokenMatcher' import Dialect from './Dialect' -import { Token, TokenType } from './Parser' +import {Token, TokenType} from './Parser' import DIALECTS from './gherkin-languages.json' -import { Item } from './IToken' +import {Item} from './IToken' import * as messages from '@cucumber/messages' -import { NoSuchLanguageException } from './Errors' +import {NoSuchLanguageException} from './Errors' +import {KeywordPrefixes} from "./flavors/KeywordPrefixes"; -const DIALECT_DICT: { [key: string]: Dialect } = DIALECTS -const DEFAULT_DOC_STRING_SEPARATOR = /^(```[`]*)(.*)/ +export const DIALECT_DICT: { [key: string]: Dialect } = DIALECTS +export const DEFAULT_DOC_STRING_SEPARATOR = /^(```[`]*)(.*)/ function addKeywordTypeMappings(h: { [key: string]: messages.StepKeywordType[] }, keywords: readonly string[], keywordType: messages.StepKeywordType) { for (const k of keywords) { @@ -19,17 +20,23 @@ function addKeywordTypeMappings(h: { [key: string]: messages.StepKeywordType[] } } export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher { - private dialect: Dialect - private dialectName: string - private readonly nonStarStepKeywords: string[] + dialect: Dialect + dialectName: string + readonly nonStarStepKeywords: string[] private readonly stepRegexp: RegExp private readonly headerRegexp: RegExp private activeDocStringSeparator: RegExp private indentToRemove: number - private matchedFeatureLine: boolean + matchedFeatureLine: boolean + private prefixes: KeywordPrefixes = { + // https://spec.commonmark.org/0.29/#bullet-list-marker + BULLET: '^(\\s*[*+-]\\s*)', + HEADER: '^(#{1,6}\\s)', + } private keywordTypesMap: { [key: string]: messages.StepKeywordType[] } - constructor(private readonly defaultDialectName: string = 'en') { + constructor(private readonly defaultDialectName: string = 'en', prefixes?: KeywordPrefixes) { + prefixes ? this.prefixes = prefixes : null; this.dialect = DIALECT_DICT[defaultDialectName] this.nonStarStepKeywords = [] .concat(this.dialect.given) @@ -41,7 +48,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher self.indexOf(value) === index) this.headerRegexp = new RegExp( - `${KeywordPrefix.HEADER}(${headerKeywords.map(escapeRegExp).join('|')})` + `${this.prefixes.HEADER}(${headerKeywords.map(escapeRegExp).join('|')})` ) this.reset() @@ -171,7 +178,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher; + + constructor() { + this.flavors = new Array(); + } + + public registerFlavor(name: string, fileExtension: string, tokenMatcher: ITokenMatcher) { + this.flavors.push(new GherkinFlavor(name, fileExtension, tokenMatcher)); + } + + mediaTypeFor(uri: string): CustomMediaType { + const flavor = this.flavors.find(flavor => uri.endsWith(flavor.fileExtension)) + return flavor.mediaType; + } + + tokenMatcherFor(sourceMediaType: SourceMediaType | CustomMediaType): ITokenMatcher { + const flavor = this.flavors.find(flavor => flavor.mediaType === sourceMediaType); + return flavor.tokenMatcher; + } + + private static instance: CustomFlavorRegistry; + public static getInstance() { + if(!this.instance) { + this.instance = new CustomFlavorRegistry(); + } + + return this.instance; + } +} \ No newline at end of file diff --git a/javascript/src/flavors/GherkinFlavor.ts b/javascript/src/flavors/GherkinFlavor.ts new file mode 100644 index 000000000..5c673a5d1 --- /dev/null +++ b/javascript/src/flavors/GherkinFlavor.ts @@ -0,0 +1,14 @@ +import ITokenMatcher from "../ITokenMatcher"; +import {TokenType} from "../Parser"; +import {CustomMediaType} from "@cucumber/messages/src"; + +export default class GherkinFlavor { + + constructor(public name: string, public fileExtension: string, public tokenMatcher: ITokenMatcher) { + + } + + get mediaType(): CustomMediaType { + return `text/x.cucumber.gherkin+${this.name}`; + } +} \ No newline at end of file diff --git a/javascript/src/flavors/KeywordPrefixes.ts b/javascript/src/flavors/KeywordPrefixes.ts new file mode 100644 index 000000000..5b4fa541e --- /dev/null +++ b/javascript/src/flavors/KeywordPrefixes.ts @@ -0,0 +1,4 @@ +export type KeywordPrefixes = { + BULLET: string, + HEADER: string, +} \ No newline at end of file diff --git a/javascript/src/generateMessages.ts b/javascript/src/generateMessages.ts index cf693fa1d..11d3d6011 100644 --- a/javascript/src/generateMessages.ts +++ b/javascript/src/generateMessages.ts @@ -7,22 +7,25 @@ import IGherkinOptions from './IGherkinOptions' import makeSourceEnvelope from './makeSourceEnvelope' import ITokenMatcher from './ITokenMatcher' import GherkinInMarkdownTokenMatcher from './GherkinInMarkdownTokenMatcher' +import CustomFlavorRegistry from "./flavors/CustomFlavorRegistry"; export default function generateMessages( data: string, uri: string, - mediaType: messages.SourceMediaType, + mediaType: messages.SourceMediaType | messages.CustomMediaType, options: IGherkinOptions ): readonly messages.Envelope[] { + let tokenMatcher: ITokenMatcher - switch (mediaType) { - case messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN: - tokenMatcher = new GherkinClassicTokenMatcher(options.defaultDialect) - break - case messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN: - tokenMatcher = new GherkinInMarkdownTokenMatcher(options.defaultDialect) - break - default: + const customFlavorsRegistry = CustomFlavorRegistry.getInstance(); + + if (mediaType === 'text/x.cucumber.gherkin+plain') { + tokenMatcher = new GherkinClassicTokenMatcher(options.defaultDialect) + } else if (mediaType === 'text/x.cucumber.gherkin+markdown') { + tokenMatcher = new GherkinInMarkdownTokenMatcher(options.defaultDialect) + } else { + tokenMatcher = customFlavorsRegistry.tokenMatcherFor(mediaType) + if(!tokenMatcher) throw new Error(`Unsupported media type: ${mediaType}`) } diff --git a/javascript/src/makeSourceEnvelope.ts b/javascript/src/makeSourceEnvelope.ts index fd0021860..4b8767108 100644 --- a/javascript/src/makeSourceEnvelope.ts +++ b/javascript/src/makeSourceEnvelope.ts @@ -1,11 +1,16 @@ import * as messages from '@cucumber/messages' +import CustomFlavorRegistry from "./flavors/CustomFlavorRegistry"; export default function makeSourceEnvelope(data: string, uri: string): messages.Envelope { - let mediaType: messages.SourceMediaType + let mediaType: messages.SourceMediaType | messages.CustomMediaType; + let customFlavorsRegistry = CustomFlavorRegistry.getInstance(); + if (uri.endsWith('.feature')) { - mediaType = messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN + mediaType = 'text/x.cucumber.gherkin+plain'; } else if (uri.endsWith('.md')) { - mediaType = messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN + mediaType = 'text/x.cucumber.gherkin+markdown'; + } else { + mediaType = customFlavorsRegistry.mediaTypeFor(uri); } if (!mediaType) throw new Error(`The uri (${uri}) must end with .feature or .md`) return { diff --git a/javascript/test/GherkinAsciidocTokenMatcherTest.ts b/javascript/test/GherkinAsciidocTokenMatcherTest.ts new file mode 100644 index 000000000..84c36ec10 --- /dev/null +++ b/javascript/test/GherkinAsciidocTokenMatcherTest.ts @@ -0,0 +1,189 @@ +import assert from 'assert' +import GherkinLine from '../src/GherkinLine' +import * as messages from '@cucumber/messages' +import {Token, TokenType} from '../src/Parser' +import ITokenMatcher from '../src/ITokenMatcher' +import {Item} from '../src/IToken' +import GherkinInAsciidocTokenMatcher from "./GherkinInAsciidocTokenMatcher"; + +describe('GherkinInAsciidocTokenMatcher', function () { + let tm: ITokenMatcher + let location: messages.Location + + beforeEach(() => { + tm = new GherkinInAsciidocTokenMatcher('en') + location = { line: 1, column: 1 } + }) + + it('matches FeatureLine', () => { + const line = new GherkinLine('== Feature: hello', location.line) + const token = new Token(line, location) + assert(tm.match_FeatureLine(token)) + assert.strictEqual(token.matchedType, TokenType.FeatureLine) + assert.strictEqual(token.matchedKeyword, 'Feature') + assert.strictEqual(token.matchedText, 'hello') + }) + + it('matches FeatureLine in French', () => { + tm = new GherkinInAsciidocTokenMatcher('fr') + const line = new GherkinLine('== Fonctionnalité: hello', location.line) + const token = new Token(line, location) + assert(tm.match_FeatureLine(token)) + assert.strictEqual(token.matchedType, TokenType.FeatureLine) + assert.strictEqual(token.matchedKeyword, 'Fonctionnalité') + assert.strictEqual(token.matchedText, 'hello') + }) + + it('matches bullet Step', () => { + const line = new GherkinLine(' * Given I have 3 cukes', location.line) + const token = new Token(line, location) + assert(tm.match_StepLine(token)) + assert.strictEqual(token.matchedType, TokenType.StepLine) + assert.strictEqual(token.matchedKeyword, 'Given ') + assert.strictEqual(token.matchedText, 'I have 3 cukes') + assert.strictEqual(token.location.column, 6) + }) + + it('matches period Step', () => { + const line = new GherkinLine(' . Given I have 3 cukes', location.line) + const token = new Token(line, location) + assert(tm.match_StepLine(token)) + assert.strictEqual(token.matchedType, TokenType.StepLine) + assert.strictEqual(token.matchedKeyword, 'Given ') + assert.strictEqual(token.matchedText, 'I have 3 cukes') + assert.strictEqual(token.location.column, 6) + }) + + it('matches hyphen Step', () => { + const line = new GherkinLine(' - Given I have 3 cukes', location.line) + const token = new Token(line, location) + assert(tm.match_StepLine(token)) + assert.strictEqual(token.matchedType, TokenType.StepLine) + assert.strictEqual(token.matchedKeyword, 'Given ') + assert.strictEqual(token.matchedText, 'I have 3 cukes') + assert.strictEqual(token.location.column, 6) + }) + + it('matches arbitrary text as Other', () => { + const line = new GherkinLine('Whatever', location.line) + const token = new Token(line, location) + assert(tm.match_Other(token)) + assert.strictEqual(token.matchedType, TokenType.Other) + }) + + it('matches a non-keyword line as Other', () => { + const line = new GherkinLine('whatever Given', location.line) + const token = new Token(line, location) + assert(tm.match_Other(token)) + assert.strictEqual(token.matchedType, TokenType.Other) + }) + + it('matches a non-keyword bullet line as Other', () => { + const line = new GherkinLine('* whatever Given', location.line) + const token = new Token(line, location) + assert(tm.match_Other(token)) + assert.strictEqual(token.matchedType, TokenType.Other) + }) + + it('matches a non-keyword header line as Other', () => { + const line = new GherkinLine('== The world is wet', location.line) + const token = new Token(line, location) + assert(tm.match_Other(token)) + assert.strictEqual(token.matchedType, TokenType.Other) + }) + + it('matches ``` docstring separator', () => { + const line = new GherkinLine(' ```somefink', location.line) + const token = new Token(line, location) + assert(tm.match_DocStringSeparator(token)) + assert.strictEqual(token.matchedType, TokenType.DocStringSeparator) + assert.strictEqual(token.matchedKeyword, '```') + assert.strictEqual(token.matchedText, 'somefink') + }) + + it('matches ```` docstring separator', () => { + const t1 = new Token(new GherkinLine(' ````', location.line), location) + assert(tm.match_DocStringSeparator(t1)) + assert.strictEqual(t1.matchedType, TokenType.DocStringSeparator) + assert.strictEqual(t1.matchedKeyword, '````') + assert.strictEqual(t1.matchedIndent, 2) + assert.strictEqual(t1.matchedText, '') + + const t2 = new Token(new GherkinLine(' ```', location.line), location) + assert(tm.match_Other(t2)) + assert.strictEqual(t2.matchedType, TokenType.Other) + assert.strictEqual(t2.matchedKeyword, undefined) + assert.strictEqual(t2.matchedText, '```') + + const t3 = new Token(new GherkinLine(' ````', location.line), location) + assert(tm.match_DocStringSeparator(t3)) + assert.strictEqual(t3.matchedType, TokenType.DocStringSeparator) + assert.strictEqual(t3.matchedKeyword, '````') + assert.strictEqual(t2.matchedIndent, 2) + assert.strictEqual(t3.matchedText, '') + }) + + it('matches table row indented 2 spaces', () => { + const t = new Token(new GherkinLine(' |foo|bar|', location.line), location) + assert(tm.match_TableRow(t)) + assert.strictEqual(t.matchedType, TokenType.TableRow) + assert.strictEqual(t.matchedKeyword, '|') + const expectedItems: Item[] = [ + { column: 4, text: 'foo' }, + { column: 8, text: 'bar' }, + ] + assert.deepStrictEqual(t.matchedItems, expectedItems) + }) + + it('matches table row indented 5 spaces', () => { + const t = new Token(new GherkinLine(' |foo|bar|', location.line), location) + assert(tm.match_TableRow(t)) + assert.strictEqual(t.matchedType, TokenType.TableRow) + assert.strictEqual(t.matchedKeyword, '|') + const expectedItems: Item[] = [ + { column: 7, text: 'foo' }, + { column: 11, text: 'bar' }, + ] + assert.deepStrictEqual(t.matchedItems, expectedItems) + }) + + it('does not matche table cells indented 1 space', () => { + const t = new Token(new GherkinLine(' |foo|bar|', location.line), location) + assert(!tm.match_TableRow(t)) + }) + + it('does not matche table cells indented 6 spaces', () => { + const t = new Token(new GherkinLine(' |foo|bar|', location.line), location) + assert(!tm.match_TableRow(t)) + }) + + it('matches table separator row as comment', () => { + assert(tm.match_TableRow(new Token(new GherkinLine(' | h1 | h2 |', location.line), location))) + + const t2 = new Token(new GherkinLine(' | --- | --- |', location.line), location) + assert(!tm.match_TableRow(t2)) + assert(tm.match_Comment(t2)) + }) + + it('matches indented tags', () => { + const t = new Token(new GherkinLine(' `@foo` `@bar`', location.line), location) + assert(tm.match_TagLine(t)) + assert.strictEqual(t.matchedType, TokenType.TagLine) + const expectedItems: Item[] = [ + { column: 4, text: '@foo' }, + { column: 11, text: '@bar' }, + ] + assert.deepStrictEqual(t.matchedItems, expectedItems) + }) + + it('matches unindented tags', () => { + const t = new Token(new GherkinLine('`@foo` `@bar`', location.line), location) + assert(tm.match_TagLine(t)) + assert.strictEqual(t.matchedType, TokenType.TagLine) + const expectedItems: Item[] = [ + { column: 2, text: '@foo' }, + { column: 11, text: '@bar' }, + ] + assert.deepStrictEqual(t.matchedItems, expectedItems) + }) +}) diff --git a/javascript/test/GherkinInAsciidocTokenMatcher.ts b/javascript/test/GherkinInAsciidocTokenMatcher.ts new file mode 100644 index 000000000..201a6ddfe --- /dev/null +++ b/javascript/test/GherkinInAsciidocTokenMatcher.ts @@ -0,0 +1,12 @@ +import {GherkinInMarkdownTokenMatcher} from "../src"; + +export default class GherkinInAsciidocTokenMatcher extends GherkinInMarkdownTokenMatcher { + constructor(defaultDialectName: string = 'en') { + const asciidocPrefixes = { + BULLET: '^(\\s*[*\\.-]\\s*)', + HEADER: '^(={1,6}\\s)', + } + super(defaultDialectName, asciidocPrefixes); + } + +} \ No newline at end of file diff --git a/javascript/test/ParserTest.ts b/javascript/test/ParserTest.ts index 90635320f..251351262 100644 --- a/javascript/test/ParserTest.ts +++ b/javascript/test/ParserTest.ts @@ -6,6 +6,8 @@ import GherkinClassicTokenMatcher from '../src/GherkinClassicTokenMatcher' import AstNode from '../src/AstNode' import generateMessages from '../src/generateMessages' import GherkinInMarkdownTokenMatcher from '../src/GherkinInMarkdownTokenMatcher' +import CustomFlavorRegistry from "../src/flavors/CustomFlavorRegistry"; +import GherkinInAsciidocTokenMatcher from "./GherkinInAsciidocTokenMatcher"; describe('Parser', function () { describe('with Gherkin Classic', () => { @@ -161,7 +163,7 @@ describe('Parser', function () { ' | is (not) triggered | value |\n' + ' | is triggered | foo |\n ', '', - messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN, + 'text/x.cucumber.gherkin+plain', { includePickles: true, newId: messages.IdGenerator.incrementing() } ) @@ -291,7 +293,7 @@ description const envelopes = generateMessages( markdown, 'test.md', - messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN, + 'text/x.cucumber.gherkin+markdown', { includePickles: true, includeGherkinDocument: true, @@ -309,4 +311,30 @@ description assert.strictEqual(pickle.steps[0].argument.docString.content, '```what') }) }) -}) + + it('parses custom flavor Gherkin such as asciidoc', async () => { + const asciidoc = ` += Feature: DocString variations +== Scenario: minimalistic +* Given a step +` + CustomFlavorRegistry.getInstance().registerFlavor('asciidoc', 'adoc', new GherkinInAsciidocTokenMatcher()) + + const envelopes = generateMessages( + asciidoc, + 'test.feature.adoc', + 'text/x.cucumber.gherkin+asciidoc', + { + includePickles: true, + includeGherkinDocument: true, + newId: messages.IdGenerator.incrementing(), + } + ) + + const pickle = envelopes.find((envelope) => envelope.pickle).pickle + + assert.strictEqual(pickle.uri, 'test.feature.adoc'); + assert.strictEqual(pickle.name, 'minimalistic'); + assert.strictEqual(pickle.steps[0].text, 'a step'); + }); +});