diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4bddd22..a576b7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
 ## next
 
+- Added `tokenize` option to the `fork()` method to allow custom tokenization
 - Added support for the [`@container`](https://drafts.csswg.org/css-contain-3/#container-rule) at-rule
 - Added support for the [`@starting-style`](https://drafts.csswg.org/css-transitions-2/#defining-before-change-style) at-rule
 - Added support for the [`@scope`](https://drafts.csswg.org/css-cascade-6/#scoped-styles) at-rule
diff --git a/lib/__tests/common.js b/lib/__tests/common.js
index 326c8cb..237ac96 100644
--- a/lib/__tests/common.js
+++ b/lib/__tests/common.js
@@ -1,7 +1,7 @@
 import fs from 'fs';
 import path from 'path';
 import assert from 'assert';
-import { parse, walk, fork, version } from 'css-tree';
+import { parse, walk, fork, lexer, generate, version, tokenTypes } from 'css-tree';
 
 const fixtureFilename = './fixtures/stringify.css';
 const fixture = normalize(fs.readFileSync(fixtureFilename, 'utf-8'));
@@ -68,4 +68,95 @@ describe('Common', () => {
             });
         });
     });
+
+    describe('custom tokenizer should work via fork()', () => {
+        it('custom tokenizer should be set', () => {
+            const customTokenizer = () => {};
+
+            const forkedCssTree = fork({
+                tokenize: customTokenizer
+            });
+
+            assert.strictEqual(forkedCssTree.tokenize, customTokenizer);
+        });
+
+        it('custom tokenizer should affect the parser', () => {
+            const customTokenizer = (source, onToken) => {
+                onToken(tokenTypes.Ident, 0, source.length);
+            };
+
+            const forkedCssTree = fork({
+                tokenize: customTokenizer
+            });
+
+            const parserOptions = { context: 'value' };
+            const input = 'foo(bar)';
+
+            const defaultAst = parse(input, parserOptions);
+            const forkAst = forkedCssTree.parse(input, parserOptions);
+
+            // Default parser should give an AST with a function node whose first child is an identifier
+            assert.strictEqual(defaultAst.children.size, 1);
+            assert.strictEqual(defaultAst.children.first.type, 'Function');
+            assert.strictEqual(defaultAst.children.first.children.size, 1);
+            assert.strictEqual(defaultAst.children.first.children.first.type, 'Identifier');
+
+            // Forked parser should give an AST with an identifier node
+            assert.strictEqual(forkAst.children.size, 1);
+            assert.strictEqual(forkAst.children.first.type, 'Identifier');
+        });
+
+        it('custom tokenizer should affect the lexer', () => {
+            const customTokenizer = (source, onToken) => {
+                onToken(tokenTypes.Ident, 0, source.length);
+            };
+
+            const forkedCssTree = fork({
+                tokenize: customTokenizer
+            });
+
+            const syntax = 'foo( )';
+            const input = 'foo(1)';
+
+            // Default lexer should match the function syntax
+            assert(lexer.match(syntax, input).matched);
+
+            // Forked lexer should not match the function syntax, because the input isn't tokenized as a function
+            const forkedResult = forkedCssTree.lexer.match(syntax, input);
+            assert.strictEqual(forkedResult.matched, null);
+        });
+
+        it('custom tokenizer should affect the generator', () => {
+            // This custom tokenizer only generates a single token
+            const customTokenizer = (_, onToken) => {
+                onToken(tokenTypes.Ident, 0, 1);
+            };
+
+            const forkedCssTree = fork({
+                tokenize: customTokenizer,
+                node: {
+                    Identifier: {
+                        structure: {
+                            name: String
+                        },
+                        generate(node) {
+                            // This should be the custom tokenizer
+                            this.tokenize(node.name);
+                        }
+                    }
+                }
+            });
+
+            const parserOptions = { context: 'value' };
+            const input = 'foo';
+            const ast = parse(input, parserOptions);
+
+            // Default generator should generate the whole input as-is
+            assert.equal(generate(ast), input);
+
+            // Custom tokenizer only generates a single token for the first character,
+            // so if the generator uses the custom tokenizer, it should only generate the first character
+            assert.equal(forkedCssTree.generate(ast), input[0]);
+        });
+    });
 });
diff --git a/lib/generator/create.js b/lib/generator/create.js
index c542f4f..bc2464b 100644
--- a/lib/generator/create.js
+++ b/lib/generator/create.js
@@ -1,4 +1,5 @@
-import { tokenize, Delim, WhiteSpace } from '../tokenizer/index.js';
+import { Delim, WhiteSpace } from '../tokenizer/index.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';
 import { generateSourceMap } from './sourceMap.js';
 import * as tokenBefore from './token-before.js';
 
@@ -23,12 +24,6 @@ function processChildren(node, delimeter) {
     node.children.forEach(this.node, this);
 }
 
-function processChunk(chunk) {
-    tokenize(chunk, (type, start, end) => {
-        this.token(type, chunk.slice(start, end));
-    });
-}
-
 export function createGenerator(config) {
     const types = new Map();
 
@@ -87,7 +82,13 @@ export function createGenerator(config) {
         node: (node) => handlers.node(node),
         children: processChildren,
         token: (type, value) => handlers.token(type, value),
-        tokenize: processChunk
+        tokenize: function (chunk) {
+            const tokenize = getTokenizer(config);
+
+            return tokenize(chunk, (type, start, end) => {
+                this.token(type, chunk.slice(start, end));
+            });
+        }
     };
 
     handlers.node(node);
diff --git a/lib/lexer/prepare-tokens.js b/lib/lexer/prepare-tokens.js
index 4243fa8..a1dd1bf 100644
--- a/lib/lexer/prepare-tokens.js
+++ b/lib/lexer/prepare-tokens.js
@@ -1,4 +1,4 @@
-import { tokenize } from '../tokenizer/index.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';
 
 const astToTokens = {
     decorator(handlers) {
@@ -27,8 +27,9 @@ const astToTokens = {
     }
 };
 
-function stringToTokens(str) {
+function stringToTokens(str, syntax) {
     const tokens = [];
+    const tokenize = getTokenizer(syntax);
 
     tokenize(str, (type, start, end) =>
         tokens.push({
@@ -43,7 +44,7 @@
 
 export default function(value, syntax) {
     if (typeof value === 'string') {
-        return stringToTokens(value);
+        return stringToTokens(value, syntax);
     }
 
     return syntax.generate(value, astToTokens);
diff --git a/lib/parser/create.js b/lib/parser/create.js
index 8c08d81..4ee205e 100644
--- a/lib/parser/create.js
+++ b/lib/parser/create.js
@@ -1,7 +1,6 @@
 import { List } from '../utils/List.js';
 import { SyntaxError } from './SyntaxError.js';
 import {
-    tokenize,
     OffsetToLocation,
     TokenStream,
     tokenNames,
@@ -21,6 +20,7 @@ import {
     Number as NumberToken
 } from '../tokenizer/index.js';
 import { readSequence } from './sequence.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';
 
 const NOOP = () => {};
 const EXCLAMATIONMARK = 0x0021; // U+0021 EXCLAMATION MARK (!)
@@ -57,7 +57,8 @@ function processConfig(config) {
         scope: Object.assign(Object.create(null), config.scope),
         atrule: fetchParseValues(config.atrule),
         pseudo: fetchParseValues(config.pseudo),
-        node: fetchParseValues(config.node)
+        node: fetchParseValues(config.node),
+        tokenize: getTokenizer(config)
     };
 
     for (const [name, context] of Object.entries(config.parseContext)) {
@@ -297,7 +298,7 @@ export function createParser(config) {
         source = source_;
         options = options || {};
 
-        parser.setSource(source, tokenize);
+        parser.setSource(source, parser.tokenize);
         locationMap.setSource(
             source,
             options.offset,
diff --git a/lib/syntax/config/mix.js b/lib/syntax/config/mix.js
index 2a5f409..35f7a98 100644
--- a/lib/syntax/config/mix.js
+++ b/lib/syntax/config/mix.js
@@ -110,6 +110,12 @@ export default function mix(dest, src) {
                     ...sliceProps(value, ['name', 'structure', 'parse', 'generate', 'walkContext'])
                 };
                 break;
+
+            case 'tokenize':
+                if (typeof value === 'function') {
+                    result[prop] = value;
+                }
+                break;
         }
     }
 
diff --git a/lib/syntax/config/parser.js b/lib/syntax/config/parser.js
index 0b455aa..4fae613 100644
--- a/lib/syntax/config/parser.js
+++ b/lib/syntax/config/parser.js
@@ -2,6 +2,7 @@ import * as scope from '../scope/index.js';
 import atrule from '../atrule/index.js';
 import pseudo from '../pseudo/index.js';
 import * as node from '../node/index-parse.js';
+import { tokenize } from '../../tokenizer/index.js';
 
 export default {
     parseContext: {
@@ -41,5 +42,6 @@ export default {
     scope,
     atrule,
     pseudo,
-    node
+    node,
+    tokenize
 };
diff --git a/lib/syntax/create.js b/lib/syntax/create.js
index 73e6cf7..c540c81 100644
--- a/lib/syntax/create.js
+++ b/lib/syntax/create.js
@@ -1,10 +1,10 @@
-import { tokenize } from '../tokenizer/index.js';
 import { createParser } from '../parser/create.js';
 import { createGenerator } from '../generator/create.js';
 import { createConvertor } from '../convertor/create.js';
 import { createWalker } from '../walker/create.js';
 import { Lexer } from '../lexer/Lexer.js';
 import mix from './config/mix.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';
 
 function createSyntax(config) {
     const parse = createParser(config);
@@ -16,7 +16,7 @@ function createSyntax(config) {
         lexer: null,
         createLexer: config => new Lexer(config, syntax, syntax.lexer.structure),
 
-        tokenize,
+        tokenize: getTokenizer(config),
         parse,
         generate,
 
diff --git a/lib/utils/get-tokenizer.js b/lib/utils/get-tokenizer.js
new file mode 100644
index 0000000..3b4bc4b
--- /dev/null
+++ b/lib/utils/get-tokenizer.js
@@ -0,0 +1,18 @@
+import { tokenize } from '../tokenizer/index.js';
+
+const FUNCTION_TYPE = 'function';
+
+/**
+ * Gets the tokenizer function from the configuration object or returns the default tokenizer
+ *
+ * @param config Configuration object
+ * @returns Corresponding tokenizer function
+ */
+export function getTokenizer(config) {
+    if (config && typeof config.tokenize === FUNCTION_TYPE) {
+        return config.tokenize;
+    }
+
+    // Fallback to the default tokenizer
+    return tokenize;
+}
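For reviewers, a minimal usage sketch of the new option through the public API. It mirrors the `fork()` tests above; the single-Ident tokenizer is illustrative only and not part of this PR:

```js
import { fork, tokenTypes } from 'css-tree';

// Fork css-tree with a custom tokenizer. The callback is invoked as
// onToken(type, start, end) for each token, as exercised in the tests above.
const customCssTree = fork({
    tokenize(source, onToken) {
        // Illustrative tokenizer: emit the whole input as a single Ident token
        onToken(tokenTypes.Ident, 0, source.length);
    }
});

// The fork's parser, lexer and generator all resolve the tokenizer via
// getTokenizer(config); the default css-tree export stays untouched.
const ast = customCssTree.parse('foo(bar)', { context: 'value' });
console.log(ast.children.first.type); // 'Identifier' instead of 'Function'
```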