Customize tokenizer via the fork API #264

Open · wants to merge 7 commits into base: master
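In short, this PR adds a `tokenize` option to `fork()`, so a forked syntax can substitute its own tokenizer; the forked `parse`, `generate`, and `lexer` all pick it up. A minimal usage sketch, based on the tests in this diff (the single-identifier tokenizer is the same toy one used there):

```js
import { fork, tokenTypes } from 'css-tree';

// Toy tokenizer from the tests below: report the whole input
// as a single identifier token.
const forkedCssTree = fork({
    tokenize(source, onToken) {
        onToken(tokenTypes.Ident, 0, source.length);
    }
});

// The default parser sees 'foo(bar)' as a Function node;
// the forked parser now sees a single Identifier.
const ast = forkedCssTree.parse('foo(bar)', { context: 'value' });
console.log(ast.children.first.type); // 'Identifier'
```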
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,5 +1,6 @@
## next

- Added `tokenize` option to the `fork()` method to allow custom tokenization
- Added support for the [`@container`](https://drafts.csswg.org/css-contain-3/#container-rule) at-rule
- Added support for the [`@starting-style`](https://drafts.csswg.org/css-transitions-2/#defining-before-change-style) at-rule
- Added support for the [`@scope`](https://drafts.csswg.org/css-cascade-6/#scoped-styles) at-rule
93 changes: 92 additions & 1 deletion lib/__tests/common.js
@@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import assert from 'assert';
import { parse, walk, fork, version } from 'css-tree';
import { parse, walk, fork, lexer, generate, version, tokenTypes } from 'css-tree';

const fixtureFilename = './fixtures/stringify.css';
const fixture = normalize(fs.readFileSync(fixtureFilename, 'utf-8'));
@@ -68,4 +68,95 @@ describe('Common', () => {
});
});
});

describe('custom tokenizer should work via fork()', () => {
it('custom tokenizer should be set', () => {
const customTokenizer = () => {};

const forkedCssTree = fork({
tokenize: customTokenizer
});

assert.strictEqual(forkedCssTree.tokenize, customTokenizer);
});

it('custom tokenizer should affect the parser', () => {
const customTokenizer = (source, onToken) => {
onToken(tokenTypes.Ident, 0, source.length);
};

const forkedCssTree = fork({
tokenize: customTokenizer
});

const parserOptions = { context: 'value' };
const input = 'foo(bar)';

const defaultAst = parse(input, parserOptions);
const forkAst = forkedCssTree.parse(input, parserOptions);

// Default parser should give an AST with a function node whose first child is an identifier
assert.strictEqual(defaultAst.children.size, 1);
assert.strictEqual(defaultAst.children.first.type, 'Function');
assert.strictEqual(defaultAst.children.first.children.size, 1);
assert.strictEqual(defaultAst.children.first.children.first.type, 'Identifier');

// Forked parser should give an AST with an identifier node
assert.strictEqual(forkAst.children.size, 1);
assert.strictEqual(forkAst.children.first.type, 'Identifier');
});

it('custom tokenizer should affect the lexer', () => {
const customTokenizer = (source, onToken) => {
onToken(tokenTypes.Ident, 0, source.length);
};

const forkedCssTree = fork({
tokenize: customTokenizer
});

const syntax = 'foo( <number> )';
const input = 'foo(1)';

// Default lexer should match the function syntax
assert(lexer.match(syntax, input).matched);

// Forked lexer should not match the function syntax, because the input isn't tokenized as a function
const forkedResult = forkedCssTree.lexer.match(syntax, input);
assert.strictEqual(forkedResult.matched, null);
});

it('custom tokenizer should affect the generator', () => {
// This custom tokenizer only generates a single token
const customTokenizer = (_, onToken) => {
onToken(tokenTypes.Ident, 0, 1);
};

const forkedCssTree = fork({
tokenize: customTokenizer,
node: {
Identifier: {
structure: {
name: String
},
generate(node) {
// This should be the custom tokenizer
this.tokenize(node.name);
}
}
}
});

const parserOptions = { context: 'value' };
const input = 'foo';
const ast = parse(input, parserOptions);

// Default generator should generate the whole input as-is
assert.equal(generate(ast), input);

// Custom tokenizer only generates a single token for the first character,
// so if the generator uses the custom tokenizer, it should only generate the first character
assert.equal(forkedCssTree.generate(ast), input[0]);
});
});
});
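Taken together, these tests pin down the tokenizer contract used throughout this PR: a tokenizer is a function `(source, onToken)` that calls `onToken(tokenType, startOffset, endOffset)` once per token it emits.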
17 changes: 9 additions & 8 deletions lib/generator/create.js
@@ -1,4 +1,5 @@
import { tokenize, Delim, WhiteSpace } from '../tokenizer/index.js';
import { Delim, WhiteSpace } from '../tokenizer/index.js';
import { getTokenizer } from '../utils/get-tokenizer.js';
import { generateSourceMap } from './sourceMap.js';
import * as tokenBefore from './token-before.js';

@@ -23,12 +24,6 @@ function processChildren(node, delimeter) {
node.children.forEach(this.node, this);
}

function processChunk(chunk) {
tokenize(chunk, (type, start, end) => {
this.token(type, chunk.slice(start, end));
});
}

export function createGenerator(config) {
const types = new Map();

@@ -87,7 +82,13 @@ export function createGenerator(config) {
node: (node) => handlers.node(node),
children: processChildren,
token: (type, value) => handlers.token(type, value),
tokenize: processChunk
tokenize: function (chunk) {
const tokenize = getTokenizer(config);

return tokenize(chunk, (type, start, end) => {
this.token(type, chunk.slice(start, end));
});
}
};

handlers.node(node);
7 changes: 4 additions & 3 deletions lib/lexer/prepare-tokens.js
@@ -1,4 +1,4 @@
import { tokenize } from '../tokenizer/index.js';
import { getTokenizer } from '../utils/get-tokenizer.js';

const astToTokens = {
decorator(handlers) {
@@ -27,8 +27,9 @@ const astToTokens = {
}
};

function stringToTokens(str) {
function stringToTokens(str, syntax) {
const tokens = [];
const tokenize = getTokenizer(syntax);

tokenize(str, (type, start, end) =>
tokens.push({
@@ -43,7 +44,7 @@

export default function(value, syntax) {
if (typeof value === 'string') {
return stringToTokens(value);
return stringToTokens(value, syntax);
}

return syntax.generate(value, astToTokens);
7 changes: 4 additions & 3 deletions lib/parser/create.js
@@ -1,7 +1,6 @@
import { List } from '../utils/List.js';
import { SyntaxError } from './SyntaxError.js';
import {
tokenize,
OffsetToLocation,
TokenStream,
tokenNames,
@@ -21,6 +20,7 @@ import {
Number as NumberToken
} from '../tokenizer/index.js';
import { readSequence } from './sequence.js';
import { getTokenizer } from '../utils/get-tokenizer.js';

const NOOP = () => {};
const EXCLAMATIONMARK = 0x0021; // U+0021 EXCLAMATION MARK (!)
@@ -57,7 +57,8 @@ function processConfig(config) {
scope: Object.assign(Object.create(null), config.scope),
atrule: fetchParseValues(config.atrule),
pseudo: fetchParseValues(config.pseudo),
node: fetchParseValues(config.node)
node: fetchParseValues(config.node),
tokenize: getTokenizer(config)
};

for (const [name, context] of Object.entries(config.parseContext)) {
@@ -297,7 +298,7 @@ export function createParser(config) {
source = source_;
options = options || {};

parser.setSource(source, tokenize);
parser.setSource(source, parser.tokenize);
locationMap.setSource(
source,
options.offset,
6 changes: 6 additions & 0 deletions lib/syntax/config/mix.js
@@ -110,6 +110,12 @@ export default function mix(dest, src) {
...sliceProps(value, ['name', 'structure', 'parse', 'generate', 'walkContext'])
};
break;

case 'tokenize':
if (typeof value === 'function') {
result[prop] = value;
}
break;
}
}

4 changes: 3 additions & 1 deletion lib/syntax/config/parser.js
@@ -2,6 +2,7 @@ import * as scope from '../scope/index.js';
import atrule from '../atrule/index.js';
import pseudo from '../pseudo/index.js';
import * as node from '../node/index-parse.js';
import { tokenize } from '../../tokenizer/index.js';

export default {
parseContext: {
@@ -41,5 +42,6 @@
scope,
atrule,
pseudo,
node
node,
tokenize
};
4 changes: 2 additions & 2 deletions lib/syntax/create.js
@@ -1,10 +1,10 @@
import { tokenize } from '../tokenizer/index.js';
import { createParser } from '../parser/create.js';
import { createGenerator } from '../generator/create.js';
import { createConvertor } from '../convertor/create.js';
import { createWalker } from '../walker/create.js';
import { Lexer } from '../lexer/Lexer.js';
import mix from './config/mix.js';
import { getTokenizer } from '../utils/get-tokenizer.js';

function createSyntax(config) {
const parse = createParser(config);
@@ -16,7 +16,7 @@ function createSyntax(config) {
lexer: null,
createLexer: config => new Lexer(config, syntax, syntax.lexer.structure),

tokenize,
tokenize: getTokenizer(config),
parse,
generate,

18 changes: 18 additions & 0 deletions lib/utils/get-tokenizer.js
@@ -0,0 +1,18 @@
import { tokenize } from '../tokenizer/index.js';

const FUNCTION_TYPE = 'function';

/**
* Gets the tokenizer function from the configuration object or returns the default tokenizer
*
* @param config Configuration object
* @returns Corresponding tokenizer function
*/
export function getTokenizer(config) {
if (config && typeof config.tokenize === FUNCTION_TYPE) {
return config.tokenize;
}

// Fallback to the default tokenizer
return tokenize;
}
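As a usage note, a fork's tokenizer does not have to start from scratch: since the default `tokenize` is a public css-tree export, it can be wrapped. A sketch under that assumption (the url-to-identifier remapping is an arbitrary rule for illustration, not something this PR does):

```js
import { fork, tokenize, tokenTypes } from 'css-tree';

// Delegate to the default tokenizer, but remap every url token
// to a plain identifier (illustrative rule only).
const forked = fork({
    tokenize(source, onToken) {
        tokenize(source, (type, start, end) => {
            onToken(type === tokenTypes.Url ? tokenTypes.Ident : type, start, end);
        });
    }
});
```

Because `getTokenizer()` falls back to the default tokenizer whenever `config.tokenize` is not a function, forks that never set the option keep the stock behavior.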