Merge 4659880 into ba6dfd8
scripthunter7 committed Oct 24, 2023
2 parents (ba6dfd8 + 4659880) · commit a643cbb
Showing 9 changed files with 139 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,5 +1,6 @@
## next

- Added `tokenize` option to the `fork()` method to allow custom tokenization
- Added support for the [`@container`](https://drafts.csswg.org/css-contain-3/#container-rule) at-rule
- Added support for the [`@starting-style`](https://drafts.csswg.org/css-transitions-2/#defining-before-change-style) at-rule
- Added support for the [`@scope`](https://drafts.csswg.org/css-cascade-6/#scoped-styles) at-rule
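For context, here is a minimal sketch of the new `tokenize` option (the callback signature `(type, start, end)` and the resulting behavior follow the tests added below; this illustration is not part of the commit):

```js
import { fork, tokenTypes } from 'css-tree';

// A toy tokenizer that reports the whole input as a single identifier token.
const forked = fork({
    tokenize(source, onToken) {
        onToken(tokenTypes.Ident, 0, source.length);
    }
});

// The forked parser, lexer, and generator all pick up the custom tokenizer.
const ast = forked.parse('foo(bar)', { context: 'value' });
console.log(ast.children.first.type); // 'Identifier' rather than 'Function'
```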
93 changes: 92 additions & 1 deletion lib/__tests/common.js
@@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import assert from 'assert';
-import { parse, walk, fork, version } from 'css-tree';
+import { parse, walk, fork, lexer, generate, version, tokenTypes } from 'css-tree';

const fixtureFilename = './fixtures/stringify.css';
const fixture = normalize(fs.readFileSync(fixtureFilename, 'utf-8'));
@@ -68,4 +68,95 @@ describe('Common', () => {
});
});
});

describe('custom tokenizer should work via fork()', () => {
it('custom tokenizer should be set', () => {
const customTokenizer = () => {};

const forkedCssTree = fork({
tokenize: customTokenizer
});

assert.strictEqual(forkedCssTree.tokenize, customTokenizer);
});

it('custom tokenizer should affect the parser', () => {
const customTokenizer = (source, onToken) => {
onToken(tokenTypes.Ident, 0, source.length);
};

const forkedCssTree = fork({
tokenize: customTokenizer
});

const parserOptions = { context: 'value' };
const input = 'foo(bar)';

const defaultAst = parse(input, parserOptions);
const forkAst = forkedCssTree.parse(input, parserOptions);

// Default parser should give an AST with a function node whose first child is an identifier
assert.strictEqual(defaultAst.children.size, 1);
assert.strictEqual(defaultAst.children.first.type, 'Function');
assert.strictEqual(defaultAst.children.first.children.size, 1);
assert.strictEqual(defaultAst.children.first.children.first.type, 'Identifier');

// Forked parser should give an AST with an identifier node
assert.strictEqual(forkAst.children.size, 1);
assert.strictEqual(forkAst.children.first.type, 'Identifier');
});

it('custom tokenizer should affect the lexer', () => {
const customTokenizer = (source, onToken) => {
onToken(tokenTypes.Ident, 0, source.length);
};

const forkedCssTree = fork({
tokenize: customTokenizer
});

const syntax = 'foo( <number> )';
const input = 'foo(1)';

// Default lexer should match the function syntax
assert(lexer.match(syntax, input).matched);

// Forked lexer should not match the function syntax, because the input isn't tokenized as a function
const forkedResult = forkedCssTree.lexer.match(syntax, input);
assert.strictEqual(forkedResult.matched, null);
});

it('custom tokenizer should affect the generator', () => {
// This custom tokenizer only generates a single token
const customTokenizer = (_, onToken) => {
onToken(tokenTypes.Ident, 0, 1);
};

const forkedCssTree = fork({
tokenize: customTokenizer,
node: {
Identifier: {
structure: {
name: String
},
generate(node) {
// This should be the custom tokenizer
this.tokenize(node.name);
}
}
}
});

const parserOptions = { context: 'value' };
const input = 'foo';
const ast = parse(input, parserOptions);

// Default generator should generate the whole input as-is
assert.equal(generate(ast), input);

// Custom tokenizer only generates a single token for the first character,
// so if the generator uses the custom tokenizer, it should only generate the first character
assert.equal(forkedCssTree.generate(ast), input[0]);
});
});
});
17 changes: 9 additions & 8 deletions lib/generator/create.js
@@ -1,4 +1,5 @@
-import { tokenize, Delim, WhiteSpace } from '../tokenizer/index.js';
+import { Delim, WhiteSpace } from '../tokenizer/index.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';
import { generateSourceMap } from './sourceMap.js';
import * as tokenBefore from './token-before.js';

@@ -23,12 +24,6 @@ function processChildren(node, delimeter) {
node.children.forEach(this.node, this);
}

-function processChunk(chunk) {
-tokenize(chunk, (type, start, end) => {
-this.token(type, chunk.slice(start, end));
-});
-}

export function createGenerator(config) {
const types = new Map();

@@ -87,7 +82,13 @@ export function createGenerator(config) {
node: (node) => handlers.node(node),
children: processChildren,
token: (type, value) => handlers.token(type, value),
-tokenize: processChunk
+tokenize: function (chunk) {
+const tokenize = getTokenizer(config);
+
+return tokenize(chunk, (type, start, end) => {
+this.token(type, chunk.slice(start, end));
+});
+}
};

handlers.node(node);
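With this change, `this.tokenize(chunk)` inside a node's `generate()` handler resolves the tokenizer from the (possibly forked) config instead of the module-level default. A minimal sketch, mirroring the generator test above:

```js
import { fork, parse, tokenTypes } from 'css-tree';

const forked = fork({
    // Emit a single one-character identifier token regardless of the input
    tokenize: (_, onToken) => onToken(tokenTypes.Ident, 0, 1),
    node: {
        Identifier: {
            structure: { name: String },
            generate(node) {
                // Now routed through getTokenizer(config), so the custom
                // tokenizer above is the one invoked here
                this.tokenize(node.name);
            }
        }
    }
});

const ast = parse('foo', { context: 'value' });
console.log(forked.generate(ast)); // 'f' — only the first character survives
```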
7 changes: 4 additions & 3 deletions lib/lexer/prepare-tokens.js
@@ -1,4 +1,4 @@
-import { tokenize } from '../tokenizer/index.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';

const astToTokens = {
decorator(handlers) {
@@ -27,8 +27,9 @@ const astToTokens = {
}
};

-function stringToTokens(str) {
+function stringToTokens(str, syntax) {
const tokens = [];
+const tokenize = getTokenizer(syntax);

tokenize(str, (type, start, end) =>
tokens.push({
@@ -43,7 +44,7 @@

export default function(value, syntax) {
if (typeof value === 'string') {
-return stringToTokens(value);
+return stringToTokens(value, syntax);
}

return syntax.generate(value, astToTokens);
7 changes: 4 additions & 3 deletions lib/parser/create.js
@@ -1,7 +1,6 @@
import { List } from '../utils/List.js';
import { SyntaxError } from './SyntaxError.js';
import {
-tokenize,
OffsetToLocation,
TokenStream,
tokenNames,
@@ -21,6 +20,7 @@
Number as NumberToken
} from '../tokenizer/index.js';
import { readSequence } from './sequence.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';

const NOOP = () => {};
const EXCLAMATIONMARK = 0x0021; // U+0021 EXCLAMATION MARK (!)
@@ -57,7 +57,8 @@ function processConfig(config) {
scope: Object.assign(Object.create(null), config.scope),
atrule: fetchParseValues(config.atrule),
pseudo: fetchParseValues(config.pseudo),
-node: fetchParseValues(config.node)
+node: fetchParseValues(config.node),
+tokenize: getTokenizer(config)
};

for (const [name, context] of Object.entries(config.parseContext)) {
@@ -297,7 +298,7 @@ export function createParser(config) {
source = source_;
options = options || {};

-parser.setSource(source, tokenize);
+parser.setSource(source, parser.tokenize);
locationMap.setSource(
source,
options.offset,
6 changes: 6 additions & 0 deletions lib/syntax/config/mix.js
@@ -110,6 +110,12 @@ export default function mix(dest, src) {
...sliceProps(value, ['name', 'structure', 'parse', 'generate', 'walkContext'])
};
break;

+case 'tokenize':
+if (typeof value === 'function') {
+result[prop] = value;
+}
+break;
}
}

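Note the type guard: `mix()` copies `tokenize` into the merged config only when the value is a function, so a non-function value passed to `fork()` should be silently ignored and the default tokenizer kept. A sketch of that assumed behavior:

```js
import { fork, tokenize } from 'css-tree';

// A string is not a valid tokenizer, so mix() drops it during the merge...
const forked = fork({ tokenize: 'not-a-function' });

// ...and the fork falls back to the default tokenizer.
console.log(forked.tokenize === tokenize); // true (expected)
```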
4 changes: 3 additions & 1 deletion lib/syntax/config/parser.js
@@ -2,6 +2,7 @@ import * as scope from '../scope/index.js';
import atrule from '../atrule/index.js';
import pseudo from '../pseudo/index.js';
import * as node from '../node/index-parse.js';
+import { tokenize } from '../../tokenizer/index.js';

export default {
parseContext: {
@@ -41,5 +42,6 @@ export default {
scope,
atrule,
pseudo,
-node
+node,
+tokenize
};
4 changes: 2 additions & 2 deletions lib/syntax/create.js
@@ -1,10 +1,10 @@
-import { tokenize } from '../tokenizer/index.js';
import { createParser } from '../parser/create.js';
import { createGenerator } from '../generator/create.js';
import { createConvertor } from '../convertor/create.js';
import { createWalker } from '../walker/create.js';
import { Lexer } from '../lexer/Lexer.js';
import mix from './config/mix.js';
+import { getTokenizer } from '../utils/get-tokenizer.js';

function createSyntax(config) {
const parse = createParser(config);
@@ -16,7 +16,7 @@ function createSyntax(config) {
lexer: null,
createLexer: config => new Lexer(config, syntax, syntax.lexer.structure),

-tokenize,
+tokenize: getTokenizer(config),
parse,
generate,

18 changes: 18 additions & 0 deletions lib/utils/get-tokenizer.js
@@ -0,0 +1,18 @@
import { tokenize } from '../tokenizer/index.js';

const FUNCTION_TYPE = 'function';

/**
* Gets the tokenizer function from the configuration object or returns the default tokenizer
*
* @param config Configuration object
* @returns Corresponding tokenizer function
*/
export function getTokenizer(config) {
if (config && typeof config.tokenize === FUNCTION_TYPE) {
return config.tokenize;
}

// Fallback to the default tokenizer
return tokenize;
}
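This helper is the single resolution point used by the parser, lexer, and generator changes above. A standalone illustration of the fallback behavior (import paths assume the repository root):

```js
import { getTokenizer } from './lib/utils/get-tokenizer.js';
import { tokenize } from './lib/tokenizer/index.js';

const custom = () => {};

console.log(getTokenizer({ tokenize: custom }) === custom);   // true — function is used
console.log(getTokenizer({ tokenize: 'nope' }) === tokenize); // true — non-function falls back
console.log(getTokenizer(undefined) === tokenize);            // true — missing config falls back
```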
