Merge branch 'master' into encode-decode
lahmatiy committed May 18, 2020
2 parents ab09af1 + 2830669 commit a2b8191
Showing 56 changed files with 741 additions and 545 deletions.
49 changes: 29 additions & 20 deletions CHANGELOG.md
@@ -1,30 +1,39 @@
## next

- Removed `List#each()` and `List#eachRight()` methods, `List#forEach()` and `List#forEachRight()` should be used instead
- Changed `List` to be iterable (iterates data)
- Changed `List#first`, `List#last` and `List#isEmpty` to getters
- Changed `List#getSize()` method to `List#size` getters
- Removed `Lexer#matchDeclaration()` method
- Exposed parser's inner configuration as `parse.config`
- Changed `TokenStream#getRawLength()` to take a second parameter as a function (rule) that checks a char code to stop scanning
- Added `consumeUntilBalanceEnd()`, `consumeUntilLeftCurlyBracket()`, `consumeUntilLeftCurlyBracketOrSemicolon()`, `consumeUntilExclamationMarkOrSemicolon()` and `consumeUntilSemicolonIncluded()` methods to parser to use with `Raw` instead of `Raw.mode`
- Fixed `Lexer#dump()` to dump atrules syntaxes as well
- Exposed `version` of the lib (i.e. `import { version } from 'css-tree'`)
- Removed `dist/default-syntax.json` from package
- Exposed `version` of the lib
- Changed generator's handler `chunk()` to `token()` and `tokenize()`
- Parser:
  - Produce `{ type: 'Combinator', name: ' ' }` node instead of `WhiteSpace` node
  - Don't produce `WhiteSpace` nodes anymore, with the single exception: a custom property declaration with no tokens in a value except a white space
  - Add a whitespace to `+` and `-` operators when a whitespace is before and/or after an operator
- Changed `Nth` to serialize `+n` as `n`
- Changed generator to determine when a white space is required between emitted tokens
- Added `mode` option for `generate()` to specify a mode of token separation: `spec` or `safe` (by default)
- Renamed `HexColor` node type into `Hash`
- Removed `element()` specific parsing rules
- Changed `String` node type to store decoded string value (and auto encode a value on serialize)
- Changed `Url` node type to store decoded url value (and auto encode a value on serialize)
- Removed `SyntaxError` (custom parse error class) from public API
- Removed `parseError` field in parse `SyntaxError`
- Tokenizer
  - Changed `tokenize()` to take a function as second argument, which will be called for every token. No stream instance is created when the second argument is omitted.
  - Changed `TokenStream#getRawLength()` to take a second parameter as a function (rule) that checks a char code to stop scanning
  - Added `TokenStream#forEachToken(fn)` method
  - Removed `TokenStream#skipWS()` method
  - Removed `TokenStream#getTokenLength()` method
- Parser
  - Renamed `HexColor` node type into `Hash`
  - Changed selector parsing to produce `{ type: 'Combinator', name: ' ' }` node instead of `WhiteSpace` node
  - Don't produce `WhiteSpace` nodes anymore, with the single exception: a custom property declaration with a single white space token as a value
  - Parser adds a whitespace to `+` and `-` operators when a whitespace is before and/or after an operator
  - Removed `element()` specific parsing rules
  - Exposed parser's inner configuration as `parse.config`
  - Added `onComment` option
  - Added `consumeUntilBalanceEnd()`, `consumeUntilLeftCurlyBracket()`, `consumeUntilLeftCurlyBracketOrSemicolon()`, `consumeUntilExclamationMarkOrSemicolon()` and `consumeUntilSemicolonIncluded()` methods to parser's inner API to use with `Raw` instead of `Raw.mode`
- Generator
  - Generator now determines itself when a white space is required between emitted tokens
  - Changed `chunk()` handler to `token()` (put a token to output) and `tokenize()` (split a string into tokens and put each of them to output)
  - Added `mode` option for `generate()` to specify a mode of token separation: `spec` or `safe` (by default)
  - Changed `Nth` serialization to serialize `+n` as `n`
- Lexer
  - Removed `Lexer#matchDeclaration()` method
  - Fixed `Lexer#dump()` to dump atrules syntaxes as well
- List
  - Changed `List` to be iterable (iterates data)
  - Changed `List#first`, `List#last` and `List#isEmpty` to getters
  - Changed `List#getSize()` method to `List#size` getter
  - Removed `List#each()` and `List#eachRight()` methods, `List#forEach()` and `List#forEachRight()` should be used instead
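
A rough sketch of how a couple of the `List` and `generate()` changes above look in use (illustrative only; entry points as shown in the README, output comments are assumptions):

```js
const csstree = require('css-tree');

const ast = csstree.parse('.a { color: red } .b { color: green }');

// `List` is now iterable and exposes getters instead of methods
const rules = ast.children;
console.log(rules.size, rules.isEmpty, rules.first.type); // e.g. 2 false 'Rule'

for (const rule of rules) {
    // iteration yields the data (AST nodes), not list items
    console.log(csstree.generate(rule, { mode: 'spec' }));
}
```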

## 1.0.0-alpha.39 (December 5, 2019)

2 changes: 1 addition & 1 deletion README.md
@@ -92,7 +92,7 @@ Syntax matching:
// parse CSS to AST as a declaration value
var ast = csstree.parse('red 1px solid', { context: 'value' });

// march to syntax of `border` property
// match to syntax of `border` property
var matchResult = csstree.lexer.matchProperty('border', ast);

// check first value node is a <color>
7 changes: 7 additions & 0 deletions docs/parsing.md
@@ -109,6 +109,13 @@ csstree.parse('example { foo; bar: 1! }', {
// ------------------------------^
```

### onComment

Type: `function(value, loc)` or `null`
Default: `null`

A handler to call for every comment in the source being parsed. The value is passed without the surrounding `/*` and `*/`. `loc` will be `null` unless the `positions` option is set to `true`.
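
A minimal usage sketch based on the description above (the logged values are illustrative):

```js
csstree.parse('/* layout */ .a { top: 0 } /* skin */', {
    positions: true,  // without this, `loc` passed to the handler is null
    onComment(value, loc) {
        // `value` excludes the surrounding /* and */
        console.log(JSON.stringify(value), loc !== null);
    }
});
```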

### filename

Type: `string`
237 changes: 114 additions & 123 deletions lib/common/TokenStream.js
@@ -25,8 +25,8 @@ const balancePair = new Map([
]);

module.exports = class TokenStream {
constructor() {
this.open('', 0).close();
constructor(source, tokenize) {
this.setSource(source, tokenize);
}
reset() {
this.eof = false;
@@ -35,72 +35,75 @@ module.exports = class TokenStream {
this.tokenStart = this.firstCharOffset;
this.tokenEnd = this.firstCharOffset;
}
open(source, firstCharOffset) {
setSource(source = '', tokenize = () => {}) {
source = String(source || '');

const sourceLength = source.length;
const offsetAndType = adoptBuffer(this.offsetAndType, source.length + 1); // +1 because of eof-token
const balance = adoptBuffer(this.balance, source.length + 1);
let tokenCount = 0;
let balanceCloseType = 0;
let balanceStart = 0;
let firstCharOffset = -1;

// capture buffers
this.offsetAndType = null;
this.balance = null;

return {
token(type, offset) {
switch (type) {
default:
balance[tokenCount] = sourceLength;
break;

case balanceCloseType: {
let balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balanceCloseType = balanceStart >> TYPE_SHIFT;
balance[tokenCount] = balancePrev;
balance[balancePrev++] = tokenCount;
for (; balancePrev < tokenCount; balancePrev++) {
if (balance[balancePrev] === sourceLength) {
balance[balancePrev] = tokenCount;
}
}
break;
}

case LeftParenthesis:
case FunctionToken:
case LeftSquareBracket:
case LeftCurlyBracket:
balance[tokenCount] = balanceStart;
balanceCloseType = balancePair.get(type);
balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
break;
}
tokenize(source, (type, start, end) => {
switch (type) {
default:
balance[tokenCount] = sourceLength;
break;

offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
},
close: () => {
// finalize buffers
offsetAndType[tokenCount] = (EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
balance[tokenCount] = sourceLength;
balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
while (balanceStart !== 0) {
const balancePrev = balanceStart & OFFSET_MASK;
case balanceCloseType: {
let balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balance[balancePrev] = sourceLength;
balanceCloseType = balanceStart >> TYPE_SHIFT;
balance[tokenCount] = balancePrev;
balance[balancePrev++] = tokenCount;
for (; balancePrev < tokenCount; balancePrev++) {
if (balance[balancePrev] === sourceLength) {
balance[balancePrev] = tokenCount;
}
}
break;
}

this.source = source;
this.firstCharOffset = firstCharOffset;
this.tokenCount = tokenCount;
this.offsetAndType = offsetAndType;
this.balance = balance;
case LeftParenthesis:
case FunctionToken:
case LeftSquareBracket:
case LeftCurlyBracket:
balance[tokenCount] = balanceStart;
balanceCloseType = balancePair.get(type);
balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
break;
}

this.reset();
this.next();
offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | end;
if (firstCharOffset === -1) {
firstCharOffset = start;
}
};
});

// finalize buffers
offsetAndType[tokenCount] = (EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
balance[tokenCount] = sourceLength;
balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
while (balanceStart !== 0) {
const balancePrev = balanceStart & OFFSET_MASK;
balanceStart = balance[balancePrev];
balance[balancePrev] = sourceLength;
}

this.source = source;
this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
this.tokenCount = tokenCount;
this.offsetAndType = offsetAndType;
this.balance = balance;

this.reset();
this.next();
}

lookupType(offset) {
@@ -148,43 +151,10 @@

return this.firstCharOffset;
}

// TODO: -> skipUntilBalanced
getRawLength(startToken, stopConsume) {
let cursor = startToken;
let balanceEnd;
let offset;

loop:
for (; cursor < this.tokenCount; cursor++) {
balanceEnd = this.balance[cursor];

// stop scanning on balance edge that points to offset before start token
if (balanceEnd < startToken) {
break loop;
}

offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

// check stop condition
switch (stopConsume(this.source.charCodeAt(offset))) {
case 1: // just stop
break loop;

case 2: // stop & included
cursor++;
break loop;

default:
// fast forward to the end of balanced block
if (this.balance[balanceEnd] === cursor) {
cursor = balanceEnd;
}
}
}

return cursor - this.tokenIndex;
substrToCursor(start) {
return this.source.substring(start, this.tokenStart);
}

isBalanceEdge(pos) {
return this.balance[this.tokenIndex] < pos;
}
Expand All @@ -202,34 +172,6 @@ module.exports = class TokenStream {
);
}

getTokenValue() {
return this.source.substring(this.tokenStart, this.tokenEnd);
}
getTokenLength() {
return this.tokenEnd - this.tokenStart;
}
substrToCursor(start) {
return this.source.substring(start, this.tokenStart);
}

skipWS() {
let skipTokenCount = 0;

for (let i = this.tokenIndex; i < this.tokenCount; i++, skipTokenCount++) {
if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WhiteSpace) {
break;
}
}

if (skipTokenCount > 0) {
this.skip(skipTokenCount);
}
}
skipSC() {
while (this.tokenType === WhiteSpace || this.tokenType === Comment) {
this.next();
}
}
skip(tokenCount) {
let next = this.tokenIndex + tokenCount;

@@ -260,22 +202,71 @@
this.tokenStart = this.tokenEnd = this.source.length;
}
}
skipSC() {
while (this.tokenType === WhiteSpace || this.tokenType === Comment) {
this.next();
}
}
skipUntilBalanced(startToken, stopConsume) {
let cursor = startToken;
let balanceEnd;
let offset;

dump() {
let offset = this.firstCharOffset;
loop:
for (; cursor < this.tokenCount; cursor++) {
balanceEnd = this.balance[cursor];

// stop scanning on balance edge that points to offset before start token
if (balanceEnd < startToken) {
break loop;
}

offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

// check stop condition
switch (stopConsume(this.source.charCodeAt(offset))) {
case 1: // just stop
break loop;

return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
case 2: // stop & included
cursor++;
break loop;

default:
// fast forward to the end of balanced block
if (this.balance[balanceEnd] === cursor) {
cursor = balanceEnd;
}
}
}

this.skip(cursor - this.tokenIndex);
}

forEachToken(fn) {
for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
const start = offset;
const item = this.offsetAndType[i];
const end = item & OFFSET_MASK;
const type = item >> TYPE_SHIFT;

offset = end;

return {
idx,
type: NAME[item >> TYPE_SHIFT],
fn(type, start, end, i);
}
}
dump() {
const tokens = new Array(this.tokenCount);

this.forEachToken((type, start, end, index) => {
tokens[index] = {
idx: index,
type: NAME[type],
chunk: this.source.substring(start, end),
balance: this.balance[idx]
balance: this.balance[index]
};
}, this);
});

return tokens;
}
};
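
For context, a rough sketch of driving the reworked `TokenStream` above (the require path is assumed from this diff, and `toyTokenize` is a hypothetical stand-in for the library's real tokenizer, used only to show the `(type, start, end)` callback shape):

```js
const TokenStream = require('./lib/common/TokenStream');

// Hypothetical toy tokenizer: reports the whole input as a single token
// of an arbitrary numeric type via the (type, start, end) callback
function toyTokenize(source, onToken) {
    if (source.length > 0) {
        onToken(1, 0, source.length);
    }
}

// the constructor forwards both arguments to setSource()
const stream = new TokenStream('.a { color: red }', toyTokenize);

// forEachToken() walks the collected tokens without moving the stream cursor
stream.forEachToken((type, start, end, index) => {
    console.log(index, type, stream.source.substring(start, end));
});
```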
9 changes: 2 additions & 7 deletions lib/generator/create.js
@@ -24,13 +24,8 @@ function processChildren(node, delimeter) {
}

function processChunk(chunk) {
tokenize(chunk, {
open: (source, lastOffset) => ({
token: (type, offset) => {
this.token(type, source.slice(lastOffset, lastOffset = offset));
},
close() { }
})
tokenize(chunk, (type, start, end) => {
this.token(type, chunk.slice(start, end));
});
}

