Skip to content

Commit

Permalink
refactor lexer, fix parser
Browse files Browse the repository at this point in the history
  • Loading branch information
chjj committed Jul 19, 2011
1 parent 7ad0dc8 commit 432da77
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 111 deletions.
190 changes: 97 additions & 93 deletions lib/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,65 @@
* Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
*/

/**
* States:
* inside
* refers to the "inside" of rules and at-rules.
* rules and at-rules are becoming more and more
* syntactically similar every day - they can now
* contain both properties and/or rules.
* at_rule
* the "header" of an at-rule.
* single_string
* a single quoted string.
* double_string
* a double quoted string.
* comment
* a multiline comment.
* value
* a rule's or at-rule's property's value.
*/

var lex = function(css) {
css = css.replace(/\r\n/g, '\n')
.replace(/\r/g, '\n');

var i = 0
, l = css.length
, ch
, state
, val
, buff = ''
, key
, lineno = 1
, lineoff = 0
, line = 1
, offset = 0
, tokens = []
, stack = [];

css = css.replace(/\r\n/g, '\n')
.replace(/\r/g, '\n');

tokens.push = function(token) {
for (var k in token) {
if (typeof token[k] === 'string') {
token[k] = token[k].trim();
}
}
return Array.prototype.push.call(tokens, token);
};

state = function() {
var state = function() {
return stack[stack.length-1];
};

for (; i < l; i++) {
ch = css[i];
lineoff++;
offset++;

switch (ch) {
case '\\':
buff += ch;
buff += css[++i];
break;
case ' ':
// need this to get at-rule names
// store them in `key`
// should maybe use a different variable
// to avoid confusion
// need this to get at-rule
// names, store them in `key`
if (state() === 'at_rule' && !key) {
key = buff;
buff = '';
}
//if (buff[buff.length-1] !== ' ')
buff += ch;
break;
case '\n': // FALL-THROUGH
lineoff = 0;
lineno++;
case '\n':
offset = 0;
line++;
; // FALL-THROUGH
case '\t':
case '\v':
case '\r':
Expand All @@ -80,23 +86,32 @@ var lex = function(css) {
tokens.push({
type: 'nested_at',
name: key,
params: buff,
line: lineno
params: buff.trim(),
line: line
});
key = '';
buff = '';
stack.pop();
stack.push('nested_at_rule');
stack.push('inside');
break;
case 'rule':
case 'value':
// we were inside a
// selector instead
// of a property like
// we originally thought
stack.pop();
buff = key + ':' + buff;
key = '';
; // FALL-THROUGH
case 'inside':
default:
tokens.push({
type: 'rule',
selector: buff,
line: lineno
selector: buff.trim(),
line: line
});
buff = '';
stack.push('rule');
stack.push('inside');
break;
}
break;
Expand All @@ -105,31 +120,24 @@ var lex = function(css) {
case 'value':
tokens.push({
type: 'property',
key: key,
val: buff,
line: lineno
key: key.trim(),
val: buff.trim(),
line: line
});
key = '';
buff = '';
stack.pop();
// need this for @viewport-like rules
if (state() === 'nested_at_rule') {
// in case someone omitted
// the semicolon at the end
if (state() === 'inside') {
; // FALL-THROUGH
} else {
break;
break; // do we need this?
}
//break;
case 'nested_at_rule':
tokens.push({
type: 'nested_at_end',
line: lineno
});
buff = '';
break;
case 'rule':
case 'inside':
tokens.push({
type: 'rule_end',
line: lineno
type: 'end',
line: line
});
buff = '';
break;
Expand All @@ -147,35 +155,16 @@ var lex = function(css) {
break;
case ':':
switch (state()) {
case 'nested_at_rule': // need this for @viewport-like rules
var c = i, h;
// this is a sticky situation...
// were in the top level of a nested at-rule here,
// but we dont know whether were in an "at-rule property"
// or a selector (pseudo-class/element). so we need to
// lookahead a few bytes to check for a curly brace.
// an easier way would be to simply list and classify
// the different at-rules, but i want to lex this
// independent of any semantic knowledge.
while ((h = css[++c])
&& h !== ';'
&& h !== '}'
&& h !== '{');
if (h !== '{') {
; // FALL-THROUGH
// maybe dont need this,
// could just set the key here
// and add a case for nested_at
// under the semicolon case
// or maybe just do:
// case 'nested_at': buff.split(':'); etc
// under the semicolon case and dont bother
// with the lookahead at all
} else {
buff += ch;
break;
}
case 'rule':
case 'inside':
// at this point we're
// either inside a selector
// or a property, we don't
// really know. we'll assume
// it's a property for now,
// and if we hit a curly
// brace later, we can
// change the state token
// to a rule
key = buff;
buff = '';
stack.push('value');
Expand All @@ -188,12 +177,13 @@ var lex = function(css) {
case ';':
switch (state()) {
case 'value':
if (!key) break; // a useless semicolon
// a useless semicolon
if (!key) break;
tokens.push({
type: 'property',
key: key,
val: buff,
line: lineno
key: key.trim(),
val: buff.trim(),
line: line
});
key = '';
buff = '';
Expand All @@ -203,8 +193,8 @@ var lex = function(css) {
tokens.push({
type: 'at',
name: key,
params: buff,
line: lineno
params: buff.trim(),
line: line
});
key = '';
buff = '';
Expand All @@ -231,7 +221,7 @@ var lex = function(css) {
tokens.push({
type: 'comment',
text: buff,
line: lineno
line: line
});
buff = '';
stack.pop();
Expand All @@ -252,27 +242,41 @@ var lex = function(css) {
buff += ch;
break;
default:
buff = ''; // TESTING, this could backfire
buff += ch;
stack.push('at_rule');
break;
}
break;
case '"':
case '\'':
val = (ch === '"' ? 'double_' : 'single_') + 'string';
if (state() !== val && state() !== 'comment') {
stack.push(val);
} else if (state() === val) {
stack.pop();
switch (state()) {
case 'comment':
break;
case 'single_string':
if (ch === '\'') {
stack.pop();
}
break;
case 'double_string':
if (ch === '"') {
stack.pop();
}
break;
default:
stack.push(ch === '"'
? 'double_string'
: 'single_string');
break;
}
buff += ch;
break;
default:
if (ch < ' ') {
throw new
Error('Control character found.'
+ '\nLine: ' + lineno
+ '\nOffset: ' + lineoff);
+ '\nLine: ' + line
+ '\nOffset: ' + offset);
}
buff += ch;
break;
Expand Down
Loading

0 comments on commit 432da77

Please sign in to comment.