Skip to content

Commit

Permalink
* Fixes possible bug with multi mode lexer due to undefined order of …
Browse files Browse the repository at this point in the history
…iteration

  on javascript objects.

* XML Grammar Example.

Fixes Chevrotain#201
Fixes Chevrotain#202
  • Loading branch information
bd82 authored and VILOZNY committed Aug 8, 2016
1 parent 8806dc3 commit 1ba003c
Show file tree
Hide file tree
Showing 9 changed files with 585 additions and 54 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
## X.Y.Z (INSERT_DATE_HERE)

#### Breaking Changes
- [MultiMode Lexer defaultMode should not be implicitly defined.](https://github.com/SAP/chevrotain/issues/202)

#### Documentation
- [XML grammar example.](https://github.com/SAP/chevrotain/issues/201)



## 0.10.1 (5-30-2016)

Fixes issue with previous release automation.
- Fixes issue with previous release automation.



Expand Down
2 changes: 1 addition & 1 deletion examples/grammars/css/css.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ function CssParser(input) {
// [STRING|URI] S* media_list? ';' S*
this.cssImport = this.RULE('cssImport', function() {
$.CONSUME(ImportSym)
// @formatter:off
// @formatter:off
$.OR([
{ALT: function() { $.CONSUME(StringLiteral)}},
{ALT: function() { $.CONSUME(Uri)}}
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion examples/grammars/json/json_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ describe('The JSON Grammar', function() {
it('can parse a simple Json without errors - Parser implemented using ES6 syntax', function() {
// only load a file containing ES6 syntax when actually running the test
// thus if this test is ignored the other tests can still be run in old node.js versions
var parseJsonES6 = require("./jsonES6");
var parseJsonES6 = require("./json_es6");
var inputText = '{ "arr": [1,null,true], "obj": {"num":666}}';
var lexAndParseResult = parseJsonES6(inputText);

Expand Down
271 changes: 271 additions & 0 deletions examples/grammars/xml/xml_es6.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
"use strict";
var chevrotain = require("chevrotain");
var XRegExp = require("xregexp")

// ----------------- lexer -----------------
var Token = chevrotain.Token;
var Lexer = chevrotain.Lexer;
var Parser = chevrotain.Parser;

// A little mini DSL for easier lexer definition using xRegExp.
// Registry of the named sub-patterns defined so far, keyed by fragment name.
var fragments = {};

/**
 * Defines a reusable, named sub-pattern.
 *
 * The definition is compiled with XRegExp.build against the fragments that
 * were registered before it, so a definition may embed earlier fragments
 * using the {{FragmentName}} syntax.
 *
 * @param {string} name - key under which the compiled fragment is stored.
 * @param {string} def - XRegExp pattern source of the fragment.
 */
function FRAGMENT(name, def) {
    var compiledFragment = XRegExp.build(def, fragments);
    fragments[name] = compiledFragment;
}

/**
 * Builds a token pattern out of the previously registered fragments.
 *
 * @param {string} def - XRegExp pattern source, may reference fragments as {{FragmentName}}.
 * @param {string} [flags] - optional flags, passed to XRegExp.build unchanged.
 * @returns {RegExp} the result of XRegExp.build for the given definition.
 */
function MAKE_PATTERN(def, flags) {
    var builtPattern = XRegExp.build(def, fragments, flags);
    return builtPattern;
}

// Fragments for the XML "Name" production.
// - Every code point range lives inside a character class: outside of [...] a sequence such as
//   \u2070-\u218F only matches the three literal characters U+2070, '-' and U+218F.
// - ':' and '_' are valid first characters of an XML name (e.g. "xmlns:foo", "_id").
// - NameChar no longer contains an empty alternative (the original had a stray "||").
FRAGMENT('NameStartChar', '[:_a-zA-Z\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD]');
FRAGMENT('NameChar', '{{NameStartChar}}|[-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040]');
FRAGMENT('Name', '{{NameStartChar}}({{NameChar}})*');

// ES2015 classes have no syntax for static class properties (that syntax is only a later proposal),
// so the PATTERN/GROUP static props are assigned outside the class declarations.
// see: https://github.com/jeffmo/es-class-fields-and-static-properties
// Comments, CDATA sections and <!...> declarations regularly span several lines.
// '.' does not match line terminators in JavaScript, so [\s\S] is used to match any character.

// <!-- ... --> : lazily matched up to the first "-->".
class Comment extends Token {}
Comment.PATTERN = /<!--[\s\S]*?-->/;

// <![CDATA[ ... ]]> : lazily matched up to the first "]]>".
class CData extends Token {}
CData.PATTERN = /<!\[CDATA\[[\s\S]*?]]>/;

// Any other <! ... > declaration; skipped, so it never reaches the parser.
// The match ends at the first '>', so a DOCTYPE with an internal subset
// containing '>' is not covered by this simple pattern.
class DTD extends Token {}
DTD.PATTERN = /<![\s\S]*?>/;
DTD.GROUP = Lexer.SKIPPED;

// Entity reference: '&' Name ';' (e.g. &amp;).
class EntityRef extends Token {}
EntityRef.PATTERN = MAKE_PATTERN('&{{Name}};');

// Character reference: decimal (&#169;) or hexadecimal (&#xA9;).
// The hexadecimal branch must accept one or more hex digits AND the closing ';',
// otherwise only "&#x" plus a single digit is consumed and the rest leaks into the next token.
class CharRef extends Token {}
CharRef.PATTERN = /&#\d+;|&#x[a-fA-F0-9]+;/;

// Whitespace run outside of tags: one or more spaces, tabs, "\n" or "\r\n".
// Note that a lone "\r" is not matched by this pattern.
class SEA_WS extends Token {}
SEA_WS.PATTERN = /( |\t|\n|\r\n)+/;

// Start of the XML declaration: "<?xml" followed by exactly one whitespace character.
// PUSH_MODE names the lexer mode (a key of the lexer definition's "modes") to switch to
// once this token has been matched.
class XMLDeclOpen extends Token {}
XMLDeclOpen.PATTERN = /<\?xml[ \t\r\n]/;
XMLDeclOpen.PUSH_MODE = "INSIDE";

// "</" - start of a closing tag; also switches to the "INSIDE" mode.
class SLASH_OPEN extends Token {}
SLASH_OPEN.PATTERN = /<\//;
SLASH_OPEN.PUSH_MODE = "INSIDE";

// "<" - start of an opening tag; also switches to the "INSIDE" mode.
// This pattern is a prefix of the other "<..." patterns, so its position in the
// mode's token list matters.
class OPEN extends Token {}
OPEN.PATTERN = /</;
OPEN.PUSH_MODE = "INSIDE";

// Processing instruction: "<?" Name ... "?>".
// The body is matched lazily so the token ends at the FIRST "?>" (a greedy ".*" would swallow
// everything up to the last "?>" on the line), and [\s\S] lets the body span several lines.
class PROCESSING_INSTRUCTION extends Token {}
PROCESSING_INSTRUCTION.PATTERN = MAKE_PATTERN('<\\?{{Name}}[\\s\\S]*?\\?>');

// Character data outside of tags: everything up to the next '<' or '&'.
class TEXT extends Token {}
TEXT.PATTERN = /[^<&]+/;

// ---- tokens of the "INSIDE" mode (see the lexer definition's "modes") ----

// ">" - end of a tag; POP_MODE asks the lexer to leave the current mode.
class CLOSE extends Token {}
CLOSE.PATTERN = />/;
CLOSE.POP_MODE = true;

// "?>" - end of the XML declaration; also pops the current mode.
class SPECIAL_CLOSE extends Token {}
SPECIAL_CLOSE.PATTERN = /\?>/;
SPECIAL_CLOSE.POP_MODE = true;

// "/>" - end of a self-closing tag; also pops the current mode.
class SLASH_CLOSE extends Token {}
SLASH_CLOSE.PATTERN = /\/>/;
SLASH_CLOSE.POP_MODE = true;

// A single "/".
class SLASH extends Token {}
SLASH.PATTERN = /\//;

// Attribute value: double or single quoted, may not contain '<' or the enclosing quote character.
class STRING extends Token {}
STRING.PATTERN = /"[^<"]*"|'[^<']*'/;

// "=" between an attribute name and its value.
class EQUALS extends Token {}
EQUALS.PATTERN = /=/;

// Element / attribute name, built from the 'Name' fragment.
class Name extends Token {}
Name.PATTERN = MAKE_PATTERN('{{Name}}');

// A single whitespace character inside a tag; placed in the SKIPPED group.
class S extends Token {}
S.PATTERN = /[ \t\r\n]/;
S.GROUP = Lexer.SKIPPED;

// Multi-mode lexer definition.
// Within a mode the token types are tried in the listed order and the first pattern that
// matches wins, so a pattern that is a prefix of another one must come AFTER it.
var XmlLexerDefinition = {

    // the initial mode must be named explicitly
    defaultMode: "OUTSIDE",

    modes: {
        // the default (initial) mode is "OUTSIDE": everything between the tags
        OUTSIDE: [
            Comment,
            CData,
            DTD,
            EntityRef,
            CharRef,
            SEA_WS,
            XMLDeclOpen,
            // must precede OPEN: OPEN's pattern (/</) also matches the first character of "<?name ... ?>",
            // which would make PROCESSING_INSTRUCTION unreachable.
            // must follow XMLDeclOpen, otherwise "<?xml ...?>" would be lexed as a processing instruction.
            PROCESSING_INSTRUCTION,
            SLASH_OPEN,
            OPEN,
            TEXT
        ],
        // the contents of a tag, entered via PUSH_MODE and left via POP_MODE
        INSIDE: [
            CLOSE,
            SPECIAL_CLOSE,
            SLASH_CLOSE,
            SLASH,
            EQUALS,
            STRING,
            Name,
            S
        ]
    }
};

// Single lexer instance, created once from the multi-mode definition.
var XmlLexer = new Lexer(XmlLexerDefinition);
// Token vocabulary handed to the parser: the token types of the INSIDE mode followed by those of the OUTSIDE mode.
var allTokens = [].concat(XmlLexerDefinition.modes.INSIDE, XmlLexerDefinition.modes.OUTSIDE);


// ----------------- parser -----------------
/**
 * Parser for the XML grammar, written with ES6 class syntax.
 *
 * The grammar rules are created in the constructor through RULE(...) and stored on the
 * instance (document, prolog, content, element, reference, attribute, chardata, misc).
 * The rules have no embedded actions returning values: parsing only validates the input
 * and collects errors.
 *
 * NOTE(review): the numeric suffixes (MANY2, SUBRULE2, CONSUME2, OPTION2) appear to
 * distinguish repeated uses of the same DSL method inside one rule - confirm against the
 * Chevrotain documentation before renumbering or restructuring any rule body.
 */
class XmlParserES6 extends chevrotain.Parser {

// Unfortunately no support for class fields with initializer in ES2015, only in ES2016...
// so the parsing rules are defined inside the constructor, as each parsing rule must be initialized by
// invoking RULE(...)
// see: https://github.com/jeffmo/es-class-fields-and-static-properties
/**
 * @param input - the token vector to parse (the exported function passes the lexer's "tokens" result).
 */
constructor(input) {
super(input, allTokens,
// by default the error recovery / fault tolerance capabilities are disabled
// use this flag to enable them
{recoveryEnabled: true});

// not mandatory, using $ (or any other sign) to reduce verbosity (this. this. this. this. .......)
var $ = this;

// document: prolog? misc* element misc*
$.document = $.RULE("document", () => {
$.OPTION(() => {
$.SUBRULE($.prolog);
});

$.MANY(() => {
$.SUBRULE($.misc);
});

$.SUBRULE($.element);

$.MANY2(() => {
$.SUBRULE2($.misc);
});
});

// prolog: XMLDeclOpen attribute* SPECIAL_CLOSE
$.prolog = $.RULE("prolog", () => {
$.CONSUME(XMLDeclOpen);
$.MANY2(() => {
$.SUBRULE($.attribute);
})
$.CONSUME(SPECIAL_CLOSE);
});

// content: chardata? ((element | reference | CData | PROCESSING_INSTRUCTION | Comment) chardata?)*
$.content = $.RULE("content", () => {
$.OPTION(() => {
$.SUBRULE($.chardata);
})

$.MANY(() => {
// @formatter:off
$.OR([
{ALT: () => { $.SUBRULE($.element)}},
{ALT: () => { $.SUBRULE($.reference)}},
{ALT: () => { $.CONSUME(CData)}},
{ALT: () => { $.CONSUME(PROCESSING_INSTRUCTION)}},
{ALT: () => { $.CONSUME(Comment)}}
]);
// @formatter:on

$.OPTION2(() => {
$.SUBRULE2($.chardata);
})
});
});

// element: OPEN Name attribute* (CLOSE content SLASH_OPEN Name CLOSE | SLASH_CLOSE)
// Note: the name of the closing tag is consumed but not compared with the opening tag's name.
$.element = $.RULE("element", () => {
$.CONSUME(OPEN);
$.CONSUME(Name);
$.MANY(() => {
$.SUBRULE($.attribute);
})

// @formatter:off
$.OR([
{ALT: () => {
$.CONSUME(CLOSE);
$.SUBRULE($.content);
$.CONSUME(SLASH_OPEN);
$.CONSUME2(Name);
$.CONSUME2(CLOSE);
}},
{ALT: () => {
$.CONSUME(SLASH_CLOSE);
}}
]);
// @formatter:on
});

// reference: EntityRef | CharRef
$.reference = $.RULE("reference", () => {
// @formatter:off
$.OR([
{ALT: () => { $.CONSUME(EntityRef)}},
{ALT: () => { $.CONSUME(CharRef)}}
]);
// @formatter:on
});

// attribute: Name EQUALS STRING
$.attribute = $.RULE("attribute", () => {
$.CONSUME(Name);
$.CONSUME(EQUALS);
$.CONSUME(STRING);
});

// chardata: TEXT | SEA_WS
$.chardata = $.RULE("chardata", () => {
// @formatter:off
$.OR([
{ALT: () => { $.CONSUME(TEXT)}},
{ALT: () => { $.CONSUME(SEA_WS)}}
]);
// @formatter:on
});

// misc: Comment | PROCESSING_INSTRUCTION | SEA_WS
$.misc = $.RULE("misc", () => {
// @formatter:off
$.OR([
{ALT: () => { $.CONSUME(Comment)}},
{ALT: () => { $.CONSUME(PROCESSING_INSTRUCTION)}},
{ALT: () => { $.CONSUME(SEA_WS)}}
]);
// @formatter:on
});

// very important to call this after all the rules have been defined.
// otherwise the parser may not work correctly as it will lack information
// derived during the self analysis phase.
Parser.performSelfAnalysis(this);
}
}

// ----------------- wrapping it all together -----------------
module.exports = function(text) {
var fullResult = {};
var lexResult = XmlLexer.tokenize(text);
fullResult.tokens = lexResult.tokens;
fullResult.ignored = lexResult.ignored;
fullResult.lexErrors = lexResult.errors;

var parser = new XmlParserES6(lexResult.tokens);
parser.document();
fullResult.parseErrors = parser.errors;

if (fullResult.lexErrors.length >= 1 || fullResult.parseErrors.length >= 1) {
throw new Error("sad sad panda")
}
return fullResult;
};
Loading

0 comments on commit 1ba003c

Please sign in to comment.