a stupid PEG parser library (Javascript)
Ruby JavaScript Makefile
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
spec
src
tools
.gitignore
CHANGELOG.md
Gemfile
Gemfile.lock
LICENSE.txt
Makefile
README.md

README.md

jaabro

A very dumb PEG parser library.

Brother to raabro, son to aabro, grandson to neg, grand-grandson to parslet.

a sample parser

A parser is made by calling Jaabro.makeParser with a function. This function should hold the definitions of the parse functions making up the final parser, and also the rewrite functions used to rewrite the parse tree resulting from the parsing.

A parse function might look like

function fun(i) { return seq('fun', i, funname, funargs); }

This fun function expects a sequence funame then funargs. Those two arguments are parse functions themselves. Parse functions generally call a Jaabro function like seq(), rex(), eseq() to compose a bit higher level parser.

Rewrite rules are of the form rewrite_{tree_name}(tree). They take as input a node of a parse tree and return a re-interpretation / rewrite of that tree. By default Jaabro takes the tree_name coming from the parse functions to call the corresponding rewrite functions. For example a parse tree whose root node is "expression" will be handed to rewrite_expression(tree).

Here is a complete example:

var Xelp = Jaabro.makeParser(function() {

  // parse rules

  function pa(i) { return rex(null, i, /\(\s*/); }
  function pz(i) { return rex(null, i, /\)\s*/); }
  function com(i) { return rex(null, i, /,\s*/); }

  function number(i) {
    return rex('number', i, /-?([0-9]*\.[0-9]+|[0-9][,0-9]*[0-9]|[0-9]+)\s*/); }

  function va(i) { return rex('var', i, /[a-z][A-Za-z0-9]*\s*/); }

  function qstring(i) { return rex('qstring', i, /'(\\'|[^'])*'\s*/); }
  function dqstring(i) { return rex('dqstring', i, /"(\\"|[^"])*"\s*/); }
  function string(i) { return alt('string', i, dqstring, qstring); }

  function funargs(i) { return eseq('funargs', i, pa, cmp, com, pz); }
  function funname(i) { return rex('funname', i, /[A-Z][A-Z0-9]*/); }
  function fun(i) { return seq('fun', i, funname, funargs); }

  function comparator(i) { return rex('comparator', i, /([\<\>]=?|=~|!?=)\s*/); }
  function multiplier(i) { return rex('multiplier', i, /[*\/]\s*/); }
  function adder(i) { return rex('adder', i, /[+\-]\s*/); }

  function par(i) { return seq('par', i, pa, cmp, pz); }
  function exp(i) { return alt('exp', i, par, fun, number, string, va); }

  function mul(i) { return jseq('mul', i, exp, multiplier); }
  function add(i) { return jseq('add', i, mul, adder); }

  function rcmp(i) { return seq('rcmp', i, comparator, add); }
  function cmp(i) { return seq('cmp', i, add, rcmp, '?'); }

  var root = cmp; // tells Jaabro where to start

  // rewrite rules
  //
  // one rule per possible parse tree name above

  function rewrite_cmp(t) {

    if (t.children.length === 1) return rewrite(t.children[0]);

    return [
      'cmp',
      t.children[1].children[0].string().trim(),
      rewrite(t.children[0]),
      rewrite(t.children[1].children[1])
    ];
  }

  function rewrite_add(t) {

    if (t.children.length === 1) return rewrite(t.children[0]);

    var cn = t.children.slice(); // dup array
    var a = [ t.name === 'add' ? 'SUM' : 'MUL' ];
    var mod = null;
    var c = null;

    while (c = cn.shift()) {
      var v = rewrite(c);
      if (mod) v.push(mod)
      a.push(v);
      c = cn.shift(); if ( ! c) break;
      mod = { '-': 'opp', '/': 'inv' }[c.string().trim()];
    }

    return a;
  }
  var rewrite_mul = rewrite_add;

  function rewrite_fun(t) {

    var a = [ t.children[0].string() ];
    t.children[1].children.forEach(function(c) {
      if (c.name) a.push(rewrite(c));
    });

    return a;
  }

  function rewrite_exp(t) { return rewrite(t.children[0]); }

  function rewrite_par(t) { return rewrite(t.children[1]); }

  function rewrite_var(t) { return [ 'var', t.string().trim() ]; }
  function rewrite_number(t) { return [ 'num', t.string().trim() ]; }

  function rewrite_string(t) {

    var s = t.children[0].string().trim();
    var q = s[0];
    var s = s.slice(1, -1);

    return [
      'str', q === '"' ? s.replace(/\\\"/g, '"') : s.replace(/\\'/g, "'") ];
  }
});

then

Xelp.parse('10 + IF(c10 = "yes", 11, 0)');
  // -- yields -->
["SUM",["num","9"],["num","1"]]

Xelp.parse('10 + IF(c10 = "yes", 11, 0)');
  // -- yields -->
["SUM",
  ["num","10"],
  ["IF",
    ["cmp","=",["var","c10"],["str","yes"]],
    ["num","11"],
    ["num","0"]]]

basic parsers

The first parameter is the name used by rewrite rules. The second parameter is a Jaabro.Input instance, mostly a wrapped string.

function str(name, input, string)
  // matching a string

function rex(name, input, regex_or_string)
  // matching a regexp
  // no need for ^ or \A, checks the match occurs at current offset

function seq(name, input, parser0, parser1, ...)
  // a sequence of parsers

function alt(name, input, parser0, parser1, ...)
  // tries the parsers returns as soon as one succeeds

function altg(name, input, parser0, parser1, ...)
  // tries all the parsers, returns with the longest match

function rep(name, input, parser, min, max=0)
  // repeats the the wrapped parser

function ren(name, input, parser)
  // renames the output of the wrapped parser

function jseq(name, input, eltpa, seppa)
  //
  // seq(name, input, eltpa, seppa, eltpa, seppa, eltpa, seppa, ...)
  //
  // a sequence of `eltpa` parsers separated (joined) by `seppa` parsers

function eseq(name, input, startpa, eltpa, seppa, endpa)
  //
  // seq(name, input, startpa, eltpa, seppa, eltpa, seppa, ..., endpa)
  //
  // a sequence of `eltpa` parsers separated (joined) by `seppa` parsers
  // preceded by a `startpa` parser and followed by a `endpa` parser

the seq parser and its quantifiers

seq is special, it understands "quantifiers": '?', '+' or '*'. They make behave seq a bit like a classical regex.

var CartParser = Jaabro.makeParser(function() {

  function pa(i) { return rex(null, i, /\(\s*/); }

  function fruit(i) {
    return rex('fruit', i, /(tomato|apple|orange)/);
  }
  function vegetable(i) {
    return rex('vegetable', i, /(potato|cabbage|carrot)/);
  }

  function cart(i) {
    return seq('cart', i, fruit, '*', vegetable, '*');
  }
    // zero or more fruits followed by zero or more vegetables
end

(Yes, this sample parser parses string like "appletomatocabbage", it's not very useful, but I hope you get the point about seq)

"classes"

Basically, one only has to know about Jaabro.Node. It's the input to any rewrite method.

Jaabro.Node

A jaabro result node.

{
  name: 'exp',
  result: 1,
  input: someJaabroInput,
  offset: 0,
  length: 5,
  parter: 'seq',
  children: [
    // ...
  ]
}

This "class" understands the following methods:

  • toArray(): returns an array representation of the node and its children
  • toString(): returns a string representation of the node and its children
  • string(): returns the string matched by the result node
  • lookup(name): returns the first node with the given name (might return "this" node)
  • gather(name): returns all the nodes with the given name (starting with "this" node)

As seen above:

  function rewrite_var(t) { return [ 'var', t.string().trim() ]; }
  function rewrite_number(t) { return [ 'num', t.string().trim() ]; }

The t is a Jaabro.result. The results for "var" and "num" named results get wrapped into some kind of s-expression with the result node string trimmed.

providing a custom rewrite() function

As seen above, Jaabro provides a default implementation of rewrite() one that walks the parse tree and calls rewrite_{node_name}(node). If one needs to completely bypass this default, providing a new rewrite(node) is OK.

var MyParser = Jaabro.makeParser(function() {

  // parse

  // ...

  var root = thatParser;

  // rewrite

  function rewrite(t) {

    // follow some custom logic and return result...
  }
});

LICENSE

MIT, see LICENSE.txt