diff --git a/examples/python-ish.js b/examples/python-ish.js
index 22b3409..cb663ad 100644
--- a/examples/python-ish.js
+++ b/examples/python-ish.js
@@ -7,96 +7,46 @@ let P = require('..');
 
 ///////////////////////////////////////////////////////////////////////
 
-// LIMITATIONS: Python allows not only multiline blocks, but inline blocks too.
-//
-//     if x == y: print("nice")
-//
-// vs.
-//
-//     if x == y:
-//         print("nice")
-//
-// This parser only supports the multiline indented form.
-
-// NOTE: This is a hack and is not recommended. Maintaining state throughout
-// Parsimmon parsers is not reliable since backtracking may occur, leaving your
-// state inaccurate. See the relevant GitHub issue for discussion.
-//
-// https://github.com/jneen/parsimmon/issues/158
-//
-function indentPeek() {
-  return indentStack[indentStack.length - 1];
-}
-
-let indentStack = [0];
-
 let Pythonish = P.createLanguage({
-  // If this were actually Python, "Block" wouldn't be a statement on its own,
-  // but rather "If" and "While" would be statements that used "Block" inside.
-  Statement: r =>
-    P.alt(r.ExpressionStatement, r.Block),
+  Program: r =>
+    r.Statement.many().trim(r._).node('Program'),
 
-  // Just a simple `foo()` style function call.
-  FunctionCall: () =>
-    P.regexp(/[a-z]+/).skip(P.string('()'))
-      .node('FunctionCall')
-      .desc('a function call'),
+  Statement: r =>
+    P.alt(r.Call, r.Block),
 
-  // To make it a statement we just need a newline afterward.
-  ExpressionStatement: r =>
-    r.FunctionCall.skip(P.string('\n')),
+  Call: r =>
+    P.letter.skip(P.string('()')).skip(r.End).node('Call'),
 
-  // The general idea of this is to assume there's "block:" on its own line,
-  // then we capture the whitespace used to indent the first statement of the
-  // block, and require that every other statement has the same exact string of
-  // indentation in front of it.
   Block: r =>
     P.seqObj(
       P.string('block:'),
-      P.string('\n'),
-      ['indent', P.regexp(/[ ]+/)],
-      ['statement', r.Statement]
-    ).chain(args => {
-      // `.chain` is called after a parser succeeds. It returns the next parser
-      // to use for parsing. This allows subsequent parsing to be dependent on
-      // previous text.
-      let {indent, statement} = args;
-      let indentSize = indent.length;
-      let currentSize = indentPeek();
-      // Indentation must be deeper than the current block context. Otherwise
-      // you could indent *less* for a block and it would still work. This is
-      // not how any language I know of works.
-      if (indentSize <= currentSize) {
-        return P.fail('at least ' + currentSize + ' spaces');
-      }
-      indentStack.push(indentSize);
-      return P.string(indent)
-        .then(r.Statement)
-        .many()
-        .map(statements => {
-          indentStack.pop();
-          return [statement].concat(statements);
-        });
-    })
-    .node('Block'),
+      r.End,
+      P.indentMore,
+      ['first', r.Statement],
+      ['rest', P.indentSame.then(r.Statement).many()],
+      P.indentLess
+    ).map(args => {
+      let {first, rest} = args;
+      let statements = [first, ...rest];
+      return {statements};
+    }).node('Block'),
+
+  _: () => P.optWhitespace,
+  NL: () => P.string('\n'),
+  End: r => P.alt(r.NL, P.eof),
 });
 
 ///////////////////////////////////////////////////////////////////////
 
 let text = `\
+z()
 block:
-  a()
-  b()
+ a()
+ b()
+ block:
   c()
-  block:
-    d()
-    e()
-    f()
-  block:
-        g()
-        h()
-        i()
-        j()
+  d()
+e()
 `;
 
 function prettyPrint(x) {
@@ -105,5 +55,5 @@ function prettyPrint(x) {
   console.log(s);
 }
 
-let ast = Pythonish.Block.tryParse(text);
+let ast = Pythonish.Program.tryParse(text);
 prettyPrint(ast);
diff --git a/src/parsimmon.js b/src/parsimmon.js
index e0a2b1e..db0addb 100644
--- a/src/parsimmon.js
+++ b/src/parsimmon.js
@@ -19,23 +19,31 @@ function isArray(x) {
   return {}.toString.call(x) === '[object Array]';
 }
 
-function makeSuccess(index, value) {
+function makeSuccess(index, value, state) {
+  if (arguments.length !== 3) {
+    throw new Error('makeSuccess takes 3 arguments');
+  }
   return {
     status: true,
     index: index,
     value: value,
     furthest: -1,
-    expected: []
+    expected: [],
+    state: state
   };
 }
 
-function makeFailure(index, expected) {
+function makeFailure(index, expected, state) {
+  if (arguments.length !== 3) {
+    throw new Error('makeFailure takes 3 arguments');
+  }
   return {
     status: false,
     index: -1,
     value: null,
     furthest: index,
-    expected: [expected]
+    expected: [expected],
+    state: state
   };
 }
 
@@ -54,7 +62,8 @@ function mergeReplies(result, last) {
     index: result.index,
     value: result.value,
     furthest: last.furthest,
-    expected: expected
+    expected: expected,
+    state: result.state
   };
 }
 
@@ -192,18 +201,19 @@ function seq() {
   for (var j = 0; j < numParsers; j += 1) {
     assertParser(parsers[j]);
   }
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var result;
     var accum = new Array(numParsers);
     for (var j = 0; j < numParsers; j += 1) {
-      result = mergeReplies(parsers[j]._(input, i), result);
+      result = mergeReplies(parsers[j]._(input, i, state), result);
       if (!result.status) {
         return result;
       }
+      state = result.state;
       accum[j] = result.value;
       i = result.index;
     }
-    return mergeReplies(makeSuccess(i, accum), result);
+    return mergeReplies(makeSuccess(i, accum, state), result);
   });
 }
 
@@ -240,7 +250,7 @@ function seqObj() {
   if (totalKeys === 0) {
     throw new Error('seqObj expects at least one named parser, found zero');
   }
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var result;
     var accum = {};
     for (var j = 0; j < numParsers; j += 1) {
@@ -253,7 +263,8 @@ function seqObj() {
         name = null;
         parser = parsers[j];
       }
-      result = mergeReplies(parser._(input, i), result);
+      result = mergeReplies(parser._(input, i, state), result);
+      state = result.state;
       if (!result.status) {
         return result;
       }
@@ -262,7 +273,7 @@ function seqObj() {
       }
       i = result.index;
     }
-    return mergeReplies(makeSuccess(i, accum), result);
+    return mergeReplies(makeSuccess(i, accum, state), result);
   });
 }
 
@@ -303,13 +314,14 @@ function alt() {
   for (var j = 0; j < numParsers; j += 1) {
     assertParser(parsers[j]);
   }
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var result;
     for (var j = 0; j < parsers.length; j += 1) {
-      result = mergeReplies(parsers[j]._(input, i), result);
+      result = mergeReplies(parsers[j]._(input, i, state), result);
       if (result.status) {
         return result;
       }
+      // Backtracking: every alternative starts from the incoming state.
     }
     return result;
   });
@@ -333,11 +345,14 @@ function sepBy1(parser, separator) {
 
 // -*- Core Parsing Methods -*-
 
-_.parse = function(input) {
+_.parse = function(input, initialState) {
   if (typeof input !== 'string') {
     throw new Error('.parse must be called with a string as its argument');
   }
-  var result = this.skip(eof)._(input, 0);
+  if (arguments.length < 2) {
+    initialState = indentInitialState;
+  }
+  var result = this.skip(eof)._(input, 0, initialState);
   if (result.status) {
     return {
       status: true,
@@ -353,8 +368,11 @@ _.parse = function(input) {
 
 // -*- Other Methods -*-
 
-_.tryParse = function(str) {
-  var result = this.parse(str);
+_.tryParse = function(str, initialState) {
+  if (arguments.length < 2) {
+    initialState = indentInitialState;
+  }
+  var result = this.parse(str, initialState);
   if (result.status) {
     return result.value;
   } else {
@@ -397,17 +415,18 @@ _.then = function(next) {
 _.many = function() {
   var self = this;
 
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var accum = [];
     var result = undefined;
 
     for (;;) {
-      result = mergeReplies(self._(input, i), result);
+      result = mergeReplies(self._(input, i, state), result);
       if (result.status) {
+        state = result.state;
         i = result.index;
         accum.push(result.value);
       } else {
-        return mergeReplies(makeSuccess(i, accum), result);
+        return mergeReplies(makeSuccess(i, accum, state), result);
       }
     }
   });
@@ -432,13 +451,14 @@ _.times = function(min, max) {
   }
   assertNumber(min);
   assertNumber(max);
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var accum = [];
     var result = undefined;
     var prevResult = undefined;
     for (var times = 0; times < min; times += 1) {
-      result = self._(input, i);
+      result = self._(input, i, state);
       prevResult = mergeReplies(result, prevResult);
+      state = result.state;
       if (result.status) {
         i = result.index;
         accum.push(result.value);
@@ -449,6 +469,7 @@ _.times = function(min, max) {
     for (; times < max; times += 1) {
-      result = self._(input, i);
+      result = self._(input, i, state);
       prevResult = mergeReplies(result, prevResult);
       if (result.status) {
+        state = result.state;
         i = result.index;
         accum.push(result.value);
@@ -456,7 +477,7 @@ _.times = function(min, max) {
         break;
       }
     }
-    return mergeReplies(makeSuccess(i, accum), prevResult);
+    return mergeReplies(makeSuccess(i, accum, state), prevResult);
   });
 };
 
@@ -479,12 +500,13 @@ _.atLeast = function(n) {
 _.map = function(fn) {
   assertFunction(fn);
   var self = this;
-  return Parsimmon(function(input, i) {
-    var result = self._(input, i);
+  return Parsimmon(function(input, i, state) {
+    var result = self._(input, i, state);
+    state = result.state;
     if (!result.status) {
       return result;
     }
-    return mergeReplies(makeSuccess(result.index, fn(result.value)), result);
+    return mergeReplies(makeSuccess(result.index, fn(result.value), state), result);
   });
 };
 
@@ -531,8 +553,8 @@ _.notFollowedBy = function(x) {
 
 _.desc = function(expected) {
   var self = this;
-  return Parsimmon(function(input, i) {
-    var reply = self._(input, i);
+  return Parsimmon(function(input, i, state) {
+    var reply = self._(input, i, state);
     if (!reply.status) {
       reply.expected = [expected];
     }
@@ -552,13 +574,14 @@ _.ap = function(other) {
 
 _.chain = function(f) {
   var self = this;
-  return Parsimmon(function(input, i) {
-    var result = self._(input, i);
+  return Parsimmon(function(input, i, state) {
+    var result = self._(input, i, state);
+    state = result.state;
     if (!result.status) {
       return result;
     }
     var nextParser = f(result.value);
-    return mergeReplies(nextParser._(input, result.index), result);
+    return mergeReplies(nextParser._(input, result.index, state), result);
   });
 };
 
@@ -567,13 +590,13 @@ _.chain = function(f) {
 function string(str) {
   assertString(str);
   var expected = '\'' + str + '\'';
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var j = i + str.length;
     var head = input.slice(i, j);
     if (head === str) {
-      return makeSuccess(j, head);
+      return makeSuccess(j, head, state);
     } else {
-      return makeFailure(i, expected);
+      return makeFailure(i, expected, state);
     }
   });
 }
@@ -587,40 +610,43 @@ function regexp(re, group) {
   }
   var anchored = anchoredRegexp(re);
   var expected = '' + re;
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var match = anchored.exec(input.slice(i));
     if (match) {
       if (0 <= group && group <= match.length) {
         var fullMatch = match[0];
         var groupMatch = match[group];
-        return makeSuccess(i + fullMatch.length, groupMatch);
+        return makeSuccess(i + fullMatch.length, groupMatch, state);
       }
       return makeFailure(
-        'valid match group (0 to ' + match.length + ') in ' + expected
+        i,
+        'valid match group (0 to ' + match.length + ') in ' + expected,
+        state
       );
     }
-    return makeFailure(i, expected);
+    return makeFailure(i, expected, state);
   });
 }
 
 function succeed(value) {
-  return Parsimmon(function(input, i) {
-    return makeSuccess(i, value);
+  return Parsimmon(function(input, i, state) {
+    return makeSuccess(i, value, state);
   });
 }
 
 function fail(expected) {
-  return Parsimmon(function(input, i) {
-    return makeFailure(i, expected);
+  return Parsimmon(function(input, i, state) {
+    return makeFailure(i, expected, state);
   });
 }
 
 function lookahead(x) {
   if (isParser(x)) {
-    return Parsimmon(function(input, i) {
-      var result = x._(input, i);
+    return Parsimmon(function(input, i, state) {
+      var result = x._(input, i, state);
       result.index = i;
       result.value = '';
+      result.state = state;
       return result;
     });
   } else if (typeof x === 'string') {
@@ -633,23 +659,24 @@ function lookahead(x) {
 
 function notFollowedBy(parser) {
   assertParser(parser);
-  return Parsimmon(function(input, i) {
-    var result = parser._(input, i);
+  return Parsimmon(function(input, i, state) {
+    var result = parser._(input, i, state);
+    // Zero-width assertion: always hand back the state we were given.
     var text = input.slice(i, result.index);
     return result.status
-      ? makeFailure(i, 'not "' + text + '"')
-      : makeSuccess(i, null);
+      ? makeFailure(i, 'not "' + text + '"', state)
+      : makeSuccess(i, null, state);
   });
 }
 
 function test(predicate) {
   assertFunction(predicate);
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var char = input.charAt(i);
     if (i < input.length && predicate(char)) {
-      return makeSuccess(i + 1, char);
+      return makeSuccess(i + 1, char, state);
     } else {
-      return makeFailure(i, 'a character matching ' + predicate);
+      return makeFailure(i, 'a character matching ' + predicate, state);
     }
   });
 }
@@ -680,12 +707,12 @@ function range(begin, end) {
 function takeWhile(predicate) {
   assertFunction(predicate);
 
-  return Parsimmon(function(input, i) {
+  return Parsimmon(function(input, i, state) {
     var j = i;
     while (j < input.length && predicate(input.charAt(j))) {
       j++;
     }
-    return makeSuccess(j, input.slice(i, j));
+    return makeSuccess(j, input.slice(i, j), state);
   });
 }
 
@@ -695,9 +722,9 @@ function lazy(desc, f) {
     desc = undefined;
   }
 
-  var parser = Parsimmon(function(input, i) {
+  var parser = Parsimmon(function(input, i, state) {
     parser._ = f()._;
-    return parser._(input, i);
+    return parser._(input, i, state);
   });
 
   if (desc) {
@@ -725,26 +752,79 @@ _['fantasy-land/map'] = _.map;
 
 // -*- Base Parsers -*-
 
-var index = Parsimmon(function(input, i) {
-  return makeSuccess(i, makeLineColumnIndex(input, i));
+// Default parser state: the indentation stack, outermost level first.
+var indentInitialState = [0];
+
+// Consumes a run of spaces and yields how many were consumed.
+var spaces0Count = regexp(/[ ]*/).map(function(s) {
+  return s.length;
+});
+
+// Succeeds when the line is indented deeper than the current level and
+// pushes the new level. State is never mutated in place, so a failed
+// attempt can hand the incoming state straight back to its caller.
+var indentMore = spaces0Count.chain(function(count) {
+  return Parsimmon(function(input, i, state) {
+    var j = state.length - 1;
+    if (count > state[j]) {
+      return makeSuccess(i, null, state.concat(count));
+    }
+    var message = 'more than ' + state[j] + ' spaces of indentation';
+    return makeFailure(i, message, state);
+  });
+});
+
+// Succeeds when the line is indented less than the current level and pops
+// every level that is deeper than the line's indentation.
+var indentLess = spaces0Count.chain(function(count) {
+  return Parsimmon(function(input, i, state) {
+    var stack = state.slice();
+    if (count < stack[stack.length - 1]) {
+      while (count < stack[stack.length - 1]) {
+        stack.pop();
+      }
+      return makeSuccess(i, null, stack);
+    }
+    var message =
+      'less than ' +
+      stack[stack.length - 1] +
+      ' spaces of indentation';
+    return makeFailure(i, message, state);
+  });
+});
+
+// Succeeds when the line is indented exactly at the current level.
+var indentSame = spaces0Count.chain(function(count) {
+  return Parsimmon(function(input, i, state) {
+    var j = state.length - 1;
+    if (count === state[j]) {
+      return makeSuccess(i, null, state);
+    }
+    var message = 'exactly ' + state[j] + ' spaces of indentation';
+    return makeFailure(i, message, state);
+  });
+});
+
+var index = Parsimmon(function(input, i, state) {
+  return makeSuccess(i, makeLineColumnIndex(input, i), state);
 });
 
-var any = Parsimmon(function(input, i) {
+var any = Parsimmon(function(input, i, state) {
   if (i >= input.length) {
-    return makeFailure(i, 'any character');
+    return makeFailure(i, 'any character', state);
   }
-  return makeSuccess(i + 1, input.charAt(i));
+  return makeSuccess(i + 1, input.charAt(i), state);
 });
 
-var all = Parsimmon(function(input, i) {
-  return makeSuccess(input.length, input.slice(i));
+var all = Parsimmon(function(input, i, state) {
+  return makeSuccess(input.length, input.slice(i), state);
 });
 
-var eof = Parsimmon(function(input, i) {
+var eof = Parsimmon(function(input, i, state) {
   if (i < input.length) {
-    return makeFailure(i, 'EOF');
+    return makeFailure(i, 'EOF', state);
   }
-  return makeSuccess(i, null);
+  return makeSuccess(i, null, state);
 });
 
 var digit = regexp(/[0-9]/).desc('a digit');
@@ -765,6 +845,9 @@ Parsimmon.empty = empty;
 Parsimmon.eof = eof;
 Parsimmon.fail = fail;
 Parsimmon.formatError = formatError;
+Parsimmon.indentLess = indentLess;
+Parsimmon.indentMore = indentMore;
+Parsimmon.indentSame = indentSame;
 Parsimmon.index = index;
 Parsimmon.isParser = isParser;
 Parsimmon.lazy = lazy;
diff --git a/test/core/constructor.test.js b/test/core/constructor.test.js
index 07b780d..5abeb4b 100644
--- a/test/core/constructor.test.js
+++ b/test/core/constructor.test.js
@@ -5,11 +5,11 @@ suite('Parsimmon()', function() {
   test('should work in general', function() {
     var good = 'just a Q';
     var bad = 'all I wanted was a Q';
-    var justQ = Parsimmon(function(str, i) {
+    var justQ = Parsimmon(function(str, i, state) {
       if (str.charAt(i) === 'Q') {
-        return Parsimmon.makeSuccess(i + 1, good);
+        return Parsimmon.makeSuccess(i + 1, good, state);
       } else {
-        return Parsimmon.makeFailure(i, bad);
+        return Parsimmon.makeFailure(i, bad, state);
       }
     });
     var result1 = justQ.parse('Q');
@@ -31,13 +31,14 @@ suite('Parsimmon()', function() {
 
   test('unsafeUnion works on poorly formatted custom parser', function() {
     var p1 = Parsimmon.string('a').or(Parsimmon.string('b'));
-    var p2 = Parsimmon(function(str, i) {
+    var p2 = Parsimmon(function(str, i, state) {
       return {
         status: false,
         index: -1,
         value: null,
         furthest: i,
-        expected: []
+        expected: [],
+        state: state
       };
     });
     var p3 = Parsimmon.alt(p2, p1);
diff --git a/test/core/custom.test.js b/test/core/custom.test.js
index 97bb60d..848a0c6 100644
--- a/test/core/custom.test.js
+++ b/test/core/custom.test.js
@@ -4,8 +4,8 @@ suite('Parsimmon.custom', function(){
   test('simple parser definition', function(){
     function customAny() {
       return Parsimmon.custom(function(success){
-        return function(input, i) {
-          return success(i+1, input.charAt(i));
+        return function(input, i, state) {
+          return success(i + 1, input.charAt(i), state);
         };
       });
     }
@@ -21,8 +21,8 @@ suite('Parsimmon.custom', function(){
   test('failing parser', function(){
     function failer() {
       return Parsimmon.custom(function(success, failure){
-        return function(input, i) {
-          return failure(i, 'nothing');
+        return function(input, i, state) {
+          return failure(i, 'nothing', state);
         };
       });
     }
@@ -41,11 +41,12 @@ suite('Parsimmon.custom', function(){
   test('composes with existing parsers', function(){
     function notChar(char) {
       return Parsimmon.custom(function(success, failure) {
-        return function(input, i) {
+        return function(input, i, state) {
           if (input.charCodeAt(i) !== char.charCodeAt(0)) {
-            return success(i+1, input.charAt(i));
+            return success(i + 1, input.charAt(i), state);
           }
-          return failure(i, 'something different than "' + input.charAt(i)) + '"';
+          var message = 'something different than "' + input.charAt(i) + '"';
+          return failure(i, message, state);
         };
       });
     }
diff --git a/test/core/makeFailure.test.js b/test/core/makeFailure.test.js
index a13badb..d56c2e1 100644
--- a/test/core/makeFailure.test.js
+++ b/test/core/makeFailure.test.js
@@ -3,12 +3,13 @@
 test('Parsimmon.makeFailure', function() {
   var furthest = 4444;
   var expected = 'waiting in the clock tower';
-  var result = Parsimmon.makeFailure(furthest, expected);
+  var result = Parsimmon.makeFailure(furthest, expected, undefined);
   assert.deepEqual(result, {
     status: false,
     index: -1,
     value: null,
     furthest: furthest,
-    expected: [expected]
+    expected: [expected],
+    state: undefined
   });
 });
diff --git a/test/core/makeSuccess.test.js b/test/core/makeSuccess.test.js
index 138e3b7..60141c1 100644
--- a/test/core/makeSuccess.test.js
+++ b/test/core/makeSuccess.test.js
@@ -3,12 +3,13 @@
 test('Parsimmon.makeSuccess', function() {
   var index = 11;
   var value = 'a lucky number';
-  var result = Parsimmon.makeSuccess(index, value);
+  var result = Parsimmon.makeSuccess(index, value, 'state');
   assert.deepEqual(result, {
     status: true,
     index: index,
     value: value,
     furthest: -1,
-    expected: []
+    expected: [],
+    state: 'state'
   });
 });