diff --git a/beaver-ant/pom.xml b/beaver-ant/pom.xml index 4db7c67..a7af103 100644 --- a/beaver-ant/pom.xml +++ b/beaver-ant/pom.xml @@ -17,9 +17,9 @@ compile - ${parent.groupId} + ${project.parent.groupId} beaver-cc - ${parent.version} + ${project.parent.version} diff --git a/beaver-cc/pom.xml b/beaver-cc/pom.xml index 8ab327d..185496e 100644 --- a/beaver-cc/pom.xml +++ b/beaver-cc/pom.xml @@ -11,9 +11,9 @@ - ${parent.groupId} + ${project.parent.groupId} beaver-rt - ${parent.version} + ${project.parent.version} diff --git a/beaver-rt/src/main/java/beaver/Parser.java b/beaver-rt/src/main/java/beaver/Parser.java index 78178bc..041cd69 100644 --- a/beaver-rt/src/main/java/beaver/Parser.java +++ b/beaver-rt/src/main/java/beaver/Parser.java @@ -44,7 +44,6 @@ public void scannerError(Scanner.Exception e) } public void syntaxError(Symbol token) { - System.err.print(':'); System.err.print(Symbol.getLine(token.start)); System.err.print(','); System.err.print(Symbol.getColumn(token.start)); @@ -67,7 +66,6 @@ public void syntaxError(Symbol token) } public void unexpectedTokenRemoved(Symbol token) { - System.err.print(':'); System.err.print(Symbol.getLine(token.start)); System.err.print(','); System.err.print(Symbol.getColumn(token.start)); @@ -90,7 +88,6 @@ public void unexpectedTokenRemoved(Symbol token) } public void missingTokenInserted(Symbol token) { - System.err.print(':'); System.err.print(Symbol.getLine(token.start)); System.err.print(','); System.err.print(Symbol.getColumn(token.start)); @@ -113,7 +110,6 @@ public void missingTokenInserted(Symbol token) } public void misspelledTokenReplaced(Symbol token) { - System.err.print(':'); System.err.print(Symbol.getLine(token.start)); System.err.print(','); System.err.print(Symbol.getColumn(token.start)); @@ -136,7 +132,6 @@ public void misspelledTokenReplaced(Symbol token) } public void errorPhraseRemoved(Symbol error) { - System.err.print(':'); System.err.print(Symbol.getLine(error.start)); System.err.print(','); System.err.print(Symbol.getColumn(error.start)); @@ -157,106 +152,132 @@ public void errorPhraseRemoved(Symbol error) */ public class TokenStream { + public static final boolean BUFFER = true; + public static final boolean FLUSH = false; + + private static final int BUF_SIZE = 4; + private static final int IDX_MASK = BUF_SIZE - 1; + private Scanner scanner; private Symbol[] buffer; - private int n_marked; - private int n_read; - private int n_written; + private int idx_add; + private int idx_get; + private int ix_mark; public TokenStream(Scanner scanner) { this.scanner = scanner; + this.buffer = new Symbol[BUF_SIZE]; + this.ix_mark = -1; } - - public TokenStream(Scanner scanner, Symbol first_symbol) - { - this(scanner); - alloc(1); - buffer[0] = first_symbol; - n_written++; - } - - public Symbol nextToken() throws IOException + + public void setMode(boolean buffering) { - if (buffer != null) - { - if (n_read < n_written) - return buffer[n_read++]; - - if (n_written < n_marked) - { - n_read++; - return buffer[n_written++] = readToken(); - } - buffer = null; - } - return readToken(); + ix_mark = (buffering ? idx_get : -1); } - - /** - * Prepare a stream to accumulate tokens. - * - * @param size number of shifted tokens to accumulate - */ - public void alloc(int size) + + public boolean isBuffering() { - buffer = new Symbol[(n_marked = size) + 1]; - n_read = n_written = 0; + return ix_mark >= 0; } - /** - * Prepare accumulated tokens to be reread by a next simulation run - * or by a recovered parser. - */ - public void rewind() + public boolean isEmpty() { - n_read = 0; + return idx_get == idx_add; } - - /** - * Insert two tokens at the beginning of a stream. - * - * @param t0 first token to be inserted - * @param t1 second token to be inserted - */ - public void insert(Symbol t0, Symbol t1) + + public boolean isFull() { - if (buffer.length - n_written < 2) - throw new IllegalStateException ("not enough space in the buffer"); - System.arraycopy(buffer, 0, buffer, 2, n_written); - buffer[0] = t0; - buffer[1] = t1; - n_written += 2; + return ((idx_add + 1) & IDX_MASK) == idx_get; } - /** - * Removes a token from the accumulator. - * - * @param i index of a token in the accumulator. - * @return removed token - */ - public Symbol remove(int i) + public int size() { - Symbol token = buffer[i]; - int last = n_written - 1; - while (i < last) + return (idx_add - idx_get + BUF_SIZE) & IDX_MASK; + } + + public void enque(Symbol symbol) + { + if (isFull()) + throw new IllegalStateException ("buffer is full"); + buffer[idx_add++] = symbol; + idx_add &= IDX_MASK; + } + + public Symbol peek() + { + if (isEmpty()) + throw new IllegalStateException ("buffer is empty"); + return buffer[idx_get]; + } + + public Symbol deque() + { + if (isEmpty()) + throw new IllegalStateException ("buffer is empty"); + Symbol symbol = buffer[idx_get++]; + idx_get &= IDX_MASK; + return symbol; + } + + public TokenStream(Scanner scanner, Symbol first_symbol) + { + this(scanner); + enque(first_symbol); + } + + public Symbol nextToken() throws IOException + { + if (isBuffering()) + { + Symbol symbol = readToken(); + enque(symbol); + return symbol; + } + else if (isEmpty()) + { + return readToken(); + } + else { - buffer[i] = buffer[++i]; + return deque(); } - n_written = last; - return token; } /** - * Checks whether a simulation filled the token accumulator. - * - * @return true if accumulator is full + * Prepare accumulated tokens to be reread by a next simulation run + * or by a recovered parser. */ - boolean isFull() + public void rewind() { - return n_read == n_marked; + if (!isBuffering()) + throw new IllegalStateException ("stream is not buffering"); + idx_get = ix_mark; } + /** + * Removes a symbol from the buffer. + */ + public void remove(int offset) + { + if (!isBuffering()) + throw new IllegalStateException ("stream is not buffering"); + switch (offset) + { + case 0: + deque(); + ix_mark = idx_get; + break; + case 1: + Symbol s0 = deque(); + buffer[idx_get] = s0; + ix_mark = idx_get; + break; + default: + throw new IllegalStateException ("unsupported offset"); + } + } + /** * Reads next recognized token from the scanner. If scanner fails to recognize a token and * throws an exception it will be reported via Parser.scannerError(). @@ -397,80 +418,80 @@ protected Parser(ParsingTables tables) this.states = new short[256]; } - /** - * Parses a source and returns a semantic value of the accepted nonterminal - * - * @param source of tokens - a Scanner - * @return semantic value of the accepted nonterminal - */ + /** + * Parses a source and returns a semantic value of the accepted nonterminal + * + * @param source of tokens - a Scanner + * @return semantic value of the accepted nonterminal + */ public Object parse(Scanner source) throws IOException, Parser.Exception { init(); return parse(new TokenStream(source)); } - - /** - * Parses a source and returns a semantic value of the accepted nonterminal. - * Before parsing starts injects alternative goal marker into the source to - * indicate that an alternative goal should be matched. - * - * @param source of tokens - a Scanner - * @param alt_goal_marker_id ID of a token like symbol that will be used as a marker - * @return semantic value of the accepted nonterminal - */ - public Object parse(Scanner source, short alt_goal_marker_id) throws IOException, Parser.Exception - { - init(); - TokenStream in = new TokenStream(source, new Symbol(alt_goal_marker_id)); - return parse(in); - } - - private Object parse(TokenStream in) throws IOException, Parser.Exception - { - while (true) - { - Symbol token = in.nextToken(); - while (true) - { - short act = tables.findParserAction(states[top], token.id); - if (act > 0) - { - shift(token, act); - break; - } - else if (act == accept_action_id) - { - Symbol goal = _symbols[top]; - _symbols = null; // drop this stack to prevent loitering - return goal.value; - } - else if (act < 0) - { - Symbol nt = reduce(~act); - act = tables.findNextState(states[top], nt.id); - if (act > 0) - { - shift(nt, act); - } - else if (act == accept_action_id) - { - _symbols = null; // no loitering - return nt.value; - } - else - { - throw new IllegalStateException("Cannot shift a nonterminal"); - } - } - else // act == 0, i.e. this is an error - { - report.syntaxError(token); - recoverFromError(token, in); - break; // because error recovery altered token stream - parser needs to refetch the next token - } - } - } - } + + /** + * Parses a source and returns a semantic value of the accepted nonterminal. + * Before parsing starts injects alternative goal marker into the source to + * indicate that an alternative goal should be matched. + * + * @param source of tokens - a Scanner + * @param alt_goal_marker_id ID of a token like symbol that will be used as a marker + * @return semantic value of the accepted nonterminal + */ + public Object parse(Scanner source, short alt_goal_marker_id) throws IOException, Parser.Exception + { + init(); + TokenStream in = new TokenStream(source, new Symbol(alt_goal_marker_id)); + return parse(in); + } + + private Object parse(TokenStream in) throws IOException, Parser.Exception + { + while (true) + { + Symbol token = in.nextToken(); + while (true) + { + short act = tables.findParserAction(states[top], token.id); + if (act > 0) + { + shift(token, act); + break; + } + else if (act == accept_action_id) + { + Symbol goal = _symbols[top]; + _symbols = null; // drop this stack to prevent loitering + return goal.value; + } + else if (act < 0) + { + Symbol nt = reduce(~act); + act = tables.findNextState(states[top], nt.id); + if (act > 0) + { + shift(nt, act); + } + else if (act == accept_action_id) + { + _symbols = null; // no loitering + return nt.value; + } + else + { + throw new IllegalStateException("Cannot shift a nonterminal"); + } + } + else // act == 0, i.e. this is an error + { + report.syntaxError(token); + recoverFromError(token, in); + break; // because error recovery altered token stream - parser needs to refetch the next token + } + } + } + } /** * Invoke actual reduce action routine. @@ -487,7 +508,8 @@ else if (act == accept_action_id) */ private void init() { - if (report == null) report = new Events(); + if (report == null) + report = new Events(); _symbols = new Symbol[states.length]; top = 0; // i.e. it's not empty @@ -552,26 +574,27 @@ private Symbol reduce(int rule_id) return lhs_sym; } - /** - * Implements parsing error recovery. Tries several simple approches first, like deleting "bad" token - * or replacing the latter with one of the expected in his state (if possible). If simple methods did - * not work tries to do error phrase recovery. - * - * It is expected that normally descendand parsers do not need to alter this method. In same cases though - * they may want to override it if they need a different error recovery strategy. - * - * @param token a lookahead terminal symbol that messed parsing - * @param in token stream - * @throws IOException propagated from a scanner if it has issues with the source - * @throws Parser.Exception if Parser cannot recover - */ + /** + * Implements parsing error recovery. Tries several simple approches first, like deleting "bad" token + * or replacing the latter with one of the expected (if possible). If simple methods have not worked + * out, tries to do error phrase recovery. + * + * It is expected that normally descendand parsers do not need to alter this method. In same cases though + * they may want to override it if they need a different error recovery strategy. + * + * @param token a lookahead terminal symbol that messed parsing + * @param in token stream + * @throws IOException propagated from a scanner if it has issues with the source + * @throws Parser.Exception if Parser cannot recover + */ protected void recoverFromError(Symbol token, TokenStream in) throws IOException, Parser.Exception { if (token.id == 0) // end of input throw new Parser.Exception("Cannot recover from the syntax error"); + in.setMode(TokenStream.BUFFER); + Simulator sim = new Simulator(); - in.alloc(3); short current_state = states[top]; if (!tables.compressed) // then we can try "insert missing" and "replace unexpected" recoveries { @@ -579,14 +602,16 @@ protected void recoverFromError(Symbol token, TokenStream in) throws IOException if (first_term_id >= 0) { Symbol term = new Symbol(first_term_id, _symbols[top].end, token.start); - in.insert(term, token); // insert expected terminal before unexpected one - in.rewind(); + in.enque(term); // insert expected terminal before the unexpected one + in.enque(token); if (sim.parse(in)) { in.rewind(); + in.setMode(TokenStream.FLUSH); report.missingTokenInserted(term); return; } + in.rewind(); int offset = tables.actn_offsets[current_state]; @@ -598,17 +623,18 @@ protected void recoverFromError(Symbol token, TokenStream in) throws IOException if (tables.lookaheads[index] == term_id) { term.id = term_id; - in.rewind(); if (sim.parse(in)) { in.rewind(); + in.setMode(TokenStream.FLUSH); report.missingTokenInserted(term); return; } + in.rewind(); } } - in.remove(1); // unexpected token, i.e. alter stream as if we replaced - // the unexpected token to an expected terminal + in.remove(1); // alter stream as if an expected terminal replaced the unexpected one + term.start = token.start; term.end = token.end; @@ -620,25 +646,28 @@ protected void recoverFromError(Symbol token, TokenStream in) throws IOException if (tables.lookaheads[index] == term_id) { term.id = term_id; - in.rewind(); if (sim.parse(in)) { in.rewind(); + in.setMode(TokenStream.FLUSH); report.misspelledTokenReplaced(term); return; } + in.rewind(); } } in.remove(0); // simple recoveries failed - remove all stream changes } } - // finally try parsing without unexpected token (as if it was "deleted") - if (sim.parse(in)) - { - in.rewind(); - report.unexpectedTokenRemoved(token); - return; - } + // finally try parsing without the unexpected token (as if it was "deleted") + if (sim.parse(in)) + { + in.rewind(); + in.setMode(TokenStream.FLUSH); + report.unexpectedTokenRemoved(token); + return; + } + in.rewind(); // Simple recoveries failed or are not applicable. Next step is an error phrase recovery. /* @@ -661,16 +690,16 @@ protected void recoverFromError(Symbol token, TokenStream in) throws IOException Symbol error = new Symbol(tables.error_symbol_id, first_sym.start, last_sym.end); // the end is temporary shift(error, goto_state); - in.rewind(); while (!sim.parse(in)) { - last_sym = in.remove(0); - if (last_sym.id == 0) // EOF - throw new Parser.Exception("Cannot recover from the syntax error"); in.rewind(); + if (in.peek().id == 0) // EOF + throw new Parser.Exception("Cannot recover from the syntax error"); + in.remove(0); } - error.end = last_sym.end; in.rewind(); + error.end = in.peek().end; + in.setMode(TokenStream.FLUSH); report.errorPhraseRemoved(error); } }