Permalink
Browse files

- Tokenizer code complete.

- Building Compiler: WIP DNC.

TODO:
- Tokenizer unit tests, and lots of them!!!
- Complete Compiler!!!
  • Loading branch information...
1 parent 6fb039d commit f82a05e736ca17f8032ac8f2a609da0dba071ae6 @alexarnon committed Sep 22, 2011
Showing with 321 additions and 74 deletions.
  1. +0 −27 src/com/tmp/uint/core/Compiler.java
  2. +83 −0 src/com/tmp/uint/core/compilation/Compiler.java
  3. +193 −0 src/com/tmp/uint/core/compilation/Tokenizer.java
  4. +4 −1 src/com/tmp/uint/core/{ → execution}/Runner.java
  5. +1 −1 src/com/tmp/uint/core/{ → types}/Expression.java
  6. +3 −2 src/com/tmp/uint/core/{ → types}/Function.java
  7. +3 −2 src/com/tmp/uint/core/{ → types}/MetaFunction.java
  8. +1 −1 src/com/tmp/uint/functions/arithmetic/One.java
  9. +1 −1 src/com/tmp/uint/functions/arithmetic/Plus.java
  10. +1 −1 src/com/tmp/uint/functions/arithmetic/Two.java
  11. +1 −1 src/com/tmp/uint/functions/arithmetic/Zero.java
  12. +2 −2 src/com/tmp/uint/functions/data/List.java
  13. +4 −5 src/com/tmp/uint/functions/data/Usage.java
  14. +1 −3 src/com/tmp/uint/functions/flow/Do.java
  15. +2 −2 src/com/tmp/uint/functions/flow/If.java
  16. +2 −2 src/com/tmp/uint/functions/flow/While.java
  17. +1 −1 src/com/tmp/uint/functions/logic/And.java
  18. +1 −1 src/com/tmp/uint/functions/logic/Equals.java
  19. +1 −1 src/com/tmp/uint/functions/logic/Not.java
  20. +1 −1 src/com/tmp/uint/functions/logic/Or.java
  21. +1 −2 src/com/tmp/uint/functions/output/Print.java
  22. +2 −3 test/com/tmp/uint/core/CompilerTest.java
  23. +2 −2 test/com/tmp/uint/functions/ArithmeticTests.java
  24. +2 −4 test/com/tmp/uint/functions/DataTests.java
  25. +2 −2 test/com/tmp/uint/functions/FlowTests.java
  26. +2 −2 test/com/tmp/uint/functions/LogicTests.java
  27. +2 −2 test/com/tmp/uint/functions/OutputTests.java
  28. +2 −2 test/com/tmp/uint/test/GlobalAcc.java
@@ -1,27 +0,0 @@
-package com.tmp.uint.core;
-
-import java.util.Map;
-import java.util.TreeMap;
-
-public class Compiler {
-
- private final Map<String, Function> functions;
-
-
- public Compiler(Map<String, Function> functions) {
- this.functions = new TreeMap<String, Function>(functions);
- for (Function function: functions.values()) {
- if (function == null) {
- throw new IllegalArgumentException("null function");
- }
- }
- }
-
-
- public Expression compile(String input) {
-
- throw new IllegalStateException("unimplemented");
- }
-
-
-}
@@ -0,0 +1,83 @@
+package com.tmp.uint.core.compilation;
+
+import com.tmp.uint.core.types.Expression;
+import com.tmp.uint.core.types.Function;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static com.tmp.uint.core.compilation.Tokenizer.Token;
+
+/**
+ * Compiles source text into an {@link Expression}.
+ *
+ * Work in progress: the input is tokenized and the leading "open bracket,
+ * function name" header is validated, but resolving the function and parsing
+ * its arguments are not implemented yet.
+ */
+public class Compiler {
+
+    /** Functions available to compiled code, keyed by name (private sorted copy). */
+    private final Map<String, Function> functions;
+
+
+    /**
+     * @param functions name-to-function table; it is copied, so later changes
+     *                  to the caller's map are not seen by this compiler
+     */
+    public Compiler(Map<String, Function> functions) {
+        this.functions = new TreeMap<String, Function>(functions);
+    }
+
+
+    /**
+     * Tokenizes {@code input} and compiles it as a single expression.
+     *
+     * @param input source text of one expression
+     * @return the compiled expression
+     * @throws IOException           if reading the input fails
+     * @throws ParseException        if the text cannot be tokenized, does not
+     *                               form an expression, or has tokens left over
+     *                               after the expression
+     * @throws IllegalStateException while argument parsing remains unimplemented
+     */
+    public Expression compile(String input) throws IOException, ParseException {
+
+        List<Token> tokens = Tokenizer.tokenize(input);
+        Iterator<Token> tokenIterator = tokens.iterator();
+        Expression result = compile(tokenIterator, false);
+        if (tokenIterator.hasNext()) {
+            Token unexpected = tokenIterator.next();
+            // Tokens do not carry their source offset yet, hence the -1.
+            throw new ParseException("unexpected token encountered at end of expression " +
+                    rep(unexpected), -1/*token.pos*/);
+        }
+        return result;
+    }
+
+
+    // NOTE(review): currently unused; presumably "waiting for function" /
+    // "waiting for argument" parser states - confirm before relying on it.
+    private enum State {
+        WF, WA
+    }
+
+    /** Renders a token as {@code (TYPE:value)} for use in error messages. */
+    private String rep(Token token) {
+        // Plain concatenation prints "null" instead of throwing on a null value.
+        return "(" + token.type.name() + ":" + token.value + ")";
+    }
+
+    /**
+     * Compiles one bracketed expression starting at the iterator's current position.
+     *
+     * @param it                         token stream, advanced as tokens are consumed
+     * @param openBracketAlreadyConsumed true when the caller has already read
+     *                                   the opening bracket of this expression
+     * @throws ParseException        if the stream ends early or the next tokens
+     *                               are not an open bracket followed by a word
+     * @throws IllegalStateException always, once the header has been validated,
+     *                               because the rest is not implemented
+     */
+    private Expression compile(Iterator<Token> it, boolean openBracketAlreadyConsumed) throws ParseException {
+
+        // Open paren:
+        if (!openBracketAlreadyConsumed) {
+            if (!it.hasNext()) {
+                throw new ParseException("unexpected EOF instead of open paren", -1);
+            }
+            Token openParen = it.next();
+            if (openParen.type != Tokenizer.TokenType.OPEN_BRACKET) {
+                throw new ParseException("expected open paren instead of " + rep(openParen), -1);
+            }
+        }
+
+        // Holder for the expression being built; nothing is added to it yet.
+        Expression result = new Expression();
+
+        // Function name:
+        {
+            if (!it.hasNext()) {
+                throw new ParseException("unexpected EOF instead of function name", -1);
+            }
+            Token functionName = it.next();
+            if (functionName.type != Tokenizer.TokenType.WORD) {
+                throw new ParseException("expected word for function name instead of " + rep(functionName), -1);
+            }
+            // TODO: use functions to add function to expression...
+        }
+
+        // Arguments:
+        {
+            // TODO: parse arguments up to the matching close bracket.
+        }
+
+        // The method previously fell off the end without returning a value,
+        // which does not compile. Fail explicitly until parsing is finished.
+        throw new IllegalStateException("unimplemented");
+    }
+
+
+}
@@ -0,0 +1,193 @@
+package com.tmp.uint.core.compilation;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class Tokenizer {
+
+ public static enum TokenType {
+ OPEN_BRACKET,
+ CLOSE_BRACKET,
+ WORD,
+ STRING,
+ INTEGER,
+ }
+
+ public static class Token {
+ public final TokenType type;
+ public final Object value;
+
+ public Token(TokenType type, Object value) {
+ this.type = type;
+ this.value = value;
+ }
+ }
+
+ private enum State {
+ END,
+ WS,
+ WORD,
+ STRING,
+ INTEGER,
+ }
+
+
+ private final static Token open_bracket = new Token(TokenType.OPEN_BRACKET, "(");
+ private final static Token close_bracket = new Token(TokenType.CLOSE_BRACKET, "(");
+
+
+ public static List<Token> tokenize(String input) throws IOException, ParseException {
+ return tokenize(new StringReader(input));
+ }
+
+ private static List<Token> tokenize(StringReader reader) throws IOException, ParseException {
+
+ List<Token> tokens = new ArrayList<Token>();
+ State state = State.WS;
+ StringBuilder buf = new StringBuilder();
+ int pos = -1;
+
+ while (! State.END.equals(state)) {
+
+ final int c = reader.read();
+
+ final State nextState;
+
+ switch (state) {
+
+ case WS: {
+
+ if (c == -1) {
+ nextState = State.END;
+ } else if (Character.isWhitespace(c)) {
+ nextState = state;
+ } else if (c == '(') {
+ nextState = newState(open_bracket, tokens, state);
+ } else if (c == ')') {
+ nextState = newState(close_bracket, tokens, state);
+ } else if (c == '"') {
+ nextState = newState(buf, State.STRING);
+ } else if (Character.isDigit(c)) {
+ nextState = newState(buf, c, State.INTEGER);
+ } else if (Character.isLetter(c)) {
+ nextState = newState(buf, c, State.WORD);
+ } else {
+ throw new ParseException("unexpected character " + c + " in state " + state.name(), pos);
+ }
+
+ break;
+ }
+
+ case WORD: {
+
+ if (c == -1) {
+ nextState = newState(TokenType.WORD, tokens, buf, State.END);
+ } else if (Character.isLetterOrDigit(c) || "-_.".contains(Character.toString((char) c))) {
+ append(c, buf);
+ nextState = state;
+ } else if (c == '(') {
+ nextState = newState(TokenType.WORD, tokens, buf, State.WS);
+ tokens.add(open_bracket);
+ } else if (c == ')') {
+ nextState = newState(TokenType.WORD, tokens, buf, State.WS);
+ tokens.add(close_bracket);
+ } else if (Character.isWhitespace(c)) {
+ nextState = newState(TokenType.WORD, tokens, buf, State.WS);
+ } else {
+ throw new ParseException("unexpected character " + c + " in state " + state.name(), pos);
+ }
+
+ break;
+ }
+
+ case STRING: {
+
+ if (c == -1) {
+ throw new ParseException("unexpected end of buffer", 0);
+ } else if (c == '"') {
+ nextState = newState(TokenType.STRING, tokens, buf, State.WS);
+ } else if (c == '\\') {
+ int escaped = reader.read();
+ if (escaped == -1) {
+ throw new ParseException("unexpected end of buffer in state " + state.name(), pos);
+ }
+ append(c, buf);
+ nextState = state;
+ } else {
+ append(c, buf);
+ nextState = state;
+ }
+
+ break;
+ }
+
+ case INTEGER: {
+
+ if (c == -1) {
+ nextState = newState(TokenType.INTEGER, tokens, buf, State.END);
+ } else if (Character.isDigit(c)) {
+ append(c, buf);
+ nextState = state;
+ } else if (c == '(') {
+ nextState = newState(TokenType.INTEGER, tokens, buf, State.WS);
+ tokens.add(open_bracket);
+ } else if (c == ')') {
+ nextState = newState(TokenType.INTEGER, tokens, buf, State.WS);
+ tokens.add(close_bracket);
+ } else if (Character.isWhitespace(c)) {
+ tokens.add(new Token(TokenType.INTEGER, buf.toString()));
+ nextState = State.WS;
+ } else {
+ throw new ParseException("unexpected character " + c + " in state " + state.name(), pos);
+ }
+
+ break;
+ }
+
+ default: {
+ throw new ParseException("UNKNOWN TOKENIZER STATE " + state.name(), pos);
+ }
+
+ }
+
+ state = nextState;
+ }
+
+ return tokens;
+ }
+
+
+ private static void append(int c, StringBuilder buf) {
+ addCharacter((char) c, buf);
+ }
+
+ private static void addCharacter(char c, StringBuilder buf) {
+ buf.append(c);
+ }
+
+ private static State newState(StringBuilder buf, State newState) {
+ buf.setLength(0);
+ return newState;
+ }
+
+ private static State newState(StringBuilder buf, int firstChar, State newState) {
+ buf.setLength(0);
+ addCharacter((char)firstChar, buf);
+ return newState;
+ }
+
+ private static State newState(TokenType tokenType, List<Token> tokens, StringBuilder buf, State newState) {
+ tokens.add(new Token(tokenType, buf.toString()));
+ buf.setLength(0);
+ return newState;
+ }
+
+ private static State newState(Token token, List<Token> tokens, State newState) {
+ tokens.add(token);
+ return newState;
+ }
+
+}
@@ -1,4 +1,7 @@
-package com.tmp.uint.core;
+package com.tmp.uint.core.execution;
+
+import com.tmp.uint.core.types.Expression;
+import com.tmp.uint.core.types.Function;
import java.util.ArrayList;
@@ -1,4 +1,4 @@
-package com.tmp.uint.core;
+package com.tmp.uint.core.types;
import java.util.ArrayList;
@@ -1,6 +1,7 @@
-package com.tmp.uint.core;
+package com.tmp.uint.core.types;
+
+import com.tmp.uint.core.execution.Runner;
-import java.util.ArrayList;
import java.util.List;
public abstract class Function {
@@ -1,6 +1,7 @@
-package com.tmp.uint.core;
+package com.tmp.uint.core.types;
+
+import com.tmp.uint.core.execution.Runner;
-import java.util.ArrayList;
import java.util.List;
public abstract class MetaFunction extends Function {
@@ -1,6 +1,6 @@
package com.tmp.uint.functions.arithmetic;
-import com.tmp.uint.core.Function;
+import com.tmp.uint.core.types.Function;
import java.util.List;
@@ -1,6 +1,6 @@
package com.tmp.uint.functions.arithmetic;
-import com.tmp.uint.core.Function;
+import com.tmp.uint.core.types.Function;
import java.util.List;
@@ -1,6 +1,6 @@
package com.tmp.uint.functions.arithmetic;
-import com.tmp.uint.core.Function;
+import com.tmp.uint.core.types.Function;
import java.util.List;
@@ -1,6 +1,6 @@
package com.tmp.uint.functions.arithmetic;
-import com.tmp.uint.core.Function;
+import com.tmp.uint.core.types.Function;
import java.util.List;
@@ -1,7 +1,7 @@
package com.tmp.uint.functions.data;
-import com.tmp.uint.core.MetaFunction;
-import com.tmp.uint.core.Runner;
+import com.tmp.uint.core.types.MetaFunction;
+import com.tmp.uint.core.execution.Runner;
import java.util.ArrayList;
import java.util.Collections;
@@ -1,11 +1,10 @@
package com.tmp.uint.functions.data;
-import com.tmp.uint.core.Expression;
-import com.tmp.uint.core.Function;
-import com.tmp.uint.core.MetaFunction;
-import com.tmp.uint.core.Runner;
+import com.tmp.uint.core.types.Expression;
+import com.tmp.uint.core.types.Function;
+import com.tmp.uint.core.execution.Runner;
+import com.tmp.uint.core.types.MetaFunction;
-import java.util.*;
import java.util.List;
public class Usage extends MetaFunction {
Oops, something went wrong. Retry.

0 comments on commit f82a05e

Please sign in to comment.