Permalink
Browse files

365 - split up into files, wart-style

  • Loading branch information...
1 parent de142a8 commit 7230fa0cf4da5bd69852a16c92b3394fa8eb2658 @akkartik committed Jul 4, 2011
Showing with 2,656 additions and 2,683 deletions.
  1. +215 −0 001tokenize.boot.cc
  2. +227 −0 001tokenize.boot.test.cc
  3. +151 −0 002parenthesize.boot.cc
  4. +343 −0 002parenthesize.boot.test.cc
  5. +94 −0 003parse.boot.cc
  6. +52 −0 003parse.boot.test.cc
  7. +353 −0 004cell.boot.cc
  8. +94 −0 004cell.boot.test.cc
  9. +66 −0 005build.boot.cc
  10. +274 −0 005build.boot.test.cc
  11. +105 −0 006scope.boot.cc
  12. +140 −0 006scope.boot.test.cc
  13. +127 −0 007eval.boot.cc
  14. +325 −0 007eval.boot.test.cc
  15. +5 −0 010cons.cc
  16. +9 −0 010cons.test.cc
  17. +3 −0 011arith.cc
  18. +8 −0 011arith.test.cc
  19. +41 −2,620 boot.cc
  20. +14 −0 boot_list
  21. +0 −28 comp.cc
  22. +6 −6 makefile
  23. +4 −0 op_list
  24. +0 −28 strip_tests.pl
  25. +0 −1 test_list
View
@@ -0,0 +1,215 @@
+//// tokenize input including newlines and indent
+
+enum TokenType {
+ NON_WHITESPACE,
+ START_OF_LINE,
+ INDENT, MAYBE_WRAP,
+ OUTDENT,
+};
+
+bool whitespace(TokenType t) {
+ return t != NON_WHITESPACE;
+}
+
+struct Token {
+ TokenType type;
+ string token;
+ int indentLevel;
+
+ Token(const TokenType t, const string x, const int l) :type(t), token(x), indentLevel(l) {}
+
+ static Token of(string s) {
+ Token result(NON_WHITESPACE, s, 0);
+ return result;
+ }
+ static Token sol() {
+ Token result(START_OF_LINE, L"", 0);
+ return result;
+ }
+ static Token indent(int l, bool justOneExtraChar) {
+ Token result(justOneExtraChar ? MAYBE_WRAP : INDENT, L"", l);
+ return result;
+ }
+ static Token outdent(int l) {
+ Token result(OUTDENT, L"", l);
+ return result;
+ }
+
+ bool operator==(string x) {
+ return type == NON_WHITESPACE && token == x;
+ }
+ bool operator==(TokenType x) {
+ return type == x;
+ }
+ bool operator!=(string x) {
+ return !(*this == x);
+ }
+ bool operator!=(TokenType x) {
+ return !(*this == x);
+ }
+};
+
+ostream& operator<<(ostream& os, Token p) {
+ if (p.type != NON_WHITESPACE) return os << p.type;
+ else return os << p.token;
+}
+
+
+ void skip(istream& in) {
+ char dummy;
+ in >> dummy;
+ }
+
+ void skipWhitespace(istream& in) {
+ while (isspace(in.peek()) && in.peek() != L'\n')
+ skip(in);
+ }
+
+ bool eof(istream& in) {
+ in.peek();
+ return in.eof();
+ }
+
+ class Die {};
+ ostream& operator<<(ostream& os __unused__, Die die __unused__) {
+ exit(1);
+ }
+ Die DIE;
+
+// BEWARE: tab = 1 space; don't mix the two
+// But do track whether the final indent char is a space. Sometimes you want
+// to wrap a long form.
+const int LAST_CHAR_IS_SPACE = 100;
+int countIndent(istream& in) {
+ int count = 0;
+ bool lastCharIsSpace = false;
+ char c;
+ while (!eof(in)) {
+ if (!isspace(in.peek()))
+ break;
+ in >> c;
+ lastCharIsSpace = (c == L' ');
+ ++count;
+ if (c == L'\n')
+ count = 0;
+ }
+ if (count >= LAST_CHAR_IS_SPACE)
+ cerr << L"eek, too much indent\n" << DIE;
+ if (lastCharIsSpace)
+ count += LAST_CHAR_IS_SPACE;
+ return count;
+}
+
+
+
+ // slurp functions read a token when you're sure to be at it
+ void slurpChar(istream& in, ostream& out) {
+ char c;
+ in >> c; out << c;
+ }
+
+ void slurpWord(istream& in, ostream& out) {
+ char c;
+ while (!eof(in)) {
+ in >> c;
+ // keep this list sync'd with the parseToken switch below
+ if (isspace(c) || c == L',' || c == ';'
+ || c == L'(' || c == L')' || c == L'\'' || c == L'"') {
+ in.putback(c);
+ break;
+ }
+ out << c;
+ }
+ }
+
+ void slurpString(istream& in, ostream& out) {
+ slurpChar(in, out); // initial quote
+ char c;
+ while (!eof(in)) {
+ in >> c; out << c;
+ if (c == L'\\')
+ slurpChar(in, out); // blindly read next
+ else if (c == L'"')
+ break;
+ }
+ }
+
+ void slurpComment(istream& in, ostream& out) {
+ char c;
+ while (!eof(in)) {
+ in >> c;
+ if (c == L'\n') {
+ in.putback(c);
+ break;
+ }
+ out << c;
+ }
+ }
+
+int indentLevel = 0;
+TokenType prevTokenType = START_OF_LINE;
+Token parseToken(istream& in) {
+ if (prevTokenType != START_OF_LINE) {
+ skipWhitespace(in);
+ if (in.peek() == L'\n') {
+ skip(in);
+ return Token::sol();
+ }
+ }
+
+ if (prevTokenType == START_OF_LINE) {
+ int prevIndentLevel = indentLevel;
+ indentLevel = countIndent(in);
+ bool lastCharIsSpace = (indentLevel >= LAST_CHAR_IS_SPACE);
+ if (lastCharIsSpace) indentLevel -= LAST_CHAR_IS_SPACE;
+ if (indentLevel > prevIndentLevel+1)
+ return Token::indent(indentLevel, false);
+ else if (indentLevel == prevIndentLevel+1)
+ return Token::indent(indentLevel, lastCharIsSpace);
+ else if (indentLevel < prevIndentLevel)
+ return Token::outdent(indentLevel);
+ // otherwise prevIndentLevel == indentLevel; fall through
+ }
+
+ ostringstream out;
+ switch (in.peek()) { // now can't be whitespace
+ case L'"':
+ slurpString(in, out); break;
+
+ case L'(':
+ case L')':
+ case L'\'':
+ case L'`':
+ slurpChar(in, out); break;
+
+ case L',':
+ slurpChar(in, out);
+ if (in.peek() == L'@')
+ slurpChar(in, out);
+ break;
+
+ case L';':
+ slurpComment(in, out); break;
+
+ default:
+ slurpWord(in, out); break;
+ }
+ return Token::of(out.rdbuf()->str());
+}
+
+list<Token> tokenize(istream& in) {
+ indentLevel = 0;
+ in >> std::noskipws;
+ list<Token> result;
+ result.push_back(Token::sol());
+ prevTokenType = START_OF_LINE;
+ while (!eof(in)) {
+ result.push_back(parseToken(in));
+ prevTokenType = result.back().type;
+ }
+
+ while(!result.empty()
+ && (whitespace(result.back().type) || result.back().token == L""))
+ result.pop_back();
+ return result;
+}
Oops, something went wrong.

0 comments on commit 7230fa0

Please sign in to comment.