Skip to content

Commit

Permalink
simplifying the munchar tokens
Browse files (browse the repository at this point in the history)
  • Loading branch information
Aaron Leung committed Mar 26, 2015
1 parent 658e199 commit 5d387e3
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 229 deletions.
115 changes: 0 additions & 115 deletions src/lisp_tokens.hpp

This file was deleted.

146 changes: 39 additions & 107 deletions src/munchar_tokens.hpp
Expand Up @@ -7,113 +7,45 @@
namespace Munchar {
namespace Tokens {

// Control characters and line endings.
// NOTE(review): CHR and STR are defined outside this view; presumably CHR(c)
// builds a matcher for the single character c and STR(s) a matcher for the
// literal string s -- confirm against the Munchar headers.
constexpr auto null = CHR('\0');
constexpr auto tab = CHR('\t');
constexpr auto newline = CHR('\n');
// Alias: same matcher object value as `newline`.
constexpr auto linefeed = newline;
constexpr auto cr = CHR('\r');
// Two-character sequence, hence STR rather than CHR.
constexpr auto crlf = STR("\r\n");

// One named token per ASCII punctuation character, listed in ASCII code-point
// order (' ' through '~'). Where a character has several common names, the
// extra names are plain aliases initialised from the first one.
constexpr auto space = CHR(' ');
constexpr auto exclamation = CHR('!');
constexpr auto double_quote = CHR('"');
// Alias of double_quote (not the apostrophe).
constexpr auto quote = double_quote;
constexpr auto pound = CHR('#');
// Aliases of pound.
constexpr auto hash = pound;
constexpr auto octothorpe = pound;
constexpr auto dollar = CHR('$');
constexpr auto percent = CHR('%');
constexpr auto ampersand = CHR('&');
constexpr auto apostrophe = CHR('\'');
// Alias of apostrophe.
constexpr auto single_quote = apostrophe;
constexpr auto left_paren = CHR('(');
constexpr auto right_paren = CHR(')');
constexpr auto asterisk = CHR('*');
constexpr auto plus = CHR('+');
constexpr auto comma = CHR(',');
constexpr auto hyphen = CHR('-');
// Alias of hyphen.
constexpr auto minus = hyphen;
constexpr auto period = CHR('.');
// Alias of period.
constexpr auto dot = period;
constexpr auto slash = CHR('/');
// Alias of slash.
constexpr auto divide = slash;
constexpr auto colon = CHR(':');
constexpr auto semicolon = CHR(';');
constexpr auto less_than = CHR('<');
// Alias of less_than.
constexpr auto lt = less_than;
// Two-character operator "<=".
constexpr auto lte = STR("<=");
constexpr auto equals = CHR('=');
// Alias of equals.
constexpr auto eq = equals;
constexpr auto greater_than = CHR('>');
// Alias of greater_than.
constexpr auto gt = greater_than;
// Two-character operator ">=".
constexpr auto gte = STR(">=");
constexpr auto question = CHR('?');
constexpr auto at = CHR('@');
constexpr auto left_bracket = CHR('[');
constexpr auto backslash = CHR('\\');
constexpr auto right_bracket = CHR(']');
constexpr auto caret = CHR('^');
// Alias of caret.
constexpr auto circumflex = caret;
constexpr auto underscore = CHR('_');
constexpr auto backquote = CHR('`');
constexpr auto left_brace = CHR('{');
constexpr auto vertical_bar = CHR('|');
// Alias of vertical_bar.
constexpr auto pipe = vertical_bar;
constexpr auto right_brace = CHR('}');
constexpr auto tilde = CHR('~');

// Character classes and composite tokens built from the single-character
// tokens above.
// NOTE(review): the combinator operators are defined outside this view. From
// the way they are used here they presumably mean: a ^ b = a followed by b,
// a | b = alternation, *a = zero or more, +a = one or more, ~a = optional,
// !a = "not followed by a" (consuming nothing). Confirm against the Munchar
// headers before relying on the descriptions below.
constexpr auto _ = Any_Char { };
// Predicates wrapping the C <ctype> classification functions.
constexpr auto letter = MUNCHAR_STATIC_PREDICATE(::isalpha);
constexpr auto alphanumeric = MUNCHAR_STATIC_PREDICATE(::isalnum);
constexpr auto digit = MUNCHAR_STATIC_PREDICATE(::isdigit);
constexpr auto hex_digit = MUNCHAR_STATIC_PREDICATE(::isxdigit);
constexpr auto ws_char = MUNCHAR_STATIC_PREDICATE(::isspace);
// A run of ws_char (presumably possibly empty, given the `*`).
constexpr auto whitespace = *ws_char;
constexpr auto sign = CLS("+-");
// C-style identifier: letter or underscore, then letters/digits/underscores.
constexpr auto id_start = letter | underscore;
constexpr auto id_body = alphanumeric | underscore;
constexpr auto identifier = id_start^*id_body;
// Optionally signed digit string.
constexpr auto integer = ~sign ^ +digit;
// Number without exponent: either [digits] '.' digits, or plain digits.
constexpr auto number_ne = ~sign ^ ((*digit ^ dot ^ +digit) | +digit);
// number_ne with an optional exponent part: 'e' or 'E', optional sign, digits.
constexpr auto number = number_ne ^ ~(CLS("eE") ^ ~sign ^ +digit);
// Backslash followed by any one character.
constexpr auto escape_seq = backslash ^ _;
// Quoted strings: the body is escape sequences or any character that is
// neither the closing quote nor a backslash.
constexpr auto dq_string = double_quote ^
*(escape_seq | (!CLS("\"\\") ^ _)) ^
double_quote;
constexpr auto sq_string = single_quote ^
*(escape_seq | (!CLS("'\\") ^ _)) ^
single_quote;
// constexpr auto string = dq_string | sq_string;
// End of line: "\n" or "\r\n" (a lone '\r' is not included).
constexpr auto eol = newline | crlf;
// Comments: the line-oriented forms run up to eol, and the trailing eol is
// written with `~`, i.e. presumably optional so a comment may end at EOF.
constexpr auto cpp_comment = STR("//") ^ *(!eol ^ _) ^ ~eol;
constexpr auto c_comment = STR("/*") ^ *(!STR("*/") ^ _) ^ STR("*/");
constexpr auto sh_comment = CHR('#') ^ *(!eol ^ _) ^ ~eol;


// Lisp-specific tokens layered on the general-purpose tokens above.
// NOTE(review): operator meanings (^ sequence, | alternation, * repetition,
// ! negative lookahead, ~ optional) are inferred from usage -- confirm.
constexpr auto ellipsis = STR("...");
constexpr auto right_arrow = STR("->");
// Boolean literals, accepting either letter case.
constexpr auto lisp_true = STR("#t") | STR("#T");
constexpr auto lisp_false = STR("#f") | STR("#F");
constexpr auto lisp_boolean = lisp_true | lisp_false;
// Characters allowed to start an ordinary Lisp identifier. Note that this set
// includes `hyphen` but not `plus`.
constexpr auto lisp_id_start = letter | underscore | hyphen | exclamation | dollar | percent | ampersand | asterisk | slash | colon | less_than | eq | greater_than | question | caret | tilde;
// Characters allowed after the first one.
constexpr auto lisp_id_body = alphanumeric | lisp_id_start | plus | minus | dot | at;
// Identifiers that do not fit the start/body pattern: "+", "-", "...", and
// names beginning with "->".
constexpr auto peculiar_identifier = plus | minus | ellipsis | (right_arrow^*lisp_id_body);
constexpr auto lisp_identifier = (lisp_id_start^*lisp_id_body) | peculiar_identifier;
// ';' up to end of line; the trailing eol is written with `~`.
constexpr auto lisp_comment = CHR(';') ^ *(!eol ^ _) ^ ~eol;
// Lisp strings are the double-quoted form only.
constexpr auto lisp_string = dq_string;
// Space, tab and carriage return; '\n' is not part of this class.
constexpr auto spaces = CLS(" \t\r");
// Earlier drafts kept for reference (disabled):
// constexpr auto lparen = CHR('(');
// constexpr auto rparen = CHR(')');
// constexpr auto hash = CHR('#');
// constexpr auto quote = CHR('\'');
// constexpr auto hash_t = hash ^ CLS("tT");
// constexpr auto hash_f = hash ^ CLS("fF");
// constexpr auto boolean = hash ^ CLS("tfTF");
// constexpr auto string = double_quote ^
// *(escape_seq | (!CLS("\"\\") ^ _)) ^
// double_quote;


// basic things
// NOTE(review): Any_Char, CHR and CLS are defined outside this view;
// presumably Any_Char matches any one character, CHR(c) matches exactly c,
// and CLS(s) matches any one character contained in s -- confirm against the
// Munchar headers.
constexpr auto _ = Any_Char { };
constexpr auto newline = CHR('\n');
// Space, tab and carriage return. '\n' is not in this class; it has its own
// token above.
constexpr auto spaces = CLS(" \t\r");

// numbers
// NOTE(review): operator meanings are inferred from usage and should be
// confirmed against the Munchar headers: a ^ b = a followed by b,
// a | b = alternation, *a = zero or more, +a = one or more, ~a = optional.
constexpr auto digit = MUNCHAR_STATIC_PREDICATE(::isdigit);
constexpr auto sign = CLS("+-");
constexpr auto dot = CHR('.');
// Optional sign followed by at least one digit.
constexpr auto integer = ~sign ^ +digit;
// Number without an exponent: optional sign, then either
// [digits] '.' digits (leading digits may be absent) or plain digits.
// A trailing '.' with no digits after it is not part of the match.
constexpr auto number_no_exp = ~sign ^ ((*digit ^ dot ^ +digit) | +digit);
// number_no_exp with an optional exponent: 'e' or 'E', optional sign, digits.
constexpr auto number = number_no_exp ^ ~(CLS("eE") ^ ~sign ^ +digit);

// identifiers
// The names and character sets here appear to follow the Scheme (R5RS-style)
// identifier grammar: an <initial> followed by any number of <subsequent>s,
// plus a few "peculiar" identifiers that start with characters which are not
// valid <initial>s.
// NOTE(review): operator meanings are inferred from usage (a ^ b = sequence,
// a | b = alternation, *a = zero or more) -- confirm against the Munchar
// headers.
constexpr auto letter = MUNCHAR_STATIC_PREDICATE(::isalpha);
// Characters allowed only after the first position.
constexpr auto special_subsequent = CLS("+-.@");
// Non-letter characters allowed in the first position.
constexpr auto special_initial = CLS("!$%&*/:<=>?^_~");
constexpr auto constituent = letter;
constexpr auto initial = constituent | special_initial;
constexpr auto subsequent = initial | digit | special_subsequent;
// "->name", "...", or a bare "+" / "-".
// The "->" form is listed first on purpose: if `|` tries its alternatives in
// order and stops at the first success, putting the bare sign first would
// match only the '-' of "->foo" and split the arrow identifier in two.
// Ordering longest-first is correct under either alternation semantics.
constexpr auto peculiar_identifier
= (STR("->") ^ *subsequent) | STR("...") | CLS("+-");
constexpr auto identifier = (initial ^ *subsequent) | peculiar_identifier;

// strings
// NOTE(review): operator meanings are inferred from usage (a ^ b = sequence,
// a | b = alternation, *a = zero or more, !a = "not followed by a" without
// consuming input) -- confirm against the Munchar headers.
// A backslash followed by any one character; no particular escape letters
// are singled out here.
constexpr auto escape_seq = CHR('\\') ^ _;
// Double-quoted string: opening quote, then any mix of escape sequences and
// characters that are neither '"' nor '\', then the closing quote.
constexpr auto string
= CHR('"') ^ *(escape_seq | (!CLS("\"\\") ^ _)) ^ CHR('"');

// hash tags (special prefixes)
// '#' immediately followed by an identifier.
constexpr auto hash_tag = CHR('#') ^ identifier;
// The two-character prefixes "#(" and "#;".
constexpr auto hash_paren = STR("#(");
constexpr auto hash_comment = STR("#;");

// boolean literals (technically hash tags)
// '#' followed by one of t, f, T, F. The trailing `!subsequent` is presumably
// a negative lookahead, so that a longer tag starting with one of those
// letters is not matched as a boolean -- TODO confirm the meaning of unary `!`.
constexpr auto boolean = CHR('#') ^ CLS("tfTF") ^ !subsequent;

// comments
// ';' followed by every character up to the next '\n'. The newline itself is
// written with `~`, which presumably makes it optional so that a comment on
// the last line of input (no trailing newline) still matches -- TODO confirm.
constexpr auto line_comment = CHR(';') ^ *(!newline ^ _) ^ ~newline;
}
}

Expand Down
13 changes: 6 additions & 7 deletions src/tokenize.cpp
Expand Up @@ -5,7 +5,6 @@ namespace Vole {

using namespace std;
using namespace Munchar;
using namespace Munchar::Tokens;

Lexeme::Lexeme(Type t, const string& txt, size_t ln)
: type(t), text(txt), line(ln)
Expand All @@ -23,11 +22,11 @@ namespace Vole {
case '\t':
case '\r':
case ' ':
munched = spaces(src);
munched = Tokens::spaces(src);
break;

case '"':
if ((munched = lisp_string(src))) {
if ((munched = Tokens::string(src))) {
tokens.push_back(
Lexeme(Lexeme::STRING, string(src, munched), line)
);
Expand All @@ -40,7 +39,7 @@ namespace Vole {
break;

case '#':
if ((munched = lisp_boolean(src))) {
if ((munched = Tokens::boolean(src))) {
tokens.push_back(
Lexeme(Lexeme::BOOLEAN, string(src, munched), line)
);
Expand Down Expand Up @@ -71,16 +70,16 @@ namespace Vole {
break;

case ';':
munched = lisp_comment(src);
munched = Tokens::line_comment(src);
++line;
break;

default:
if ((munched = number(src))) {
if ((munched = Tokens::number(src))) {
tokens.push_back(
Lexeme(Lexeme::NUMBER, string(src, munched), line)
);
} else if ((munched = lisp_identifier(src))) {
} else if ((munched = Tokens::identifier(src))) {
tokens.push_back(
Lexeme(Lexeme::IDENTIFIER, string(src, munched), line)
);
Expand Down

0 comments on commit 5d387e3

Please sign in to comment.