Skip to content

Commit

Permalink
rearrange the Tokenizer interface a little bit so we can more readily…
Browse files Browse the repository at this point in the history
… customize the behavior in subclasses. Add a token map, which can be used to map certain character strings to pre-defined tokens
  • Loading branch information
Whiteknight committed May 18, 2011
1 parent 9e2db22 commit b58ca3f
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
21 changes: 20 additions & 1 deletion src/unstable/string/Tokenizer.winxed
Expand Up @@ -4,11 +4,13 @@ namespace Rosella { namespace String
{
var tokens;
var data;
var map;

// Constructor. Gives a new Tokenizer an empty token map, an empty token
// list and no input data.
function Tokenizer()
{
    self.map = {};
    self.data = null;
    self.tokens = [];
}

function add_data(string str)
Expand All @@ -21,6 +23,11 @@ namespace Rosella { namespace String
}
}

// Pre-register a token for the exact string `key`. The token is created
// through build_token and stored in the token map under that key, where
// get_token looks it up by text.
function map_token(string key, var metadata)
{
    var mapped = self.build_token(key, metadata);
    self.map[key] = mapped;
}

function has_tokens()
{
if (elements(self.tokens) > 0)
Expand All @@ -30,7 +37,7 @@ namespace Rosella { namespace String
return 0;
}

function get_token()
function next_token()
{
if (elements(self.tokens) > 1) {
string token;
Expand All @@ -43,6 +50,18 @@ namespace Rosella { namespace String
return null;
}

// Return the token for `text`. If the token map has an entry for `text`,
// that stored token is returned and `metadata` is not used; otherwise a
// new token is built from `text` and `metadata` via build_token.
function get_token(string text, var metadata)
{
    var token;
    if (exists self.map[text])
        token = self.map[text];
    else
        token = self.build_token(text, metadata);
    return token;
}

// Construct a new Rosella.String.Tokenizer.Token from the given text and
// metadata. Both map_token and get_token create their tokens through
// this method.
function build_token(string text, var metadata)
{
    var token = new Rosella.String.Tokenizer.Token(text, metadata);
    return token;
}

function lex_next_token()
{
using Rosella.Error.must_subclass;
Expand Down
6 changes: 3 additions & 3 deletions src/unstable/string/tokenizer/CClass.winxed
Expand Up @@ -40,7 +40,7 @@ namespace Rosella { namespace String { namespace Tokenizer

function CClass(var cclasses [optional], int has_cclasses [opt_flag])
{
//self.Tokenizer();
self.Tokenizer();
self.tokens = [];
self.data = null;
if (has_cclasses)
Expand All @@ -55,11 +55,11 @@ namespace Rosella { namespace String { namespace Tokenizer
int len = length(str);
int curr_cclass = private_get_first_cclass(str, self.cclasses);
if (curr_cclass == 0)
return new Rosella.String.Tokenizer.Token(self.get_chars(1), curr_cclass);
return self.get_token(self.get_chars(1), curr_cclass);
int next_idx = 0;
${ find_not_cclass next_idx, curr_cclass, str, 0, len };
string chars = self.get_chars(next_idx);
return new Rosella.String.Tokenizer.Token(chars, curr_cclass);
return self.get_token(chars, curr_cclass);
}

function get_chars(int count)
Expand Down

0 comments on commit b58ca3f

Please sign in to comment.