Skip to content
Newer
Older
100644 83 lines (67 sloc) 2.13 KB
7aa6b9d The remaining "core" files OBJJified (with probable errors)
Ross Boucher authored Mar 6, 2009
1
2 @import "TDTokenizerState.j"
3
/*!
    Tokenizer state that collects a run of "word" characters from a reader
    and returns them as a single token of type TDTokenTypeWord.

    Which characters count as word characters is decided by a 256-entry
    lookup table for character codes 0-255 and by a small set of hard-coded
    exclusion ranges for codes above that (see -isWordChar:).
*/
@implementation TDWordState : TDTokenizerState
{
    // Lookup table indexed by character code (0-255); a truthy entry marks
    // that code as a word character.
    CPArray wordChars;
}

/*!
    Builds the lookup table with every entry cleared, then marks the default
    word characters: a-z, A-Z, 0-9, '-', '_', the apostrophe, and codes
    192 through 255.
*/
- (id)init
{
    if (self = [super init])
    {
        wordChars = [];

        for (var i = 0; i < 256; i++)
            [wordChars addObject:NO];

        // Quoted characters are one-character strings in Objective-J;
        // -setWordChars:from:to: converts them to character codes.
        [self setWordChars:YES from:'a' to:'z'];
        [self setWordChars:YES from:'A' to:'Z'];
        [self setWordChars:YES from:'0' to:'9'];
        [self setWordChars:YES from:'-' to:'-'];
        [self setWordChars:YES from:'_' to:'_'];
        [self setWordChars:YES from:'\'' to:'\''];
        [self setWordChars:YES from:192 to:255];
    }

    return self;
}

/*!
    Marks (or unmarks) every character code in the inclusive range
    start..end as a word character.

    @param yn    YES to treat the range as word characters, NO to exclude it
    @param start first character of the range, as a numeric character code
                 or a one-character string
    @param end   last character of the range (inclusive), in either form
    @throws TDWordStateNotSupportedException when either bound lies outside
            the table (valid codes are 0-255)
*/
- (void)setWordChars:(BOOL)yn from:(unsigned)start to:(unsigned)end
{
    // Without this conversion a string bound defeats both the numeric range
    // check and the i++ loop below ('a'++ yields NaN), so nothing is set.
    if (typeof start === "string")
        start = start.charCodeAt(0);

    if (typeof end === "string")
        end = end.charCodeAt(0);

    var len = wordChars.length;

    // Valid indices are 0..len-1, so a bound equal to len is already out of range.
    if (start >= len || end >= len || start < 0 || end < 0)
    {
        [CPException raise:@"TDWordStateNotSupportedException" reason:@"TDWordState only supports setting word chars for chars in the latin1 set (under 256)"];
    }

    for (var i = start; i <= end; i++)
        [wordChars replaceObjectAtIndex:i withObject:yn];
}

/*!
    Reports whether a character code belongs to a word.

    Codes covered by the lookup table (0-255) are answered from the table.
    Negative codes are never word characters. Any other code is a word
    character unless it falls in one of the excluded ranges below.

    @param c numeric character code to classify
    @return YES if c is a word character, NO otherwise
*/
- (BOOL)isWordChar:(unsigned)c
{
    if (c < 0)
        return NO;

    // Every table entry, including the last one (255), is consulted.
    if (c < wordChars.length)
        return !![wordChars objectAtIndex:c];

    // U+2000-U+2BFF: General Punctuation through Miscellaneous Symbols and Arrows
    if (c >= 0x2000 && c <= 0x2BFF)
        return NO;

    // U+FE30-U+FE6F: CJK Compatibility Forms and Small Form Variants
    if (c >= 0xFE30 && c <= 0xFE6F)
        return NO;

    // U+FF00-U+FF65: first part of Halfwidth and Fullwidth Forms
    if (c >= 0xFF00 && c <= 0xFF65)
        return NO;

    // U+FFF0-U+FFFF: Specials
    if (c >= 0xFFF0 && c <= 0xFFFF)
        return NO;

    return YES;
}

/*!
    Reads one word from the reader. The starting character is always
    appended; further characters are appended for as long as -isWordChar:
    accepts them.

    @param r   reader supplying the following characters
    @param cin character that caused the tokenizer to enter this state
    @param t   the calling tokenizer (not used by this method)
    @return a token of type TDTokenTypeWord whose string value is the
            buffered word and whose float value is 0.0
*/
- (TDToken)nextTokenFromReader:(TDReader)r startingWith:(unsigned)cin tokenizer:(TDTokenizer)t
{
    [self reset];

    var c = cin;

    do
    {
        [self append:c];
        c = [r read];
    }
    while ([self isWordChar:c]);

    // Push the terminating character back for the next state. A value of -1
    // is skipped (presumably the reader's end-of-input marker; confirm against TDReader).
    if (-1 != c)
        [r unread];

    return [TDToken tokenWithTokenType:TDTokenTypeWord stringValue:[self bufferedString] floatValue:0.0];
}

@end
Something went wrong with that request. Please try again.