Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 169 lines (141 sloc) 5.486 kb
98b7f3c Beginnings of porting the Obj-C code to Obj-J.
Ross Boucher authored
1
2 @import "TDParseKit.j"
3
/*!
    TDTokenizer reads characters from a TDReader and hands each one to the
    tokenizer state registered for it, which produces the next TDToken.

    Characters 0-255 are dispatched through the `tokenizerStates` table, which
    is indexed by character code. Characters outside that range are dispatched
    by the fixed range checks in -tokenizerStateFor:.
*/
@implementation TDTokenizer : CPObject
{
    CPString            string;
    TDReader            reader;

    // Dispatch table: the entry at index c is the state used for character code c.
    CPArray             tokenizerStates;

    TDNumberState       numberState;
    TDQuoteState        quoteState;
    TDCommentState      commentState;
    TDSymbolState       symbolState;
    TDWhitespaceState   whitespaceState;
    TDWordState         wordState;
}

/*!
    Returns a new tokenizer with no string set.
*/
+ (id)tokenizer
{
    return [self tokenizerWithString:nil];
}

/*!
    Returns a new tokenizer that will read from `s`.
    @param s the string to tokenize (may be nil)
*/
+ (id)tokenizerWithString:(CPString)s
{
    return [[self alloc] initWithString:s];
}

/*!
    Initializes the tokenizer with no string set.
*/
- (id)init
{
    return [self initWithString:nil];
}

/*!
    Initializes the tokenizer: creates the reader and the six tokenizer states,
    registers the multi-character symbols and the comment delimiters, and
    fills the 256-entry dispatch table.
    @param s the string to tokenize (may be nil)
*/
- (id)initWithString:(CPString)s
{
    if (self = [super init])
    {
        string = s;
        [self setReader:[[TDReader alloc] init]];

        numberState     = [[TDNumberState alloc] init];
        quoteState      = [[TDQuoteState alloc] init];
        commentState    = [[TDCommentState alloc] init];
        symbolState     = [[TDSymbolState alloc] init];
        whitespaceState = [[TDWhitespaceState alloc] init];
        wordState       = [[TDWordState alloc] init];

        [symbolState add:@"<="];
        [symbolState add:@">="];
        [symbolState add:@"!="];
        [symbolState add:@"=="];

        [commentState addSingleLineStartSymbol:@"//"];
        [commentState addMultiLineStartSymbol:@"/*" endSymbol:@"*/"];

        // A plain JavaScript array; it is used below both as a message
        // receiver (addObject:) and with subscripting (tokenizerStates[c]).
        tokenizerStates = [];

        // The ranges must be numeric character codes. Objective-J has no
        // character literals: 'A' is a one-character string, and counting
        // from 'A' to 'Z' with ++ stops after a single step because
        // 'A'++ evaluates to NaN.
        //
        // -addTokenizerState:from:to: appends, so these calls must stay in
        // ascending order and leave no gaps for index == character code to hold.
        [self addTokenizerState:whitespaceState from:0   to:32];   // 0x00-0x20, control characters and ' '
        [self addTokenizerState:symbolState     from:33  to:33];   // !
        [self addTokenizerState:quoteState      from:34  to:34];   // "
        [self addTokenizerState:symbolState     from:35  to:38];   // # $ % &
        [self addTokenizerState:quoteState      from:39  to:39];   // '
        [self addTokenizerState:symbolState     from:40  to:42];   // ( ) *
        [self addTokenizerState:symbolState     from:43  to:43];   // +
        [self addTokenizerState:symbolState     from:44  to:44];   // ,
        [self addTokenizerState:numberState     from:45  to:45];   // -
        [self addTokenizerState:numberState     from:46  to:46];   // .
        [self addTokenizerState:commentState    from:47  to:47];   // /
        [self addTokenizerState:numberState     from:48  to:57];   // 0-9
        [self addTokenizerState:symbolState     from:58  to:64];   // : ; < = > ? @
        [self addTokenizerState:wordState       from:65  to:90];   // A-Z
        [self addTokenizerState:symbolState     from:91  to:96];   // 0x5B-0x60
        [self addTokenizerState:wordState       from:97  to:122];  // a-z
        [self addTokenizerState:symbolState     from:123 to:191];  // 0x7B-0xBF
        [self addTokenizerState:wordState       from:192 to:255];  // 0xC0-0xFF
    }

    return self;
}

/*!
    Reads one character from the reader and asks the matching tokenizer state
    to build the next token.
    @return the next token; the EOF token when the reader returns -1 or when
            no state is registered for the character that was read
*/
- (TDToken)nextToken
{
    var c = [reader read];

    if (c === -1)
        return [TDToken EOFToken];

    var state = [self tokenizerStateFor:c];

    // No state registered for this character: report EOF rather than fail.
    if (!state)
        return [TDToken EOFToken];

    return [state nextTokenFromReader:reader startingWith:c tokenizer:self];
}

/*!
    Appends `state` to the dispatch table once for every character code in
    [start, end]. Entries are appended, not stored at index `start`, so the
    table stays aligned with character codes only when ranges are added in
    ascending order without gaps.
    @param state the state to register
    @param start first character code (a number), inclusive
    @param end last character code (a number), inclusive
*/
- (void)addTokenizerState:(TDTokenizerState)state from:(unsigned)start to:(unsigned)end
{
    for (var i = start; i <= end; i++)
        [tokenizerStates addObject:state];
}

/*!
    Replaces the dispatch table entries for the character codes in
    [start, end] with `state`.
    @param state the state to install
    @param start first character code (a number), inclusive
    @param end last character code (a number), inclusive
*/
- (void)setTokenizerState:(TDTokenizerState)state from:(unsigned)start to:(unsigned)end
{
    for (var i = start; i <= end; i++)
        [tokenizerStates replaceObjectAtIndex:i withObject:state];
}

/*!
    Returns the reader the tokenizer pulls characters from.
*/
- (TDReader)reader
{
    return reader;
}

/*!
    Installs a new reader and hands it the tokenizer's current string.
    Does nothing when `r` is already the current reader.
    @param r the reader to use (may be nil)
*/
- (void)setReader:(TDReader)r
{
    if (reader != r)
    {
        reader = r;

        // A property write on nil throws in JavaScript, so skip it.
        // NOTE(review): this writes the reader's `string` ivar directly and
        // bypasses any -setString: accessor TDReader may define - confirm
        // the reader needs no other state reset here.
        if (reader)
            reader.string = string;
    }
}

/*!
    Returns the string being tokenized.
*/
- (CPString)string
{
    return string;
}

/*!
    Sets the string to tokenize and passes it on to the current reader.
    @param s the new string (may be nil)
*/
- (void)setString:(CPString)s
{
    string = s;

    // A property write on nil throws in JavaScript, so skip it.
    // NOTE(review): direct ivar write, see -setReader:.
    if (reader)
        reader.string = string;
}

/*!
    Returns the tokenizer state responsible for character code `c`.
    Codes 0-255 are looked up in the dispatch table. Any other code gets the
    symbol state when it falls in one of the symbol/punctuation ranges below,
    and the word state otherwise.
    @param c a character code
*/
- (TDTokenizerState)tokenizerStateFor:(unsigned)c
{
    if (c < 0 || c > 255)
    {
        if (c >= 0x19E0 && c <= 0x19FF) { // khmer symbols
            return symbolState;
        } else if (c >= 0x2000 && c <= 0x2BFF) { // various symbols
            return symbolState;
        } else if (c >= 0x2E00 && c <= 0x2E7F) { // supplemental punctuation
            return symbolState;
        } else if (c >= 0x3000 && c <= 0x303F) { // cjk symbols & punctuation
            return symbolState;
        } else if (c >= 0x3200 && c <= 0x33FF) { // enclosed cjk letters and months, cjk compatibility
            return symbolState;
        } else if (c >= 0x4DC0 && c <= 0x4DFF) { // yijing hexagram symbols
            return symbolState;
        } else if (c >= 0xFE30 && c <= 0xFE6F) { // cjk compatibility forms, small form variants
            return symbolState;
        } else if (c >= 0xFF00 && c <= 0xFFFF) { // hiragana & katakana halfwidth & fullwidth forms, Specials
            return symbolState;
        } else {
            return wordState;
        }
    }

    return tokenizerStates[c];
}

@end
Something went wrong with that request. Please try again.