forked from RyanMarcus/dirty-json
/
lexer.js
144 lines (116 loc) · 3.11 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright 2016, 2015, 2014 Ryan Marcus
// This file is part of dirty-json.
//
// dirty-json is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// dirty-json is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with dirty-json. If not, see <http://www.gnu.org/licenses/>.
"use strict";
let Lexer = require("lex");
// Token type ids with non-negative values.
// The rules in getLexer() emit LEX_QUOTE, LEX_FLOAT, LEX_INT, LEX_HEXNUM,
// LEX_HEXLITERAL and LEX_TOKEN directly, and LEX_RCB / LEX_RB through the
// lexSpc table below. The remaining ids are not produced by any rule in this
// file; presumably they are used by the parser -- TODO confirm.
const LEX_KV = 0;
const LEX_KVLIST = 1;
const LEX_VLIST = 2;
const LEX_BOOLEAN = 3;
const LEX_COVALUE = 4;
const LEX_CVALUE = 5;
const LEX_FLOAT = 6;
const LEX_INT = 7;
const LEX_KEY = 8;
const LEX_LIST = 9;
const LEX_OBJ = 10;
const LEX_QUOTE = 11;
const LEX_RB = 12;
const LEX_RCB = 13;
const LEX_TOKEN = 14;
const LEX_VALUE = 15;
const LEX_HEXLITERAL = 16;
const LEX_HEXNUM = 17;

// Token type ids with negative values: separators and opening brackets.
// All of these are emitted through the lexSpc table below.
const LEX_COLON = -1;
const LEX_COMMA = -2;
const LEX_LCB = -3;
const LEX_LB = -4;
const LEX_DOT = -5;

// Punctuation character -> token descriptor lookup.
// NOTE(review): nothing in this file reads lexMap; check for other users
// before removing it.
const lexMap = {
    ":": {type: LEX_COLON},
    ",": {type: LEX_COMMA},
    "{": {type: LEX_LCB},
    "}": {type: LEX_RCB},
    "[": {type: LEX_LB},
    "]": {type: LEX_RB},
    ".": {type: LEX_DOT} // TODO: remove?
};

// [pattern, token type] pairs for single-character punctuation; getLexer()
// registers one lexer rule per entry, in this order.
const lexSpc = [
    [/:/, LEX_COLON],
    [/,/, LEX_COMMA],
    [/{/, LEX_LCB],
    [/}/, LEX_RCB],
    [/\[/, LEX_LB],
    [/\]/, LEX_RB],
    [/\./, LEX_DOT] // TODO: remove?
];
/**
 * Create a Lexer loaded with the dirty-json token rules and primed with
 * `string` as its input.
 *
 * Every rule action receives the matched lexeme followed by the pattern's
 * capture groups, and returns a `{type, value}` token object (the whitespace
 * rule returns nothing).
 *
 * @param {string} string - text to tokenize
 * @returns {Lexer} lexer ready for repeated `lex()` calls
 */
function getLexer(string) {
    const lexer = new Lexer();

    // Quoted text, double or single quotes. The value is the text between the
    // quotes; a missing closing quote lets the match run to the end of input.
    lexer.addRule(/"([\s\S]*?)("|$)/, (lexeme, txt) => {
        return {type: LEX_QUOTE, value: txt};
    });
    lexer.addRule(/'([\s\S]*?)('|$)/, (lexeme, txt) => {
        return {type: LEX_QUOTE, value: txt};
    });

    // NOTE(review): the [\-0-9] class in the two numeric patterns below also
    // admits bare or embedded hyphens (e.g. "-" or "1-2"), for which
    // parseInt/parseFloat yield NaN or silently drop the tail. Tightening the
    // patterns would change tokenization, so they are kept as-is here.
    lexer.addRule(/[\-0-9]*\.[0-9]+/, lexeme => {
        return {type: LEX_FLOAT, value: parseFloat(lexeme)};
    });
    lexer.addRule(/[\-0-9]+/, lexeme => {
        // Explicit radix: the lexeme is always meant as decimal.
        return {type: LEX_INT, value: parseInt(lexeme, 10)};
    });

    // 0x-prefixed hexadecimal number.
    lexer.addRule(/0[xX][a-fA-F0-9]+/, lexeme => {
        return {type: LEX_HEXNUM, value: parseInt(lexeme, 16)};
    });

    // hex:"…" literal: decoded into a plain Array of byte values, two hex
    // digits per byte. With an odd digit count the final lone digit becomes
    // its own element.
    lexer.addRule(/hex:"([a-fA-F0-9]+)"/, (lexeme, hexDigits) => {
        const bytes = [];
        for (let i = 0; i < hexDigits.length; i += 2) {
            bytes.push(parseInt(hexDigits.slice(i, i + 2), 16));
        }
        return {type: LEX_HEXLITERAL, value: bytes};
    });

    // Single-character punctuation, one rule per lexSpc entry.
    lexSpc.forEach(([pattern, type]) => {
        lexer.addRule(pattern, lexeme => {
            return {type: type, value: lexeme};
        });
    });

    lexer.addRule(/\s/, () => {
        // chomp whitespace: no token is returned for it
    });

    // Fallback: any other single character becomes a generic token.
    lexer.addRule(/./, lexeme => {
        return {type: LEX_TOKEN, value: lexeme};
    });

    lexer.setInput(string);
    return lexer;
}
module.exports.lexString = lexString;
/**
 * Tokenize `str` and collect every token into an array.
 *
 * @param {string} str - text to tokenize
 * @param {*} emit - accepted for callers that pass it; never read here
 * @returns {Array} tokens in the order the lexer produced them
 */
function lexString(str, emit) {
    const lexer = getLexer(str);
    const tokens = [];
    // Pull tokens until lex() hands back a falsy value.
    for (let tok = lexer.lex(); tok; tok = lexer.lex()) {
        tokens.push(tok);
    }
    return tokens;
}
module.exports.getAllTokens = getAllTokens;
/**
 * Return every token lexed from `str`. Delegates to lexString, passing only
 * the input string.
 *
 * @param {string} str - text to tokenize
 * @returns {Array} the array produced by lexString
 */
function getAllTokens(str) {
    const tokens = lexString(str);
    return tokens;
}