/
tokenizer.ts
77 lines (65 loc) · 1.77 KB
/
tokenizer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
/**
 * Token object handed to the `receiver` callback of `Tokenizer.tokenize`.
 *
 * `tokenize` builds it by spreading the `CallbackResult` returned by the
 * matching rule's `fn` and then adding `index`.
 */
export type Token = {
// Token type label, as supplied by the rule callback.
type: string;
// Token payload, as supplied by the rule callback.
value: string | number;
// Length counter attached by `Tokenizer.tokenize`; it is derived from the
// `length` reported by the matching rule's `test`.
index: number;
// Any additional fields the rule callback chose to return.
[key: string]: unknown;
};
/**
 * Shape of the objects produced by the `receiver` callback of
 * `Tokenizer.tokenize`, and therefore of the entries in the array that
 * `tokenize` returns.
 *
 * Property values are unconstrained. The default receiver returns the token
 * unchanged, so `type`/`value` typically hold strings or numbers, but a custom
 * receiver may return any record.
 */
export interface ReceiverResult {
  // `unknown` absorbs every other member of a union, so this is exactly the
  // type the former `string | number | unknown` resolved to.
  [name: string]: unknown;
}
/**
 * Object returned by a rule's `fn` callback; `Tokenizer.tokenize` spreads it
 * into the token it emits.
 */
export type CallbackResult = {
// Token type label.
type: string;
// Token payload.
value: string | number;
// Extra fields are allowed; the spread copies them into the token as well.
[key: string]: unknown;
};
/**
 * Rule callback: receives the `value` reported by the rule's `test` and returns
 * the fields of the token to emit.
 */
type CallbackFunction = (value: unknown) => CallbackResult;
/**
 * Outcome of a rule's `test`: `undefined` when the rule does not match,
 * otherwise the matched `value` (forwarded to the rule's `fn`) and `length`,
 * the amount `Tokenizer.tokenize` slices off the front of the remaining input.
 */
export type TestResult = { value: unknown; length: number } | undefined;
/**
 * Rule matcher: called with the input that has not been consumed yet; returns
 * a match description, or `undefined` when the rule does not apply.
 *
 * Note: `TestResult` already includes `undefined`, so the extra `| undefined`
 * in the return type is redundant but harmless.
 */
export type TestFunction = (
string: string,
) => TestResult | undefined;
/** A tokenizer rule: a matcher paired with the callback that builds the token. */
export interface Rule {
// Decides whether the rule applies to the remaining input.
test: TestFunction;
// Converts the value reported by `test` into the token's fields.
fn: CallbackFunction;
}
/**
 * Rule-based tokenizer.
 *
 * Keeps an ordered list of rules and splits a string into tokens by repeatedly
 * applying the first rule whose `test` accepts the remaining input.
 */
export class Tokenizer {
  /** Rules in the order they are tried. */
  rules: Rule[];

  /**
   * @param rules Initial rules. The array is stored as-is (not copied), so
   * later `addRule` calls push onto the array that was passed in.
   */
  constructor(rules: Rule[] = []) {
    this.rules = rules;
  }

  /**
   * Appends a rule after all existing ones.
   *
   * @param test Matcher deciding whether the rule applies.
   * @param fn Callback turning the matched value into token fields.
   * @returns This tokenizer, so calls can be chained.
   */
  addRule(test: TestFunction, fn: CallbackFunction): Tokenizer {
    this.rules.push({ test, fn });
    return this;
  }

  /**
   * Splits `string` into tokens.
   *
   * Rules are tried in order against the remaining input. The first rule whose
   * `test` returns a result wins: `length` is sliced off the front of the
   * input, the rule's `fn` builds the token fields, `index` is attached, and
   * the token is passed through `receiver`. Matching then restarts from the
   * first rule and stops once no rule matches.
   *
   * The loop is iterative, so the number of tokens is not limited by the call
   * stack and the input argument itself is never modified.
   *
   * @param string Input to tokenize.
   * @param receiver Maps each token to the value stored in the result array;
   * by default the token is returned unchanged.
   * @returns The receiver results, in the order the tokens were matched.
   * @throws {Error} If a rule matches without consuming any input (which
   * could never terminate), or if input is left over that no rule matches.
   */
  tokenize(
    string: string,
    receiver = (token: Token): ReceiverResult => token,
  ): ReceiverResult[] {
    const tokens: ReceiverResult[] = [];
    let remaining = string;

    for (;;) {
      let matched = false;

      for (const rule of this.rules) {
        const result = rule.test(remaining);
        if (!result) continue;

        const { value, length } = result;
        const rest = remaining.slice(length);
        if (rest.length === remaining.length) {
          // A match that leaves the input untouched would be found again on
          // every following pass; fail loudly instead of looping forever.
          throw new Error(
            `parser error: rule matched without consuming input! ${
              remaining.slice(0, 25)
            }`,
          );
        }
        remaining = rest;

        // Kept for backward compatibility: the previous recursive
        // implementation started a fresh counter for every match, so `index`
        // has always carried the consumed length of the token rather than an
        // absolute offset.
        // NOTE(review): confirm whether an absolute position was intended
        // before changing this value.
        tokens.push(receiver({ ...rule.fn(value), index: length }));

        matched = true;
        break; // restart from the highest-priority rule
      }

      if (!matched) break;
    }

    if (remaining.length) {
      throw new Error(
        `parser error: string not fully parsed! ${remaining.slice(0, 25)}`,
      );
    }
    return tokens;
  }
}