-
Notifications
You must be signed in to change notification settings - Fork 0
/
2-tokenise.js
78 lines (70 loc) · 2 KB
/
2-tokenise.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import * as moo from 'moo';
/**
 * Nested lexer applied to the text of a single `symbolBand` token
 * (a colour word, a symbol, the word "on", then a colour word).
 *
 * Rules are tried in the order listed, so `symbol` takes precedence over the
 * colour rules wherever a "<word> on " sequence starts.
 */
const symbolBandiser = moo.compile({
  // Matches "<symbol> on " but keeps only the leading non-whitespace run as the token value
  symbol: { match: /\S+\son\s/, value: text => text.match(/\S+/)[0] },
  // Anchored with `^`: intended for the colour word that opens the band text
  symbolColour: /^[a-zA-Z]+/,
  // Anchored with `$`: intended for the colour word that closes the band text
  bandColour: /[a-zA-Z]+$/,
  WS: /[ \t]+/,
  // Anything no other rule matches is reported as an error token
  error: moo.error,
});
/**
 * Nested lexer applied to the text of an ID band: a leading colour word
 * followed by a parenthesised inscription, e.g. `M(1234)`.
 */
const idBandiser = moo.compile({
  // Anchored with `^`: the colour word that opens the band text
  bandColour: /^[a-zA-Z]+/,
  // Matches "(...)" but keeps only the text between the parentheses as the token value
  inscription: { match: /\(\S+\)/, value: text => text.match(/[^()]+/)[0] },
  // Anything no other rule matches is reported as an error token
  error: moo.error,
});
/**
 * Top-level lexer for the whole input string.
 *
 * Rule order is significant because earlier rules take precedence:
 * - `symbolBand` comes before the plain colour rules so a multi-word symbol
 *   band is captured as one token;
 * - `partSeparator` (`//`) comes before `bandSeparator` (`/`);
 * - `colouredIdBand` and `nullBand` come before `colouredBand`, which would
 *   otherwise consume their leading letters.
 *
 * NOTE(review): `symbolBand` accepts `\w+` for its colour words, whereas the
 * nested symbol-band lexer only accepts `[a-zA-Z]+`, so a colour containing
 * digits or `_` matches here but yields an error token when re-tokenised.
 * Confirm whether that is intended.
 */
const tokeniser = moo.compile({
  symbolBand: /\w+\s\S+\son\s\w+/,
  partSeparator: /\/\//,
  bandSeparator: /\//,
  legSeparator: /-/,
  colouredIdBand: /[a-zA-Z]+\(\S+\)/,
  uncolouredIdBand: /\(\S+\)/,
  nullBand: /x/,
  colouredBand: /[a-zA-Z]+/,
  WS: /[ \t]+/,
  // Anything no other rule matches is reported as an error token
  error: moo.error,
});
/**
 * Tokenise the input string with the top-level `moo` lexer, then re-tokenise
 * symbol bands and ID bands with their dedicated nested lexers.
 *
 * Symbol bands come back with type `tokenisedSymbolBand` and ID bands with type
 * `tokenisedIdBand`; both carry the nested tokens in a `tokens` array. All other
 * tokens are returned as produced by the lexer, and whitespace tokens are dropped.
 *
 * @param {string} nzbbtef - The string to tokenise.
 * @returns {Array<Object>} The tokens, in input order, without whitespace tokens.
 */
const tokenise = nzbbtef => {
  // Run `source` through a nested lexer and attach its output to a copy of the parent token
  const expand = (parent, lexer, source, type) => {
    lexer.reset(source);
    return Object.assign({}, parent, { type, tokens: Array.from(lexer) });
  };

  tokeniser.reset(nzbbtef);
  const processed = [];

  for (const token of Array.from(tokeniser)) {
    if (token.type === 'WS') {
      // Whitespace tokens are not helpful after tokenisation
      continue;
    }

    if (token.type === 'symbolBand') {
      processed.push(
        expand(token, symbolBandiser, token.value, 'tokenisedSymbolBand')
      );
    } else if (token.type === 'uncolouredIdBand') {
      // No colour was given: assume a metal band by prefixing "M"
      processed.push(
        expand(token, idBandiser, `M${token.value}`, 'tokenisedIdBand')
      );
    } else if (token.type === 'colouredIdBand') {
      processed.push(
        expand(token, idBandiser, token.value, 'tokenisedIdBand')
      );
    } else {
      // Every other token type is passed through as is
      processed.push(token);
    }
  }

  return processed;
};
export default tokenise;