-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Python3LexerBase.js
132 lines (112 loc) · 4.27 KB
/
Python3LexerBase.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import antlr4 from 'antlr4';
import Python3Parser from './Python3Parser.js';
/**
 * Lexer base class that layers indentation handling on top of antlr4.Lexer.
 *
 * It keeps three pieces of state:
 *  - `tokens`:  a queue of extra tokens (NEWLINE / INDENT / DEDENT / EOF) that
 *               are handed out by `nextToken()` before the regular token;
 *  - `indents`: a stack of the indentation widths that are currently open;
 *  - `opened`:  the number of currently opened braces, brackets and parentheses.
 */
export default class Python3LexerBase extends antlr4.Lexer {
    /**
     * @param {*} input - Character stream, forwarded unchanged to antlr4.Lexer.
     */
    constructor(input) {
        super(input);
        // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
        this.tokens = [];
        // The stack that keeps track of the indentation level.
        this.indents = [];
        // The amount of opened braces, brackets and parenthesis.
        this.opened = 0;
    }

    /**
     * Clears the token queue, the indentation stack and the open-bracket
     * counter, then delegates to the base lexer's reset().
     */
    reset() {
        this.tokens = [];
        this.indents = [];
        this.opened = 0;
        super.reset();
    }

    /**
     * Records `token` as the current token and appends it to the pending queue
     * so that `nextToken()` can hand it out later.
     *
     * @param {*} token - The token to queue.
     */
    emitToken(token) {
        this._token = token;
        this.tokens.push(token);
    }

    /**
     * Returns the next token, preferring tokens from the pending queue.
     *
     * When the end of input is directly ahead while indentation levels are
     * still open, a NEWLINE, one DEDENT per open level and a final EOF are
     * queued first.
     *
     * @returns {*} The oldest queued token if there is one, otherwise the token
     *     produced by the base lexer.
     */
    nextToken() {
        // Check if the end-of-file is ahead and there are still some DEDENTS expected.
        if (this._input.LA(1) === Python3Parser.EOF && this.indents.length) {
            // Remove any trailing EOF tokens from our buffer.
            this.tokens = this.tokens.filter((token) => token.type !== Python3Parser.EOF);

            // First emit an extra line break that serves as the end of the statement.
            this.emitToken(this.commonToken(Python3Parser.NEWLINE, "\n"));

            // Now emit as much DEDENT tokens as needed.
            while (this.indents.length) {
                this.emitToken(this.createDedent());
                this.indents.pop();
            }

            // Put the EOF back on the token stream.
            this.emitToken(this.commonToken(Python3Parser.EOF, "<EOF>"));
        }

        // The base lexer is always advanced, even when a queued token is returned;
        // tokens it emits through emitToken() end up in the queue as well.
        const next = super.nextToken();
        return this.tokens.length ? this.tokens.shift() : next;
    }

    /**
     * @returns {*} A DEDENT token built by `commonToken` with empty text.
     */
    createDedent() {
        return this.commonToken(Python3Parser.DEDENT, "");
    }

    /**
     * Builds a default-channel token that ends just before the current
     * character index.
     *
     * @param {number} type - Token type.
     * @param {string} text - Only its length is used: the token starts
     *     `text.length` characters before the current index. For empty text the
     *     start index equals the stop index.
     * @returns {*} The newly created antlr4.CommonToken.
     */
    commonToken(type, text) {
        const stop = this.getCharIndex() - 1;
        const start = text.length ? stop - text.length + 1 : stop;
        return new antlr4.CommonToken(
            this._tokenFactorySourcePair,
            type,
            antlr4.Lexer.DEFAULT_TOKEN_CHANNEL,
            start,
            stop
        );
    }

    /**
     * Computes the indentation width of a whitespace run.
     *
     * A tab advances the count to the next multiple of 8; every other
     * character counts as one column.
     *
     * @param {string} whitespace - The leading whitespace of a line.
     * @returns {number} The indentation width in columns.
     */
    getIndentationCount(whitespace) {
        let count = 0;
        for (const ch of whitespace) {
            if (ch === '\t') {
                count += 8 - count % 8;
            } else {
                count++;
            }
        }
        return count;
    }

    /**
     * @returns {boolean} True when the current character index is 0.
     */
    atStartOfInput() {
        return this.getCharIndex() === 0;
    }

    /** Increments the open-bracket counter. */
    openBrace() {
        this.opened++;
    }

    /** Decrements the open-bracket counter. */
    closeBrace() {
        this.opened--;
    }

    /**
     * Handles the text matched for a line break plus the whitespace after it.
     *
     * Inside open brackets, or when the following line is blank or a comment
     * (and the input does not end right after the next character), the match is
     * skipped. Otherwise a NEWLINE is queued, followed by an INDENT or by one
     * or more DEDENTs depending on how the new indentation width compares with
     * the top of the indentation stack.
     */
    onNewLine() {
        // Split the matched text into its line-break characters and the rest.
        const newLine = this.text.replace(/[^\r\n]+/g, '');
        const spaces = this.text.replace(/[\r\n]+/g, '');

        // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to
        // satisfy the final newline needed by the single_put rule used by the REPL.
        const next = this._input.LA(1);
        const nextnext = this._input.LA(2);
        const startsBlankOrComment =
            next === 13 /* '\r' */ || next === 10 /* '\n' */ || next === 35 /* '#' */;

        if (this.opened > 0 || (nextnext !== Python3Parser.EOF && startsBlankOrComment)) {
            // If we're inside a list or on a blank line, ignore all indents,
            // dedents and line breaks.
            this.skip();
            return;
        }

        this.emitToken(this.commonToken(Python3Parser.NEWLINE, newLine));

        const indent = this.getIndentationCount(spaces);
        const previous = this.indents.length ? this.indents[this.indents.length - 1] : 0;

        if (indent === previous) {
            // skip indents of the same size as the present indent-size
            this.skip();
        } else if (indent > previous) {
            this.indents.push(indent);
            this.emitToken(this.commonToken(Python3Parser.INDENT, spaces));
        } else {
            // Possibly emit more than 1 DEDENT token.
            while (this.indents.length && this.indents[this.indents.length - 1] > indent) {
                this.emitToken(this.createDedent());
                this.indents.pop();
            }
        }
    }
}