Skip to content

Commit 160c67a

Browse files
committed
Merge pull request graphql#186 from graphql/control-badness
[RFC] Clarify and restrict unicode support
2 parents 5d4d531 + 969095e commit 160c67a

File tree

2 files changed

+109
-39
lines changed

2 files changed

+109
-39
lines changed

src/language/__tests__/lexer.js

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,25 @@ function lexErr(str) {
2222

2323
describe('Lexer', () => {
2424

25+
it('disallows uncommon control characters', () => {
  // A lone U+0007 as the entire input: the case asserts that lexing it
  // throws, with the character shown in escaped form in the message.
  const lexBellCharacter = lexErr('\u0007');
  const expectedMessage =
    'Syntax Error GraphQL (1:1) Invalid character "\\u0007"';

  expect(lexBellCharacter).to.throw(expectedMessage);
});
33+
34+
it('accepts BOM header', () => {
  // Input is U+FEFF, one space, then the name `foo`; the expected NAME token
  // therefore starts at offset 2 and ends at offset 5.
  const firstToken = lexOne('\uFEFF foo');
  const expectedToken = {
    kind: TokenKind.NAME,
    start: 2,
    end: 5,
    value: 'foo'
  };

  expect(firstToken).to.deep.equal(expectedToken);
});
43+
2544
it('skips whitespace', () => {
2645

2746
expect(lexOne(`
@@ -136,53 +155,75 @@ describe('Lexer', () => {
136155

137156
it('lex reports useful string errors', () => {
138157

158+
expect(
159+
lexErr('"')
160+
).to.throw('Syntax Error GraphQL (1:2) Unterminated string');
161+
139162
expect(
140163
lexErr('"no end quote')
141164
).to.throw('Syntax Error GraphQL (1:14) Unterminated string');
142165

143166
expect(
144-
lexErr('"multi\nline"')
145-
).to.throw('Syntax Error GraphQL (1:7) Unterminated string');
167+
lexErr('"contains unescaped \u0007 control char"')
168+
).to.throw(
169+
'Syntax Error GraphQL (1:21) Invalid character within String: "\\u0007".'
170+
);
146171

147172
expect(
148-
lexErr('"multi\rline"')
149-
).to.throw('Syntax Error GraphQL (1:7) Unterminated string');
173+
lexErr('"null-byte is not \u0000 end of file"')
174+
).to.throw(
175+
'Syntax Error GraphQL (1:19) Invalid character within String: "\\u0000".'
176+
);
150177

151178
expect(
152-
lexErr('"multi\u2028line"')
179+
lexErr('"multi\nline"')
153180
).to.throw('Syntax Error GraphQL (1:7) Unterminated string');
154181

155182
expect(
156-
lexErr('"multi\u2029line"')
183+
lexErr('"multi\rline"')
157184
).to.throw('Syntax Error GraphQL (1:7) Unterminated string');
158185

159186
expect(
160187
lexErr('"bad \\z esc"')
161-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
188+
).to.throw(
189+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\z.'
190+
);
162191

163192
expect(
164193
lexErr('"bad \\x esc"')
165-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
194+
).to.throw(
195+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\x.'
196+
);
166197

167198
expect(
168199
lexErr('"bad \\u1 esc"')
169-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
200+
).to.throw(
201+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u1 es.'
202+
);
170203

171204
expect(
172205
lexErr('"bad \\u0XX1 esc"')
173-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
206+
).to.throw(
207+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u0XX1.'
208+
);
174209

175210
expect(
176211
lexErr('"bad \\uXXXX esc"')
177-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
212+
).to.throw(
213+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXX.'
214+
);
178215

179216
expect(
180217
lexErr('"bad \\uFXXX esc"')
181-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
218+
).to.throw(
219+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uFXXX.'
220+
);
182221

183222
expect(
184223
lexErr('"bad \\uXXXF esc"')
185-
).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence');
224+
).to.throw(
225+
'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXF.'
226+
);
186227
});
187228

188229
it('lexes numbers', () => {

src/language/lexer.js

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ tokenDescription[TokenKind.FLOAT] = 'Float';
110110
tokenDescription[TokenKind.STRING] = 'String';
111111

112112
var charCodeAt = String.prototype.charCodeAt;
113-
var fromCharCode = String.fromCharCode;
114113
var slice = String.prototype.slice;
115114

116115
/**
@@ -125,6 +124,10 @@ function makeToken(
125124
return { kind, start, end, value };
126125
}
127126

127+
/**
 * Renders a UTF-16 code unit for inclusion in a syntax error message.
 *
 * - `NaN` (what `charCodeAt` yields when reading past the end of the body)
 *   is rendered as `EOF`.
 * - Code units below 0x7F are rendered through `JSON.stringify`, which adds
 *   the surrounding quotes and escapes control characters, `"` and `\`.
 * - DEL and everything above it is rendered as a quoted, upper-case
 *   `\uXXXX` escape. `JSON.stringify` would emit these raw, which puts
 *   invisible or line-breaking characters (e.g. U+2028, U+FEFF, lone
 *   surrogates) straight into the error text.
 *
 * @param code A UTF-16 code unit, or NaN for end of input.
 * @returns The printable, quoted representation (or the bare word `EOF`).
 */
function printCharCode(code: number): string {
  if (isNaN(code)) {
    return 'EOF';
  }
  if (code < 0x007F) {
    // Trust JSON for ASCII.
    return JSON.stringify(String.fromCharCode(code));
  }
  // Zero-pad to exactly four hex digits so the escape is unambiguous.
  var hex = ('0000' + code.toString(16).toUpperCase()).slice(-4);
  return '"\\u' + hex + '"';
}
130+
128131
/**
129132
* Gets the next token from the source starting at the given position.
130133
*
@@ -137,12 +140,22 @@ function readToken(source: Source, fromPosition: number): Token {
137140
var bodyLength = body.length;
138141

139142
var position = positionAfterWhitespace(body, fromPosition);
140-
var code = charCodeAt.call(body, position);
141143

142144
if (position >= bodyLength) {
143145
return makeToken(TokenKind.EOF, position, position);
144146
}
145147

148+
var code = charCodeAt.call(body, position);
149+
150+
// SourceCharacter
151+
if (code < 0x0020 && code !== 0x0009 && code !== 0x000A && code !== 0x000D) {
152+
throw syntaxError(
153+
source,
154+
position,
155+
`Invalid character ${printCharCode(code)}.`
156+
);
157+
}
158+
146159
switch (code) {
147160
// !
148161
case 33: return makeToken(TokenKind.BANG, position, position + 1);
@@ -201,7 +214,7 @@ function readToken(source: Source, fromPosition: number): Token {
201214
throw syntaxError(
202215
source,
203216
position,
204-
`Unexpected character "${fromCharCode(code)}".`
217+
`Unexpected character ${printCharCode(code)}.`
205218
);
206219
}
207220

@@ -215,23 +228,28 @@ function positionAfterWhitespace(body: string, startPosition: number): number {
215228
var position = startPosition;
216229
while (position < bodyLength) {
217230
var code = charCodeAt.call(body, position);
218-
// Skip whitespace
231+
// Skip Ignored
219232
if (
220-
code === 32 || // space
221-
code === 44 || // comma
222-
code === 160 || // '\xa0'
223-
code === 0x2028 || // line separator
224-
code === 0x2029 || // paragraph separator
225-
code > 8 && code < 14 // whitespace
233+
// BOM
234+
code === 0xFEFF ||
235+
// White Space
236+
code === 0x0009 || // tab
237+
code === 0x0020 || // space
238+
// Line Terminator
239+
code === 0x000A || // new line
240+
code === 0x000D || // carriage return
241+
// Comma
242+
code === 0x002C
226243
) {
227244
++position;
228245
// Skip comments
229246
} else if (code === 35) { // #
230247
++position;
231248
while (
232249
position < bodyLength &&
233-
(code = charCodeAt.call(body, position)) &&
234-
code !== 10 && code !== 13 && code !== 0x2028 && code !== 0x2029
250+
(code = charCodeAt.call(body, position)) !== null &&
251+
// SourceCharacter but not LineTerminator
252+
(code > 0x001F || code === 0x0009) && code !== 0x000A && code !== 0x000D
235253
) {
236254
++position;
237255
}
@@ -265,7 +283,7 @@ function readNumber(source, start, firstCode) {
265283
throw syntaxError(
266284
source,
267285
position,
268-
`Invalid number, unexpected digit after 0: "${fromCharCode(code)}".`
286+
`Invalid number, unexpected digit after 0: ${printCharCode(code)}.`
269287
);
270288
}
271289
} else {
@@ -315,8 +333,7 @@ function readDigits(source, start, firstCode) {
315333
throw syntaxError(
316334
source,
317335
position,
318-
'Invalid number, expected digit but got: ' +
319-
(code ? `"${fromCharCode(code)}"` : 'EOF') + '.'
336+
`Invalid number, expected digit but got: ${printCharCode(code)}.`
320337
);
321338
}
322339

@@ -329,15 +346,26 @@ function readString(source, start) {
329346
var body = source.body;
330347
var position = start + 1;
331348
var chunkStart = position;
332-
var code;
349+
var code = 0;
333350
var value = '';
334351

335352
while (
336353
position < body.length &&
337-
(code = charCodeAt.call(body, position)) &&
338-
code !== 34 &&
339-
code !== 10 && code !== 13 && code !== 0x2028 && code !== 0x2029
354+
(code = charCodeAt.call(body, position)) !== null &&
355+
// not LineTerminator
356+
code !== 0x000A && code !== 0x000D &&
357+
// not Quote (")
358+
code !== 34
340359
) {
360+
// SourceCharacter
361+
if (code < 0x0020 && code !== 0x0009) {
362+
throw syntaxError(
363+
source,
364+
position,
365+
`Invalid character within String: ${printCharCode(code)}.`
366+
);
367+
}
368+
341369
++position;
342370
if (code === 92) { // \
343371
value += slice.call(body, chunkStart, position - 1);
@@ -351,7 +379,7 @@ function readString(source, start) {
351379
case 110: value += '\n'; break;
352380
case 114: value += '\r'; break;
353381
case 116: value += '\t'; break;
354-
case 117:
382+
case 117: // u
355383
var charCode = uniCharCode(
356384
charCodeAt.call(body, position + 1),
357385
charCodeAt.call(body, position + 2),
@@ -362,25 +390,26 @@ function readString(source, start) {
362390
throw syntaxError(
363391
source,
364392
position,
365-
'Bad character escape sequence.'
393+
`Invalid character escape sequence: ` +
394+
`\\u${body.slice(position + 1, position + 5)}.`
366395
);
367396
}
368-
value += fromCharCode(charCode);
397+
value += String.fromCharCode(charCode);
369398
position += 4;
370399
break;
371400
default:
372401
throw syntaxError(
373402
source,
374403
position,
375-
'Bad character escape sequence.'
404+
`Invalid character escape sequence: \\${String.fromCharCode(code)}.`
376405
);
377406
}
378407
++position;
379408
chunkStart = position;
380409
}
381410
}
382411

383-
if (code !== 34) {
412+
if (code !== 34) { // quote (")
384413
throw syntaxError(source, position, 'Unterminated string.');
385414
}
386415

@@ -428,10 +457,10 @@ function readName(source, position) {
428457
var body = source.body;
429458
var bodyLength = body.length;
430459
var end = position + 1;
431-
var code;
460+
var code = 0;
432461
while (
433462
end !== bodyLength &&
434-
(code = charCodeAt.call(body, end)) &&
463+
(code = charCodeAt.call(body, end)) !== null &&
435464
(
436465
code === 95 || // _
437466
code >= 48 && code <= 57 || // 0-9

0 commit comments

Comments
 (0)