Skip to content

Commit 0256fd0

Browse files
authored
18.x port - Stop DOS attacks by making the lexer stop early on evil input (#2897)
* READY - Stop DOS attacks by making the lexer stop early on evil input. (#2892) Port to 18.x * Test stability
1 parent a8c2b7e commit 0256fd0

File tree

11 files changed

+602
-25
lines changed

11 files changed

+602
-25
lines changed

src/main/java/graphql/ParseAndValidate.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import java.util.List;
1212
import java.util.function.Predicate;
1313

14+
import static java.util.Optional.ofNullable;
15+
1416
/**
1517
* This class allows you to parse and validate a graphql query without executing it. It will tell you
1618
* if it is syntactically valid and also semantically valid according to the graphql specification
@@ -58,6 +60,8 @@ public static ParseAndValidateResult parse(ExecutionInput executionInput) {
5860
//
5961
// we allow the caller to specify new parser options by context
6062
ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class);
63+
// we use the query parser options by default if they are not specified
64+
parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions());
6165
Parser parser = new Parser();
6266
Document document = parser.parseDocument(executionInput.getQuery(), parserOptions);
6367
return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build();

src/main/java/graphql/parser/GraphqlAntlrToLanguage.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,16 @@
7676
import static graphql.Assert.assertShouldNeverHappen;
7777
import static graphql.collect.ImmutableKit.emptyList;
7878
import static graphql.collect.ImmutableKit.map;
79+
import static graphql.parser.Parser.CHANNEL_COMMENTS;
80+
import static graphql.parser.Parser.CHANNEL_WHITESPACE;
7981
import static graphql.parser.StringValueParsing.parseSingleQuotedString;
8082
import static graphql.parser.StringValueParsing.parseTripleQuotedString;
83+
import static java.util.Optional.ofNullable;
8184

8285
@Internal
8386
public class GraphqlAntlrToLanguage {
8487

8588
private static final List<Comment> NO_COMMENTS = ImmutableKit.emptyList();
86-
private static final int CHANNEL_COMMENTS = 2;
87-
private static final int CHANNEL_IGNORED_CHARS = 3;
8889
private final CommonTokenStream tokens;
8990
private final MultiSourceReader multiSourceReader;
9091
private final ParserOptions parserOptions;
@@ -97,7 +98,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
9798
public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) {
9899
this.tokens = tokens;
99100
this.multiSourceReader = multiSourceReader;
100-
this.parserOptions = parserOptions == null ? ParserOptions.getDefaultParserOptions() : parserOptions;
101+
this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
101102
}
102103

103104
public ParserOptions getParserOptions() {
@@ -791,12 +792,12 @@ private void addIgnoredChars(ParserRuleContext ctx, NodeBuilder nodeBuilder) {
791792
}
792793
Token start = ctx.getStart();
793794
int tokenStartIndex = start.getTokenIndex();
794-
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_IGNORED_CHARS);
795+
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_WHITESPACE);
795796
List<IgnoredChar> ignoredCharsLeft = mapTokenToIgnoredChar(leftChannel);
796797

797798
Token stop = ctx.getStop();
798799
int tokenStopIndex = stop.getTokenIndex();
799-
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_IGNORED_CHARS);
800+
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_WHITESPACE);
800801
List<IgnoredChar> ignoredCharsRight = mapTokenToIgnoredChar(rightChannel);
801802

802803
nodeBuilder.ignoredChars(new IgnoredChars(ignoredCharsLeft, ignoredCharsRight));

src/main/java/graphql/parser/Parser.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package graphql.parser;
22

3+
import graphql.Internal;
34
import graphql.PublicApi;
45
import graphql.language.Document;
56
import graphql.language.Node;
@@ -24,6 +25,8 @@
2425
import java.io.Reader;
2526
import java.io.UncheckedIOException;
2627
import java.util.List;
28+
import java.util.Optional;
29+
import java.util.function.BiConsumer;
2730
import java.util.function.BiFunction;
2831

2932
/**
@@ -45,6 +48,11 @@
4548
@PublicApi
4649
public class Parser {
4750

51+
@Internal
52+
public static final int CHANNEL_COMMENTS = 2;
53+
@Internal
54+
public static final int CHANNEL_WHITESPACE = 3;
55+
4856
/**
4957
* Parses a string input into a graphql AST {@link Document}
5058
*
@@ -195,7 +203,16 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
195203
}
196204
});
197205

198-
CommonTokenStream tokens = new CommonTokenStream(lexer);
206+
// default in the parser options if they are not set
207+
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
208+
209+
// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
210+
int maxTokens = parserOptions.getMaxTokens();
211+
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
212+
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader);
213+
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
214+
215+
CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
199216

200217
GraphqlParser parser = new GraphqlParser(tokens);
201218
parser.removeErrorListeners();
@@ -268,21 +285,28 @@ public int getCharPositionInLine() {
268285

269286
count++;
270287
if (count > maxTokens) {
271-
String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
272-
SourceLocation sourceLocation = null;
273-
String offendingToken = null;
274-
if (token != null) {
275-
offendingToken = node.getText();
276-
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
277-
}
278-
279-
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
288+
throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader);
280289
}
281290
}
282291
};
283292
parser.addParseListener(listener);
284293
}
285294

295+
private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
296+
String tokenType = "grammar";
297+
SourceLocation sourceLocation = null;
298+
String offendingToken = null;
299+
if (token != null) {
300+
int channel = token.getChannel();
301+
tokenType = channel == CHANNEL_WHITESPACE ? "whitespace" : (channel == CHANNEL_COMMENTS ? "comments" : "grammar");
302+
303+
offendingToken = token.getText();
304+
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
305+
}
306+
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, tokenType);
307+
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
308+
}
309+
286310
/**
287311
* Allows you to override the ANTLR to AST code.
288312
*

src/main/java/graphql/parser/ParserOptions.java

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,57 @@
1313
public class ParserOptions {
1414

1515
/**
16-
* An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
17-
* memory representing a document that wont ever execute. To prevent this for most users, graphql-java
16+
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
17+
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
1818
* sets this value to 15000. ANTLR parsing time is linear in the number of tokens presented. The more you
1919
* allow, the longer it takes.
2020
*
2121
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
2222
* JVM wide.
2323
*/
24-
public static final int MAX_QUERY_TOKENS = 15000;
24+
public static final int MAX_QUERY_TOKENS = 15_000;
25+
/**
26+
* Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
27+
* memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
28+
* however it still takes some time to happen.
29+
*
30+
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
31+
* JVM wide.
32+
*/
33+
public static final int MAX_WHITESPACE_TOKENS = 200_000;
2534

2635
private static ParserOptions defaultJvmParserOptions = newParserOptions()
2736
.captureIgnoredChars(false)
2837
.captureSourceLocation(true)
2938
.captureLineComments(true)
3039
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
40+
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
41+
.build();
42+
43+
private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
44+
.captureIgnoredChars(false)
45+
.captureSourceLocation(true)
46+
.captureLineComments(false) // #comments are not useful in query parsing
47+
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
48+
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
49+
.build();
3150

51+
private static ParserOptions defaultJvmSdlParserOptions = newParserOptions()
52+
.captureIgnoredChars(false)
53+
.captureSourceLocation(true)
54+
.captureLineComments(true) // #comments are useful in SDL parsing
55+
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
56+
.maxWhitespaceTokens(Integer.MAX_VALUE)
3257
.build();
3358

3459
/**
35-
* By default the Parser will not capture ignored characters. A static holds this default
60+
* By default, the Parser will not capture ignored characters. A static holds this default
3661
* value in a JVM wide basis options object.
3762
*
3863
* Significant memory savings can be made if we do NOT capture ignored characters,
3964
* especially in SDL parsing.
4065
*
41-
* @return the static default value on whether to capture ignored chars
66+
* @return the static default JVM value
4267
*
4368
* @see graphql.language.IgnoredChar
4469
* @see graphql.language.SourceLocation
@@ -48,7 +73,7 @@ public static ParserOptions getDefaultParserOptions() {
4873
}
4974

5075
/**
51-
* By default the Parser will not capture ignored characters. A static holds this default
76+
* By default, the Parser will not capture ignored characters. A static holds this default
5277
* value in a JVM wide basis options object.
5378
*
5479
* Significant memory savings can be made if we do NOT capture ignored characters,
@@ -65,17 +90,80 @@ public static void setDefaultParserOptions(ParserOptions options) {
6590
defaultJvmParserOptions = assertNotNull(options);
6691
}
6792

93+
94+
/**
95+
* By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST
96+
* elements. A static holds this default value for operation parsing in a JVM wide basis options object.
97+
*
98+
* @return the static default JVM value for operation parsing
99+
*
100+
* @see graphql.language.IgnoredChar
101+
* @see graphql.language.SourceLocation
102+
*/
103+
public static ParserOptions getDefaultOperationParserOptions() {
104+
return defaultJvmOperationParserOptions;
105+
}
106+
107+
/**
108+
* By default, the Parser will not capture ignored characters or line comments. A static holds this default
109+
* value in a JVM wide basis options object for operation parsing.
110+
*
111+
* This static can be replaced with different options to allow the behavior of version 16.x or before.
112+
*
113+
* @param options - the new default JVM parser options for operation parsing
114+
*
115+
* @see graphql.language.IgnoredChar
116+
* @see graphql.language.SourceLocation
117+
*/
118+
public static void setDefaultOperationParserOptions(ParserOptions options) {
119+
defaultJvmOperationParserOptions = assertNotNull(options);
120+
}
121+
122+
/**
123+
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
124+
* elements. The SDL default options allow unlimited tokens and whitespace, since a DOS attack vector is
125+
* not commonly available via schema SDL parsing.
126+
*
127+
* A static holds this default value for SDL parsing in a JVM wide basis options object.
128+
*
129+
* @return the static default JVM value for SDL parsing
130+
*
131+
* @see graphql.language.IgnoredChar
132+
* @see graphql.language.SourceLocation
133+
* @see graphql.schema.idl.SchemaParser
134+
*/
135+
public static ParserOptions getDefaultSdlParserOptions() {
136+
return defaultJvmSdlParserOptions;
137+
}
138+
139+
/**
140+
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
141+
* elements. A static holds this default value for SDL parsing in a JVM wide basis options object.
142+
*
143+
* This static can be replaced with different options to allow the behavior of version 16.x or before.
144+
*
145+
* @param options - the new default JVM parser options for SDL parsing
146+
*
147+
* @see graphql.language.IgnoredChar
148+
* @see graphql.language.SourceLocation
149+
*/
150+
public static void setDefaultSdlParserOptions(ParserOptions options) {
151+
defaultJvmSdlParserOptions = assertNotNull(options);
152+
}
153+
68154
private final boolean captureIgnoredChars;
69155
private final boolean captureSourceLocation;
70156
private final boolean captureLineComments;
71157
private final int maxTokens;
158+
private final int maxWhitespaceTokens;
72159
private final ParsingListener parsingListener;
73160

74161
private ParserOptions(Builder builder) {
75162
this.captureIgnoredChars = builder.captureIgnoredChars;
76163
this.captureSourceLocation = builder.captureSourceLocation;
77164
this.captureLineComments = builder.captureLineComments;
78165
this.maxTokens = builder.maxTokens;
166+
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
79167
this.parsingListener = builder.parsingListener;
80168
}
81169

@@ -117,7 +205,7 @@ public boolean isCaptureLineComments() {
117205
}
118206

119207
/**
120-
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
208+
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
121209
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
122210
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
123211
*
@@ -127,6 +215,17 @@ public int getMaxTokens() {
127215
return maxTokens;
128216
}
129217

218+
/**
219+
* A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn
220+
* memory representing a document. To prevent this you can set a maximum number of whitespace parse
221+
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
222+
*
223+
* @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown.
224+
*/
225+
public int getMaxWhitespaceTokens() {
226+
return maxWhitespaceTokens;
227+
}
228+
130229
public ParsingListener getParsingListener() {
131230
return parsingListener;
132231
}
@@ -148,6 +247,7 @@ public static class Builder {
148247
private boolean captureLineComments = true;
149248
private int maxTokens = MAX_QUERY_TOKENS;
150249
private ParsingListener parsingListener = ParsingListener.NOOP;
250+
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
151251

152252
Builder() {
153253
}
@@ -157,6 +257,7 @@ public static class Builder {
157257
this.captureSourceLocation = parserOptions.captureSourceLocation;
158258
this.captureLineComments = parserOptions.captureLineComments;
159259
this.maxTokens = parserOptions.maxTokens;
260+
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
160261
this.parsingListener = parserOptions.parsingListener;
161262
}
162263

@@ -180,6 +281,11 @@ public Builder maxTokens(int maxTokens) {
180281
return this;
181282
}
182283

284+
public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
285+
this.maxWhitespaceTokens = maxWhitespaceTokens;
286+
return this;
287+
}
288+
183289
public Builder parsingListener(ParsingListener parsingListener) {
184290
this.parsingListener = assertNotNull(parsingListener);
185291
return this;

0 commit comments

Comments
 (0)