diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 13a366a930..a88fd8b4e0 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - master + - 17.x jobs: buildAndTest: runs-on: ubuntu-latest diff --git a/src/main/java/graphql/ParseAndValidate.java b/src/main/java/graphql/ParseAndValidate.java index 58728a07b2..073cd00a75 100644 --- a/src/main/java/graphql/ParseAndValidate.java +++ b/src/main/java/graphql/ParseAndValidate.java @@ -3,12 +3,15 @@ import graphql.language.Document; import graphql.parser.InvalidSyntaxException; import graphql.parser.Parser; +import graphql.parser.ParserOptions; import graphql.schema.GraphQLSchema; import graphql.validation.ValidationError; import graphql.validation.Validator; import java.util.List; +import static java.util.Optional.ofNullable; + /** * This class allows you to parse and validate a graphql query without executing it. It will tell you * if its syntactically valid and also semantically valid according to the graphql specification @@ -42,8 +45,13 @@ public static ParseAndValidateResult parseAndValidate(GraphQLSchema graphQLSchem */ public static ParseAndValidateResult parse(ExecutionInput executionInput) { try { + // + // we allow the caller to specify new parser options by context + ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class); + // we use the query parser options by default if they are not specified + parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions()); Parser parser = new Parser(); - Document document = parser.parseDocument(executionInput.getQuery()); + Document document = parser.parseDocument(executionInput.getQuery(),parserOptions); return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build(); } catch (InvalidSyntaxException e) { return 
ParseAndValidateResult.newResult().syntaxException(e).variables(executionInput.getVariables()).build(); diff --git a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java index b1518ec83b..bb7e320351 100644 --- a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java +++ b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java @@ -76,14 +76,16 @@ import static graphql.Assert.assertShouldNeverHappen; import static graphql.collect.ImmutableKit.emptyList; import static graphql.collect.ImmutableKit.map; +import static graphql.parser.Parser.CHANNEL_COMMENTS; +import static graphql.parser.Parser.CHANNEL_WHITESPACE; import static graphql.parser.StringValueParsing.parseSingleQuotedString; import static graphql.parser.StringValueParsing.parseTripleQuotedString; +import static java.util.Optional.ofNullable; @Internal public class GraphqlAntlrToLanguage { - private static final int CHANNEL_COMMENTS = 2; - private static final int CHANNEL_IGNORED_CHARS = 3; + private static final List NO_COMMENTS = ImmutableKit.emptyList(); private final CommonTokenStream tokens; private final MultiSourceReader multiSourceReader; private final ParserOptions parserOptions; @@ -96,7 +98,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) { this.tokens = tokens; this.multiSourceReader = multiSourceReader; - this.parserOptions = parserOptions == null ? 
ParserOptions.getDefaultParserOptions() : parserOptions; + this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions()); } public ParserOptions getParserOptions() { @@ -790,12 +792,12 @@ private void addIgnoredChars(ParserRuleContext ctx, NodeBuilder nodeBuilder) { } Token start = ctx.getStart(); int tokenStartIndex = start.getTokenIndex(); - List leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_IGNORED_CHARS); + List leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_WHITESPACE); List ignoredCharsLeft = mapTokenToIgnoredChar(leftChannel); Token stop = ctx.getStop(); int tokenStopIndex = stop.getTokenIndex(); - List rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_IGNORED_CHARS); + List rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_WHITESPACE); List ignoredCharsRight = mapTokenToIgnoredChar(rightChannel); nodeBuilder.ignoredChars(new IgnoredChars(ignoredCharsLeft, ignoredCharsRight)); diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java index 3a560344b4..9faa56c3c8 100644 --- a/src/main/java/graphql/parser/Parser.java +++ b/src/main/java/graphql/parser/Parser.java @@ -1,5 +1,6 @@ package graphql.parser; +import graphql.Internal; import graphql.PublicApi; import graphql.language.Document; import graphql.language.Node; @@ -24,6 +25,8 @@ import java.io.Reader; import java.io.UncheckedIOException; import java.util.List; +import java.util.Optional; +import java.util.function.BiConsumer; import java.util.function.BiFunction; /** @@ -45,6 +48,11 @@ @PublicApi public class Parser { + @Internal + public static final int CHANNEL_COMMENTS = 2; + @Internal + public static final int CHANNEL_WHITESPACE = 3; + /** * Parses a string input into a graphql AST {@link Document} * @@ -195,7 +203,16 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int } }); - CommonTokenStream tokens = new 
CommonTokenStream(lexer); + // default in the parser options if they are not set + parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions()); + + // this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks. + int maxTokens = parserOptions.getMaxTokens(); + int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens(); + BiConsumer onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader); + SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens); + + CommonTokenStream tokens = new CommonTokenStream(safeTokenSource); GraphqlParser parser = new GraphqlParser(tokens); parser.removeErrorListeners(); @@ -245,23 +262,31 @@ private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlPar @Override public void visitTerminal(TerminalNode node) { + Token token = node.getSymbol(); count++; if (count > maxTokens) { - String msg = String.format("More than %d parse tokens have been presented. 
To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens); - SourceLocation sourceLocation = null; - String offendingToken = null; - if (node.getSymbol() != null) { - offendingToken = node.getText(); - sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, node.getSymbol().getLine(), node.getSymbol().getCharPositionInLine()); - } - - throw new ParseCancelledException(msg, sourceLocation, offendingToken); + throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader); } } }; parser.addParseListener(listener); } + private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException { + String tokenType = "grammar"; + SourceLocation sourceLocation = null; + String offendingToken = null; + if (token != null) { + int channel = token.getChannel(); + tokenType = channel == CHANNEL_WHITESPACE ? "whitespace" : (channel == CHANNEL_COMMENTS ? "comments" : "grammar"); + + offendingToken = token.getText(); + sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine()); + } + String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, tokenType); + throw new ParseCancelledException(msg, sourceLocation, offendingToken); + } + /** * Allows you to override the ANTLR to AST code. * diff --git a/src/main/java/graphql/parser/ParserOptions.java b/src/main/java/graphql/parser/ParserOptions.java index cfd1be903c..a45719e832 100644 --- a/src/main/java/graphql/parser/ParserOptions.java +++ b/src/main/java/graphql/parser/ParserOptions.java @@ -1,10 +1,11 @@ package graphql.parser; -import graphql.Assert; import graphql.PublicApi; import java.util.function.Consumer; +import static graphql.Assert.assertNotNull; + /** * Options that control how the {@link Parser} behaves. 
*/ @@ -12,31 +13,57 @@ public class ParserOptions { /** - * An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn - * memory representing a document that wont ever execute. To prevent this for most users, graphql-java + * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn + * memory representing a document that won't ever execute. To prevent this for most users, graphql-java * set this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you * allow the longer it takes. * * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this * JVM wide. */ - public static int MAX_QUERY_TOKENS = 15000; + public static final int MAX_QUERY_TOKENS = 15_000; + /** + * Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn + * memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing + * however it still takes some time to happen. + * + * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this + * JVM wide. 
+ */ + public static final int MAX_WHITESPACE_TOKENS = 200_000; private static ParserOptions defaultJvmParserOptions = newParserOptions() .captureIgnoredChars(false) .captureSourceLocation(true) + .captureLineComments(true) + .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java + .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS) + .build(); + + private static ParserOptions defaultJvmOperationParserOptions = newParserOptions() + .captureIgnoredChars(false) + .captureSourceLocation(true) + .captureLineComments(false) // #comments are not useful in query parsing .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java + .maxWhitespaceTokens(MAX_WHITESPACE_TOKENS) + .build(); + private static ParserOptions defaultJvmSdlParserOptions = newParserOptions() + .captureIgnoredChars(false) + .captureSourceLocation(true) + .captureLineComments(true) // #comments are useful in SDL parsing + .maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers + .maxWhitespaceTokens(Integer.MAX_VALUE) .build(); /** - * By default the Parser will not capture ignored characters. A static holds this default + * By default, the Parser will not capture ignored characters. A static holds this default * value in a JVM wide basis options object. * * Significant memory savings can be made if we do NOT capture ignored characters, * especially in SDL parsing. * - * @return the static default value on whether to capture ignored chars + * @return the static default JVM value * * @see graphql.language.IgnoredChar * @see graphql.language.SourceLocation @@ -46,7 +73,7 @@ public static ParserOptions getDefaultParserOptions() { } /** - * By default the Parser will not capture ignored characters. A static holds this default + * By default, the Parser will not capture ignored characters. 
A static holds this default * value in a JVM wide basis options object. * * Significant memory savings can be made if we do NOT capture ignored characters, @@ -60,24 +87,89 @@ public static ParserOptions getDefaultParserOptions() { * @see graphql.language.SourceLocation */ public static void setDefaultParserOptions(ParserOptions options) { - defaultJvmParserOptions = Assert.assertNotNull(options); + defaultJvmParserOptions = assertNotNull(options); + } + + + /** + * By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST + * elements . A static holds this default value for operation parsing in a JVM wide basis options object. + * + * @return the static default JVM value for operation parsing + * + * @see graphql.language.IgnoredChar + * @see graphql.language.SourceLocation + */ + public static ParserOptions getDefaultOperationParserOptions() { + return defaultJvmOperationParserOptions; + } + + /** + * By default, the Parser will not capture ignored characters or line comments. A static holds this default + * value in a JVM wide basis options object for operation parsing. + * + * This static can be set to true to allow the behavior of version 16.x or before. + * + * @param options - the new default JVM parser options for operation parsing + * + * @see graphql.language.IgnoredChar + * @see graphql.language.SourceLocation + */ + public static void setDefaultOperationParserOptions(ParserOptions options) { + defaultJvmOperationParserOptions = assertNotNull(options); + } + + /** + * By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST + * elements. The SDL default options allow unlimited tokens and whitespace, since a DOS attack vector is + * not commonly available via schema SDL parsing. + * + * A static holds this default value for SDL parsing in a JVM wide basis options object. 
+ * + * @return the static default JVM value for SDL parsing + * + * @see graphql.language.IgnoredChar + * @see graphql.language.SourceLocation + * @see graphql.schema.idl.SchemaParser + */ + public static ParserOptions getDefaultSdlParserOptions() { + return defaultJvmSdlParserOptions; + } + + /** + * By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST + * elements. A static holds this default value for SDL parsing in a JVM wide basis options object. + * + * This static can be set to true to allow the behavior of version 16.x or before. + * + * @param options - the new default JVM parser options for SDL parsing + * + * @see graphql.language.IgnoredChar + * @see graphql.language.SourceLocation + */ + public static void setDefaultSdlParserOptions(ParserOptions options) { + defaultJvmSdlParserOptions = assertNotNull(options); } private final boolean captureIgnoredChars; private final boolean captureSourceLocation; + private final boolean captureLineComments; private final int maxTokens; + private final int maxWhitespaceTokens; private ParserOptions(Builder builder) { this.captureIgnoredChars = builder.captureIgnoredChars; this.captureSourceLocation = builder.captureSourceLocation; + this.captureLineComments = builder.captureLineComments; this.maxTokens = builder.maxTokens; + this.maxWhitespaceTokens = builder.maxWhitespaceTokens; } /** * Significant memory savings can be made if we do NOT capture ignored characters, * especially in SDL parsing. So we have set this to false by default. * - * @return true if ignored chars are captured in AST nodes + * @return true if ignored chars should be captured as AST nodes */ public boolean isCaptureIgnoredChars() { return captureIgnoredChars; @@ -88,7 +180,7 @@ public boolean isCaptureIgnoredChars() { * Memory savings can be made if we do NOT set {@link graphql.language.SourceLocation}s * on AST nodes, especially in SDL parsing. 
* - * @return true if {@link graphql.language.SourceLocation}s are captured in AST nodes + * @return true if {@link graphql.language.SourceLocation}s should be captured as AST nodes * * @see graphql.language.SourceLocation */ @@ -97,8 +189,22 @@ public boolean isCaptureSourceLocation() { } /** - * An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn - * memory representing a document that wont ever execute. To prevent this you can set a maximum number of parse + * Single-line {@link graphql.language.Comment}s do not have any semantic meaning in + * GraphQL source documents, as such you may wish to ignore them. + *

+ * This option does not ignore documentation {@link graphql.language.Description}s. + * + * @return true if {@link graphql.language.Comment}s should be captured as AST nodes + * + * @see graphql.language.SourceLocation + */ + public boolean isCaptureLineComments() { + return captureLineComments; + } + + /** + * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn + * memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse * tokens that will be accepted before an exception is thrown and the parsing is stopped. * * @return the maximum number of raw tokens the parser will accept, after which an exception will be thrown. @@ -107,6 +213,17 @@ public int getMaxTokens() { return maxTokens; } + /** + * A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn + * memory representing a document. To prevent this you can set a maximum number of whitespace parse + * tokens that will be accepted before an exception is thrown and the parsing is stopped. + * + * @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown. 
+ */ + public int getMaxWhitespaceTokens() { + return maxWhitespaceTokens; + } + public ParserOptions transform(Consumer builderConsumer) { Builder builder = new Builder(this); builderConsumer.accept(builder); @@ -121,7 +238,9 @@ public static class Builder { private boolean captureIgnoredChars = false; private boolean captureSourceLocation = true; + private boolean captureLineComments = true; private int maxTokens = MAX_QUERY_TOKENS; + private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS; Builder() { } @@ -129,7 +248,9 @@ public static class Builder { Builder(ParserOptions parserOptions) { this.captureIgnoredChars = parserOptions.captureIgnoredChars; this.captureSourceLocation = parserOptions.captureSourceLocation; + this.captureLineComments = parserOptions.captureLineComments; this.maxTokens = parserOptions.maxTokens; + this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens; } public Builder captureIgnoredChars(boolean captureIgnoredChars) { @@ -142,11 +263,21 @@ public Builder captureSourceLocation(boolean captureSourceLocation) { return this; } + public Builder captureLineComments(boolean captureLineComments) { + this.captureLineComments = captureLineComments; + return this; + } + public Builder maxTokens(int maxTokens) { this.maxTokens = maxTokens; return this; } + public Builder maxWhitespaceTokens(int maxWhitespaceTokens) { + this.maxWhitespaceTokens = maxWhitespaceTokens; + return this; + } + public ParserOptions build() { return new ParserOptions(this); } diff --git a/src/main/java/graphql/parser/SafeTokenSource.java b/src/main/java/graphql/parser/SafeTokenSource.java new file mode 100644 index 0000000000..c92c76d916 --- /dev/null +++ b/src/main/java/graphql/parser/SafeTokenSource.java @@ -0,0 +1,94 @@ +package graphql.parser; + +import graphql.Internal; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenFactory; +import org.antlr.v4.runtime.TokenSource; + +import 
java.util.function.BiConsumer; + +/** + * This token source can wrap a lexer and if it asks for more than a maximum number of tokens + * the user can take some action, typically throw an exception to stop lexing. + * + * It tracks the maximum number per token channel, so we have 3 at the moment, and they will all be tracked. + * + * This is used to protect us from evil input. The lexer will eagerly try to find all tokens + * at times and certain inputs (directives butted together for example) will cause the lexer + * to keep doing work even before the tokens are presented back to the parser + * and hence before it has a chance to stop work once too much has been done. + */ +@Internal +public class SafeTokenSource implements TokenSource { + + private final TokenSource lexer; + private final int maxTokens; + private final int maxWhitespaceTokens; + private final BiConsumer whenMaxTokensExceeded; + private final int channelCounts[]; + + public SafeTokenSource(TokenSource lexer, int maxTokens, int maxWhitespaceTokens, BiConsumer whenMaxTokensExceeded) { + this.lexer = lexer; + this.maxTokens = maxTokens; + this.maxWhitespaceTokens = maxWhitespaceTokens; + this.whenMaxTokensExceeded = whenMaxTokensExceeded; + // this could be a Map however we want it to be as fast as possible. + // we only have 3 channels - but they are 0,2 and 3 so use 5 for safety - still faster than a map get/put + // if we ever add another channel at index 5 or beyond it will IOBEx during tests so future changes will be handled before release! 
+ this.channelCounts = new int[]{0, 0, 0, 0, 0}; + } + + + @Override + public Token nextToken() { + Token token = lexer.nextToken(); + if (token != null) { + int channel = token.getChannel(); + int currentCount = ++channelCounts[channel]; + if (channel == Parser.CHANNEL_WHITESPACE) { + // whitespace gets its own max count + callbackIfMaxExceeded(maxWhitespaceTokens, currentCount, token); + } else { + callbackIfMaxExceeded(maxTokens, currentCount, token); + } + } + return token; + } + + private void callbackIfMaxExceeded(int maxCount, int currentCount, Token token) { + if (currentCount > maxCount) { + whenMaxTokensExceeded.accept(maxCount, token); + } + } + + @Override + public int getLine() { + return lexer.getLine(); + } + + @Override + public int getCharPositionInLine() { + return lexer.getCharPositionInLine(); + } + + @Override + public CharStream getInputStream() { + return lexer.getInputStream(); + } + + @Override + public String getSourceName() { + return lexer.getSourceName(); + } + + @Override + public void setTokenFactory(TokenFactory factory) { + lexer.setTokenFactory(factory); + } + + @Override + public TokenFactory getTokenFactory() { + return lexer.getTokenFactory(); + } +} diff --git a/src/main/java/graphql/schema/idl/SchemaParser.java b/src/main/java/graphql/schema/idl/SchemaParser.java index e49c47aaa3..097e5649f3 100644 --- a/src/main/java/graphql/schema/idl/SchemaParser.java +++ b/src/main/java/graphql/schema/idl/SchemaParser.java @@ -114,9 +114,7 @@ public TypeDefinitionRegistry parseImpl(Reader schemaInput) { private TypeDefinitionRegistry parseImpl(Reader schemaInput, ParserOptions parseOptions) { try { if (parseOptions == null) { - // for SDL we dont stop how many parser tokens there are - its not the attack vector - // to be prevented compared to queries - parseOptions = ParserOptions.getDefaultParserOptions().transform(opts -> opts.maxTokens(Integer.MAX_VALUE)); + parseOptions = ParserOptions.getDefaultSdlParserOptions(); } Parser parser = 
new Parser(); Document document = parser.parseDocument(schemaInput, parseOptions); diff --git a/src/test/groovy/graphql/parser/BadParserSituations.java b/src/test/groovy/graphql/parser/BadParserSituations.java new file mode 100644 index 0000000000..9f603ccee9 --- /dev/null +++ b/src/test/groovy/graphql/parser/BadParserSituations.java @@ -0,0 +1,128 @@ +package graphql.parser; + +import com.google.common.base.Strings; +import graphql.ExecutionInput; +import graphql.ExecutionResult; +import graphql.GraphQL; +import graphql.GraphQLError; +import graphql.schema.GraphQLSchema; +import graphql.schema.StaticDataFetcher; +import graphql.schema.idl.RuntimeWiring; +import graphql.schema.idl.SchemaGenerator; +import graphql.schema.idl.SchemaParser; +import graphql.schema.idl.TypeDefinitionRegistry; + +import java.io.OutputStream; +import java.io.PrintStream; +import java.time.Duration; +import java.util.List; +import java.util.function.Function; + +import static graphql.schema.idl.RuntimeWiring.newRuntimeWiring; + +/** + * This is not a test - it's a program we can run to show the system reacts to certain bad inputs + * + * You can run this to discover scenarios and see what happens at what levels. 
+ * + * I used this to help discover more on the behavior of ANTLR and its moving parts + */ +public class BadParserSituations { + static Integer STEP = 5000; + static Integer CHECKS_AMOUNT = 15; + + public static void main(String[] args) { + GraphQL graphQL = setupSchema(); + + System.setErr(toDevNull()); + + for (int runNumber = 1; runNumber <= 2; runNumber++) { + String runState = "Limited Tokens"; + // on the second run - have unlimited tokens + if (runNumber > 1) { + ParserOptions unlimitedTokens = ParserOptions.getDefaultOperationParserOptions().transform( + builder -> builder.maxTokens(Integer.MAX_VALUE).maxWhitespaceTokens(Integer.MAX_VALUE)); + ParserOptions.setDefaultOperationParserOptions(unlimitedTokens); + + runState = "Unlimited Tokens"; + } + runScenarios("Whitespace Bad Payloads", runState, graphQL, howMany -> { + String repeatedPayload = Strings.repeat(" ", howMany); + return "query {__typename " + repeatedPayload + " }"; + }); + runScenarios("Comment Bad Payloads", runState, graphQL, howMany -> { + String repeatedPayload = Strings.repeat("# some comment\n", howMany); + String query = repeatedPayload + "\nquery q {__typename }"; + return query; + }); + runScenarios("Grammar Directives Bad Payloads", runState, graphQL, howMany -> { + String repeatedPayload = Strings.repeat("@lol", howMany); + return "query {__typename " + repeatedPayload + " }"; + }); + runScenarios("Grammar Field Bad Payloads", runState, graphQL, howMany -> { + String repeatedPayload = Strings.repeat("f(id:null)", howMany); + return "query {__typename " + repeatedPayload + " }"; + }); + + } + + } + + private static void runScenarios(String scenarioName, String runState, GraphQL graphQL, Function queryGenerator) { + long maxRuntime = 0; + for (int i = 1; i < CHECKS_AMOUNT; i++) { + + int howManyBadPayloads = i * STEP; + String query = queryGenerator.apply(howManyBadPayloads); + + ExecutionInput executionInput = ExecutionInput.newExecutionInput().query(query).build(); + long 
startTime = System.nanoTime(); + + ExecutionResult executionResult = graphQL.execute(executionInput); + + Duration duration = Duration.ofNanos(System.nanoTime() - startTime); + + System.out.printf("%s(%s)(%d of %d) - | query length %d | bad payloads %d | duration %dms \n", scenarioName, runState, i, CHECKS_AMOUNT, query.length(), howManyBadPayloads, duration.toMillis()); + printLastError(executionResult.getErrors()); + + if (duration.toMillis() > maxRuntime) { + maxRuntime = duration.toMillis(); + } + } + System.out.printf("%s(%s) - finished | max time was %s ms \n" + + "=======================\n\n", scenarioName, runState, maxRuntime); + } + + private static void printLastError(List errors) { + if (errors.size() > 0) { + GraphQLError lastError = errors.get(errors.size() - 1); + System.out.printf("\terror : %s \n", lastError.getMessage()); + } + + } + + private static PrintStream toDevNull() { + return new PrintStream(new OutputStream() { + public void write(int b) { + //DO NOTHING + } + }); + } + + private static GraphQL setupSchema() { + String schema = "type Query{hello: String}"; + + SchemaParser schemaParser = new SchemaParser(); + TypeDefinitionRegistry typeDefinitionRegistry = schemaParser.parse(schema); + + RuntimeWiring runtimeWiring = newRuntimeWiring() + .type("Query", builder -> builder.dataFetcher("hello", new StaticDataFetcher("world"))) + .build(); + + SchemaGenerator schemaGenerator = new SchemaGenerator(); + GraphQLSchema graphQLSchema = schemaGenerator.makeExecutableSchema(typeDefinitionRegistry, runtimeWiring); + + GraphQL graphQL = GraphQL.newGraphQL(graphQLSchema).build(); + return graphQL; + } +} diff --git a/src/test/groovy/graphql/parser/ParserOptionsTest.groovy b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy new file mode 100644 index 0000000000..5867b181fc --- /dev/null +++ b/src/test/groovy/graphql/parser/ParserOptionsTest.groovy @@ -0,0 +1,79 @@ +package graphql.parser + +import spock.lang.Specification + +class 
ParserOptionsTest extends Specification { + static defaultOptions = ParserOptions.getDefaultParserOptions() + static defaultOperationOptions = ParserOptions.getDefaultOperationParserOptions() + static defaultSdlOptions = ParserOptions.getDefaultSdlParserOptions() + + void setup() { + ParserOptions.setDefaultParserOptions(defaultOptions) + ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions) + ParserOptions.setDefaultSdlParserOptions(defaultSdlOptions) + } + + void cleanup() { + ParserOptions.setDefaultParserOptions(defaultOptions) + ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions) + ParserOptions.setDefaultSdlParserOptions(defaultSdlOptions) + } + + def "lock in default settings"() { + expect: + defaultOptions.getMaxTokens() == 15_000 + defaultOptions.getMaxWhitespaceTokens() == 200_000 + defaultOptions.isCaptureSourceLocation() + defaultOptions.isCaptureLineComments() + !defaultOptions.isCaptureIgnoredChars() + + defaultOperationOptions.getMaxTokens() == 15_000 + defaultOperationOptions.getMaxWhitespaceTokens() == 200_000 + defaultOperationOptions.isCaptureSourceLocation() + !defaultOperationOptions.isCaptureLineComments() + !defaultOperationOptions.isCaptureIgnoredChars() + + defaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE + defaultSdlOptions.getMaxWhitespaceTokens() == Integer.MAX_VALUE + defaultSdlOptions.isCaptureSourceLocation() + defaultSdlOptions.isCaptureLineComments() + !defaultSdlOptions.isCaptureIgnoredChars() + } + + def "can set in new option JVM wide"() { + def newDefaultOptions = defaultOptions.transform({ it.captureIgnoredChars(true) }) + def newDefaultOperationOptions = defaultOperationOptions.transform( + { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) }) + def newDefaultSDlOptions = defaultSdlOptions.transform( + { it.captureIgnoredChars(true).captureLineComments(true).maxWhitespaceTokens(300_000) }) + + when: + 
ParserOptions.setDefaultParserOptions(newDefaultOptions) + ParserOptions.setDefaultOperationParserOptions(newDefaultOperationOptions) + ParserOptions.setDefaultSdlParserOptions(newDefaultSDlOptions) + + def currentDefaultOptions = ParserOptions.getDefaultParserOptions() + def currentDefaultOperationOptions = ParserOptions.getDefaultOperationParserOptions() + def currentDefaultSdlOptions = ParserOptions.getDefaultSdlParserOptions() + + then: + + currentDefaultOptions.getMaxTokens() == 15_000 + currentDefaultOptions.getMaxWhitespaceTokens() == 200_000 + currentDefaultOptions.isCaptureSourceLocation() + currentDefaultOptions.isCaptureLineComments() + currentDefaultOptions.isCaptureIgnoredChars() + + currentDefaultOperationOptions.getMaxTokens() == 15_000 + currentDefaultOperationOptions.getMaxWhitespaceTokens() == 300_000 + currentDefaultOperationOptions.isCaptureSourceLocation() + currentDefaultOperationOptions.isCaptureLineComments() + currentDefaultOperationOptions.isCaptureIgnoredChars() + + currentDefaultSdlOptions.getMaxTokens() == Integer.MAX_VALUE + currentDefaultSdlOptions.getMaxWhitespaceTokens() == 300_000 + currentDefaultSdlOptions.isCaptureSourceLocation() + currentDefaultSdlOptions.isCaptureLineComments() + currentDefaultSdlOptions.isCaptureIgnoredChars() + } +} diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy index 34f641d313..d4cca5b87c 100644 --- a/src/test/groovy/graphql/parser/ParserTest.groovy +++ b/src/test/groovy/graphql/parser/ParserTest.groovy @@ -47,6 +47,22 @@ import spock.lang.Unroll class ParserTest extends Specification { + static defaultOptions = ParserOptions.getDefaultParserOptions() + static defaultOperationOptions = ParserOptions.getDefaultOperationParserOptions() + static defaultSdlOptions = ParserOptions.getDefaultSdlParserOptions() + + void setup() { + ParserOptions.setDefaultParserOptions(defaultOptions) + 
ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions) + ParserOptions.setDefaultSdlParserOptions(defaultSdlOptions) + } + + void cleanup() { + ParserOptions.setDefaultParserOptions(defaultOptions) + ParserOptions.setDefaultOperationParserOptions(defaultOperationOptions) + ParserOptions.setDefaultSdlParserOptions(defaultSdlOptions) + } + def "parse anonymous simple query"() { given: @@ -1093,6 +1109,26 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases""" er.errors[0].message.contains("parsing has been cancelled") } + def "a large whitespace laughs attack will be prevented by default"() { + def spaces = " " * 300_000 + def text = "query { f $spaces }" + when: + Parser.parse(text) + + then: + def e = thrown(ParseCancelledException) + e.getMessage().contains("parsing has been cancelled") + + when: "integration test to prove it cancels by default" + + def sdl = """type Query { f : ID} """ + def graphQL = TestUtil.graphQL(sdl).build() + def er = graphQL.execute(text) + then: + er.errors.size() == 1 + er.errors[0].message.contains("parsing has been cancelled") + } + def "they can shoot themselves if they want to with large documents"() { def lol = "@lol" * 10000 // two tokens = 20000+ tokens def text = "query { f $lol }" @@ -1104,4 +1140,16 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases""" then: doc != null } + + def "they can shoot themselves if they want to with large documents with lots of whitespace"() { + def spaces = " " * 300_000 + def text = "query { f $spaces }" + + def options = ParserOptions.newParserOptions().maxWhitespaceTokens(Integer.MAX_VALUE).build() + when: + def doc = new Parser().parseDocument(text, options) + + then: + doc != null + } } diff --git a/src/test/groovy/graphql/parser/SafeTokenSourceTest.groovy b/src/test/groovy/graphql/parser/SafeTokenSourceTest.groovy new file mode 100644 index 0000000000..cf8b34658e --- /dev/null +++ b/src/test/groovy/graphql/parser/SafeTokenSourceTest.groovy @@ -0,0 +1,94 @@ 
+package graphql.parser + +import graphql.parser.antlr.GraphqlLexer +import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.Token +import spock.lang.Specification + +import java.util.function.BiConsumer + +class SafeTokenSourceTest extends Specification { + + private void consumeAllTokens(SafeTokenSource tokenSource) { + def nextToken = tokenSource.nextToken() + while (nextToken != null && nextToken.getType() != Token.EOF) { + nextToken = tokenSource.nextToken() + } + } + + private GraphqlLexer lexer(doc) { + def charStream = CharStreams.fromString(doc) + def graphqlLexer = new GraphqlLexer(charStream) + graphqlLexer + } + + def "can call back to the consumer when max whitespace tokens are encountered"() { + + def offendingText = " " * 1000 + GraphqlLexer graphqlLexer = lexer(""" + query foo { _typename $offendingText @lol@lol@lol } + """) + when: + Token offendingToken = null + BiConsumer onTooManyTokens = { max, token -> + offendingToken = token + throw new IllegalStateException("stop at $max") + } + def tokenSource = new SafeTokenSource(graphqlLexer, 50, 1000, onTooManyTokens) + + consumeAllTokens(tokenSource) + assert false, "This is not meant to actually consume all tokens" + + then: + def e = thrown(IllegalStateException) + e.message == "stop at 1000" + offendingToken != null + offendingToken.getChannel() == 3 // whitespace + offendingToken.getText() == " " + } + + def "can call back to the consumer when max grammar tokens are encountered"() { + + def offendingText = "@lol" * 1000 + GraphqlLexer graphqlLexer = lexer(""" + query foo { _typename $offendingText } + """) + when: + Token offendingToken = null + BiConsumer onTooManyTokens = { max, token -> + offendingToken = token + throw new IllegalStateException("stop at $max") + } + def tokenSource = new SafeTokenSource(graphqlLexer, 1000, 200_000, onTooManyTokens) + + consumeAllTokens(tokenSource) + assert false, "This is not meant to actually consume all tokens" + + then: + def e = 
thrown(IllegalStateException) + e.message == "stop at 1000" + offendingToken != null + offendingToken.getChannel() == 0 // grammar + } + + def "can safely get to the end of text if its ok"() { + + GraphqlLexer graphqlLexer = lexer(""" + query foo { _typename @lol@lol@lol } + """) + when: + Token offendingToken = null + BiConsumer onTooManyTokens = { max, token -> + offendingToken = token + throw new IllegalStateException("stop at $max") + } + def tokenSource = new SafeTokenSource(graphqlLexer, 1000, 200_000, onTooManyTokens) + + consumeAllTokens(tokenSource) + + then: + noExceptionThrown() + offendingToken == null + } + +} diff --git a/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy b/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy index 0dd515e1ae..8fcbcd44e1 100644 --- a/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy +++ b/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy @@ -343,6 +343,7 @@ class SchemaParserTest extends Specification { def sdl = "type Query {\n" for (int i = 0; i < 30000; i++) { sdl += " f" + i + " : ID\n" + sdl += " " * 10 // 10 whitespace as well } sdl += "}"