Skip to content

Commit

Permalink
Merge pull request #3112 from graphql-java/prevent-stackoverflow-in-p…
Browse files Browse the repository at this point in the history
…arser

Preventing stack overflow exceptions via limiting the depth of the parser rules
  • Loading branch information
bbakerman committed Mar 20, 2023
2 parents 4b4c0e7 + dcd9cf9 commit 80e3135
Show file tree
Hide file tree
Showing 6 changed files with 297 additions and 137 deletions.
45 changes: 41 additions & 4 deletions src/main/java/graphql/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import graphql.parser.antlr.GraphqlLexer;
import graphql.parser.antlr.GraphqlParser;
import graphql.parser.exceptions.ParseCancelledException;
import graphql.parser.exceptions.ParseCancelledTooDeepException;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CodePointCharStream;
Expand Down Expand Up @@ -299,7 +300,12 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(environment, token, maxTokenCount, multiSourceReader);
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
environment,
token,
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
Expand Down Expand Up @@ -345,9 +351,31 @@ private void setupParserListener(ParserEnvironment environment, MultiSourceReade
ParserOptions parserOptions = toLanguage.getParserOptions();
ParsingListener parsingListener = parserOptions.getParsingListener();
int maxTokens = parserOptions.getMaxTokens();
int maxRuleDepth = parserOptions.getMaxRuleDepth();
// prevent billion-laughs style attacks by restricting how many tokens, and how much rule depth, we allow
ParseTreeListener listener = new GraphqlBaseListener() {
int count = 0;
int depth = 0;


@Override
public void enterEveryRule(ParserRuleContext ctx) {
    // Track how deeply nested the grammar rules are. Deeply nested rules can cause
    // StackOverflowError during ANTLR parsing, so once the configured maximum is
    // exceeded we cancel parsing rather than risk blowing the stack (DOS protection).
    depth++;
    if (depth > maxRuleDepth) {
        // throws ParseCancelledTooDeepException - parsing stops here
        throwIfTokenProblems(
                environment,
                ctx.getStart(),
                maxRuleDepth,
                multiSourceReader,
                ParseCancelledTooDeepException.class
        );
    }
}

@Override
public void exitEveryRule(ParserRuleContext ctx) {
    // leaving a rule - unwind the depth counter so `depth` always reflects the
    // current rule nesting level, not the total number of rules entered
    depth--;
}

@Override
public void visitTerminal(TerminalNode node) {
Expand All @@ -372,14 +400,20 @@ public int getCharPositionInLine() {

count++;
if (count > maxTokens) {
throwCancelParseIfTooManyTokens(environment, token, maxTokens, multiSourceReader);
throwIfTokenProblems(
environment,
token,
maxTokens,
multiSourceReader,
ParseCancelledException.class
);
}
}
};
parser.addParseListener(listener);
}

private void throwCancelParseIfTooManyTokens(ParserEnvironment environment, Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
private void throwIfTokenProblems(ParserEnvironment environment, Token token, int maxLimit, MultiSourceReader multiSourceReader, Class<? extends InvalidSyntaxException> targetException) throws ParseCancelledException {
String tokenType = "grammar";
SourceLocation sourceLocation = null;
String offendingToken = null;
Expand All @@ -390,7 +424,10 @@ private void throwCancelParseIfTooManyTokens(ParserEnvironment environment, Toke
offendingToken = token.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}
throw new ParseCancelledException(environment.getI18N(), sourceLocation, offendingToken, maxTokens, tokenType);
if (targetException.equals(ParseCancelledTooDeepException.class)) {
throw new ParseCancelledTooDeepException(environment.getI18N(), sourceLocation, offendingToken, maxLimit, tokenType);
}
throw new ParseCancelledException(environment.getI18N(), sourceLocation, offendingToken, maxLimit, tokenType);
}

/**
Expand Down
41 changes: 37 additions & 4 deletions src/main/java/graphql/parser/ParserOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ public class ParserOptions {
/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* set this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
*
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
Expand All @@ -26,19 +26,30 @@ public class ParserOptions {
* Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
* memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
* however it still takes some time to happen.
*
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_WHITESPACE_TOKENS = 200_000;

/**
* A graphql hacking vector is to send nonsensical queries with deeply nested grammar rules, which
* can cause stack overflow exceptions during query parsing. To prevent this for most users, graphql-java
* sets this value to 500 grammar rules deep.
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_RULE_DEPTH = 500;

private static ParserOptions defaultJvmParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.readerTrackData(true)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
.build();

private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
Expand All @@ -48,6 +59,7 @@ public class ParserOptions {
.readerTrackData(true)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
.build();

private static ParserOptions defaultJvmSdlParserOptions = newParserOptions()
Expand All @@ -57,6 +69,7 @@ public class ParserOptions {
.readerTrackData(true)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
.build();

/**
Expand Down Expand Up @@ -160,6 +173,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean readerTrackData;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
private final ParsingListener parsingListener;

private ParserOptions(Builder builder) {
Expand All @@ -169,6 +183,7 @@ private ParserOptions(Builder builder) {
this.readerTrackData = builder.readerTrackData;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
this.parsingListener = builder.parsingListener;
}

Expand Down Expand Up @@ -240,6 +255,17 @@ public int getMaxWhitespaceTokens() {
return maxWhitespaceTokens;
}

/**
* A graphql hacking vector is to send nonsensical queries with deeply nested grammar rules, which
* can cause stack overflow exceptions during query parsing. To prevent this you can set a value
* that is the maximum depth allowed before an exception is thrown and the parsing is stopped.
*
* @return the maximum rule depth the parser will accept, after which an exception will be thrown.
*/
public int getMaxRuleDepth() {
return maxRuleDepth;
}

public ParsingListener getParsingListener() {
return parsingListener;
}
Expand All @@ -260,9 +286,10 @@ public static class Builder {
private boolean captureSourceLocation = true;
private boolean captureLineComments = true;
private boolean readerTrackData = true;
private int maxTokens = MAX_QUERY_TOKENS;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;

Builder() {
}
Expand All @@ -273,6 +300,7 @@ public static class Builder {
this.captureLineComments = parserOptions.captureLineComments;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
this.parsingListener = parserOptions.parsingListener;
}

Expand Down Expand Up @@ -306,6 +334,11 @@ public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
return this;
}

public Builder maxRuleDepth(int maxRuleDepth) {
this.maxRuleDepth = maxRuleDepth;
return this;
}

public Builder parsingListener(ParsingListener parsingListener) {
this.parsingListener = assertNotNull(parsingListener);
return this;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package graphql.parser.exceptions;

import graphql.Internal;
import graphql.i18n.I18n;
import graphql.language.SourceLocation;
import graphql.parser.InvalidSyntaxException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

@Internal
public class ParseCancelledTooDeepException extends InvalidSyntaxException {

    /**
     * Thrown when parsing is cancelled because the maximum allowed grammar rule depth
     * has been exceeded.  Limiting rule depth prevents maliciously nested queries from
     * causing {@link StackOverflowError}s inside the ANTLR parser.
     *
     * @param i18N           the i18n message source used to build the error message
     * @param sourceLocation where in the document the offending token was encountered, if known
     * @param offendingToken the text of the token at which parsing stopped, if known
     * @param maxRuleDepth   the configured maximum rule depth that was exceeded
     * @param tokenType      a description of the kind of token involved, e.g. "grammar"
     */
    @Internal
    public ParseCancelledTooDeepException(@NotNull I18n i18N, @Nullable SourceLocation sourceLocation, @Nullable String offendingToken, int maxRuleDepth, @NotNull String tokenType) {
        super(i18N.msg("ParseCancelled.tooDeep", maxRuleDepth, tokenType),
                sourceLocation, offendingToken, null, null);
    }
}
1 change: 1 addition & 0 deletions src/main/resources/i18n/Parsing.properties
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ InvalidSyntaxBail.full=Invalid syntax with offending token ''{0}'' at line {1} c
InvalidSyntaxMoreTokens.full=Invalid syntax encountered. There are extra tokens in the text that have not been consumed. Offending token ''{0}'' at line {1} column {2}
#
ParseCancelled.full=More than {0} ''{1}'' tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.
ParseCancelled.tooDeep=More than {0} deep ''{1}'' rules have been entered. To prevent Denial Of Service attacks, parsing has been cancelled.
#
InvalidUnicode.trailingLeadingSurrogate=Invalid unicode encountered. Trailing surrogate must be preceded with a leading surrogate. Offending token ''{0}'' at line {1} column {2}
InvalidUnicode.leadingTrailingSurrogate=Invalid unicode encountered. Leading surrogate must be followed by a trailing surrogate. Offending token ''{0}'' at line {1} column {2}
Expand Down

0 comments on commit 80e3135

Please sign in to comment.