diff --git a/change-notes/1.24/analysis-javascript.md b/change-notes/1.24/analysis-javascript.md index 47fb2dc590be..9268ab6a5c80 100644 --- a/change-notes/1.24/analysis-javascript.md +++ b/change-notes/1.24/analysis-javascript.md @@ -37,6 +37,7 @@ | Cross-site scripting through exception (`js/xss-through-exception`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights potential XSS vulnerabilities where an exception is written to the DOM. Results are not shown on LGTM by default. | | Regular expression always matches (`js/regex/always-matches`) | correctness, regular-expressions | Highlights regular expression checks that trivially succeed by matching an empty substring. Results are shown on LGTM by default. | | Missing await (`js/missing-await`) | correctness | Highlights expressions that operate directly on a promise object in a nonsensical way, instead of awaiting its result. Results are shown on LGTM by default. | +| Polynomial regular expression used on uncontrolled data (`js/polynomial-redos`) | security, external/cwe/cwe-730, external/cwe/cwe-400 | Highlights expensive regular expressions that may be used on malicious input. Results are shown on LGTM by default. | | Prototype pollution in utility function (`js/prototype-pollution-utility`) | security, external/cwe/cwe-400, external/cwe/cwe-471 | Highlights recursive copying operations that are susceptible to prototype pollution. Results are shown on LGTM by default. | | Unsafe jQuery plugin (`js/unsafe-jquery-plugin`) | Highlights potential XSS vulnerabilities in unsafely designed jQuery plugins. Results are shown on LGTM by default. | diff --git a/javascript/ql/src/Performance/PolynomialReDoS.qhelp b/javascript/ql/src/Performance/PolynomialReDoS.qhelp new file mode 100644 index 000000000000..b0e92d4ea5f2 --- /dev/null +++ b/javascript/ql/src/Performance/PolynomialReDoS.qhelp @@ -0,0 +1,108 @@ + + + + + + + +

+ + Consider this use of a regular expression, which removes + all leading and trailing whitespace in a string: + +

+ + + text.replace(/^\s+|\s+$/g, ''); // BAD + + +

+ + The sub-expression "\s+$" will match the + whitespace characters in text from left to right, but it + can start matching anywhere within a whitespace sequence. This is + problematic for strings that do not end with a whitespace + character. Such a string will force the regular expression engine to + process each whitespace sequence once per whitespace character in the + sequence. + +

+ +

+ + This ultimately means that the time cost of trimming a + string is quadratic in the length of the string. So a string like + "a b" will take milliseconds to process, but a similar + string with a million spaces instead of just one will take several + minutes. + +

+ +

+ + Avoid this problem by rewriting the regular expression to + not contain the ambiguity about when to start matching whitespace + sequences. For instance, by using a negative look-behind + (/^\s+|(?<!\s)\s+$/g), or just by using the built-in trim + method (text.trim()). + +

+ +

+ + Note that the sub-expression "^\s+" is + not problematic as the ^ anchor restricts + when that sub-expression can start matching, and as the regular + expression engine matches from left to right. + +

+ +
+ + + +

+ + As a similar, but slightly subtler problem, consider the + regular expression that matches lines with numbers, possibly written + using scientific notation: +

+ + + ^0\.\d+E?\d+$ // BAD + + +

+ + The problem with this regular expression is in the + sub-expression \d+E?\d+ because the second + \d+ can start matching digits anywhere after the first + match of the first \d+ if there is no E in + the input string. + +

+ +

+ + This is problematic for strings that do not + end with a digit. Such a string will force the regular expression + engine to process each digit sequence once per digit in the sequence, + again leading to a quadratic time complexity. + +

+ +

+ + To make the processing faster, the regular expression + should be rewritten such that the two \d+ sub-expressions + do not have overlapping matches: ^0\.\d+(E\d+)?$. + +

+ +
+ + + +
diff --git a/javascript/ql/src/Performance/PolynomialReDoS.ql b/javascript/ql/src/Performance/PolynomialReDoS.ql new file mode 100644 index 000000000000..0d8fdb4990cb --- /dev/null +++ b/javascript/ql/src/Performance/PolynomialReDoS.ql @@ -0,0 +1,23 @@ +/** + * @name Polynomial regular expression used on uncontrolled data + * @description A regular expression that can require polynomial time + * to match user-provided values may be + * vulnerable to denial-of-service attacks. + * @kind path-problem + * @problem.severity warning + * @precision high + * @id js/polynomial-redos + * @tags security + * external/cwe/cwe-730 + * external/cwe/cwe-400 + */ + +import javascript +import semmle.javascript.security.performance.PolynomialReDoS::PolynomialReDoS +import DataFlow::PathGraph + + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "This expensive $@ use depends on $@.", + sink.getNode().(Sink).getRegExp(), "regular expression", source.getNode(), "a user-provided value" diff --git a/javascript/ql/src/Performance/ReDoS.qhelp b/javascript/ql/src/Performance/ReDoS.qhelp index b226eef11b8d..48fa8393cf44 100644 --- a/javascript/ql/src/Performance/ReDoS.qhelp +++ b/javascript/ql/src/Performance/ReDoS.qhelp @@ -1,70 +1,34 @@ +"-//Semmle//qhelp//EN" +"qhelp.dtd"> + - -

-Some regular expressions take a very long time to match certain input strings to the point where -the time it takes to match a string of length n is proportional to 2n. -Such regular expressions can negatively affect performance, or even allow a malicious user to -perform a Denial of Service ("DoS") attack by crafting an expensive input string for the regular -expression to match. -

-

-The regular expression engines provided by many popular JavaScript platforms use backtracking -non-deterministic finite automata to implement regular expression matching. While this approach -is space-efficient and allows supporting advanced features like capture groups, it is not -time-efficient in general. The worst-case time complexity of such an automaton can be exponential, -meaning that for strings of a certain shape, increasing the input length by ten characters may -make the automaton about 1000 times slower. -

-

-Typically, a regular expression is affected by this problem if it contains a repetition of the -form r* or r+ where the sub-expression r is ambiguous in -the sense that it can match some string in multiple ways. More information about the precise -circumstances can be found in the references. -

-
+ - -

-Modify the regular expression to remove the ambiguity. -

-
+ +

+ Consider this regular expression: +

+ + /^_(__|.)+_$/ + +

+ Its sub-expression "(__|.)+?" can match the string "__" either by the + first alternative "__" to the left of the "|" operator, or by two + repetitions of the second alternative "." to the right. Thus, a string consisting + of an odd number of underscores followed by some other character will cause the regular + expression engine to run for an exponential amount of time before rejecting the input. +

+

+ This problem can be avoided by rewriting the regular expression to remove the ambiguity between + the two branches of the alternative inside the repetition: +

+ + /^_(__|[^_])+_$/ + +
- -

-Consider this regular expression: -

- -/^_(__|.)+_$/ - -

-Its sub-expression "(__|.)+?" can match the string "__" either by the -first alternative "__" to the left of the "|" operator, or by two -repetitions of the second alternative "." to the right. Thus, a string consisting -of an odd number of underscores followed by some other character will cause the regular -expression engine to run for an exponential amount of time before rejecting the input. -

-

-This problem can be avoided by rewriting the regular expression to remove the ambiguity between -the two branches of the alternative inside the repetition: -

- -/^_(__|[^_])+_$/ - -
+ - -
  • -OWASP: -Regular expression Denial of Service - ReDoS. -
  • -
  • Wikipedia: ReDoS.
  • -
  • Wikipedia: Time complexity.
  • -
  • James Kirrage, Asiri Rathnayake, Hayo Thielecke: -Static Analysis for Regular Expression Denial-of-Service Attack. -
  • -
    diff --git a/javascript/ql/src/Performance/ReDoSIntroduction.qhelp b/javascript/ql/src/Performance/ReDoSIntroduction.qhelp new file mode 100644 index 000000000000..4e3d3a06f65d --- /dev/null +++ b/javascript/ql/src/Performance/ReDoSIntroduction.qhelp @@ -0,0 +1,55 @@ + + + +

    + + Some regular expressions take a long time to match certain + input strings to the point where the time it takes to match a string + of length n is proportional to nk or even + 2n. Such regular expressions can negatively affect + performance, or even allow a malicious user to perform a Denial of + Service ("DoS") attack by crafting an expensive input string for the + regular expression to match. + +

    + +

    + + The regular expression engines provided by many popular + JavaScript platforms use backtracking non-deterministic finite + automata to implement regular expression matching. While this approach + is space-efficient and allows supporting advanced features like + capture groups, it is not time-efficient in general. The worst-case + time complexity of such an automaton can be polynomial or even + exponential, meaning that for strings of a certain shape, increasing + the input length by ten characters may make the automaton about 1000 + times slower. + +

    + +

    + + Typically, a regular expression is affected by this + problem if it contains a repetition of the form r* or + r+ where the sub-expression r is ambiguous + in the sense that it can match some string in multiple ways. More + information about the precise circumstances can be found in the + references. + +

    +
    + + + +

    + + Modify the regular expression to remove the ambiguity, or + ensure that the strings matched with the regular expression are short + enough that the time-complexity does not matter. + +

    + +
    +
    diff --git a/javascript/ql/src/Performance/ReDoSReferences.qhelp b/javascript/ql/src/Performance/ReDoSReferences.qhelp new file mode 100644 index 000000000000..2b3e5f17c62d --- /dev/null +++ b/javascript/ql/src/Performance/ReDoSReferences.qhelp @@ -0,0 +1,16 @@ + + + +
  • + OWASP: + Regular expression Denial of Service - ReDoS. +
  • +
  • Wikipedia: ReDoS.
  • +
  • Wikipedia: Time complexity.
  • +
  • James Kirrage, Asiri Rathnayake, Hayo Thielecke: + Static Analysis for Regular Expression Denial-of-Service Attack. +
  • +
    +
    diff --git a/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoS.qll b/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoS.qll new file mode 100644 index 000000000000..2ae0fcc749de --- /dev/null +++ b/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoS.qll @@ -0,0 +1,31 @@ +/** + * Provides a taint tracking configuration for reasoning about + * polynomial regular expression denial-of-service attacks. + * + * Note, for performance reasons: only import this file if + * `PolynomialReDoS::Configuration` is needed, otherwise + * `PolynomialReDoSCustomizations` should be imported instead. + */ +import javascript + +module PolynomialReDoS { + import PolynomialReDoSCustomizations::PolynomialReDoS + + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "PolynomialReDoS" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizerGuard(TaintTracking::SanitizerGuardNode node) { + super.isSanitizerGuard(node) or + node instanceof LengthGuard + } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } + } +} diff --git a/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoSCustomizations.qll b/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoSCustomizations.qll new file mode 100644 index 000000000000..d4b8a5a83173 --- /dev/null +++ b/javascript/ql/src/semmle/javascript/security/performance/PolynomialReDoSCustomizations.qll @@ -0,0 +1,103 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about + * polynomial regular expression denial-of-service attacks, as well + * as extension points for adding your own. + */ + +import javascript +import SuperlinearBackTracking + +module PolynomialReDoS { + /** + * A data flow source node for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { + abstract RegExpTerm getRegExp(); + } + + /** + * A sanitizer for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A remote input to a server, seen as a source for polynomial + * regular expression denial-of-service vulnerabilities. + */ + class RequestInputAccessAsSource extends Source { + RequestInputAccessAsSource() { this instanceof HTTP::RequestInputAccess } + } + + /** + * A use of a superlinear backtracking term, seen as a sink for polynomial + * regular expression denial-of-service vulnerabilities. + */ + class PolynomialBackTrackingTermUse extends Sink { + PolynomialBackTrackingTerm term; + + PolynomialBackTrackingTermUse() { + exists(DataFlow::MethodCallNode mcn, DataFlow::Node regexp, string name | + term.getRootTerm() = RegExp::getRegExpFromNode(regexp) + | + this = mcn.getArgument(0) and + regexp = mcn.getReceiver() and + ( + name = "match" or + name = "split" or + name = "matchAll" or + name = "replace" or + name = "search" + ) + or + this = mcn.getReceiver() and + regexp = mcn.getArgument(0) and + (name = "test" or name = "exec") + ) + } + + override RegExpTerm getRegExp() { result = term } + } + + + /** + * An operation that limits the length of a string, seen as a sanitizer. + */ + class StringLengthLimiter extends Sanitizer { + StringLengthLimiter() { + this.(StringReplaceCall).isGlobal() + or + exists(string name | name = "slice" or name = "substring" or name = "substr" | + this.(DataFlow::MethodCallNode).getMethodName() = name + ) + } + } + + /** + * An check on the length of a string, seen as a sanitizer guard. + */ + class LengthGuard extends TaintTracking::SanitizerGuardNode, DataFlow::ValueNode { + DataFlow::Node input; + boolean polarity; + + LengthGuard() { + exists(RelationalComparison cmp, DataFlow::PropRead length | + this.asExpr() = cmp and + length.accesses(input, "length") + | + length.flowsTo(cmp.getLesserOperand().flow()) and polarity = true + or + length.flowsTo(cmp.getGreaterOperand().flow()) and polarity = false + ) + } + + override predicate sanitizes(boolean outcome, Expr e) { + outcome = polarity and + e = input.asExpr() + } + } +} diff --git a/javascript/ql/src/semmle/javascript/security/performance/SuperlinearBackTracking.qll b/javascript/ql/src/semmle/javascript/security/performance/SuperlinearBackTracking.qll new file mode 100644 index 000000000000..d1bdd0bc92c2 --- /dev/null +++ b/javascript/ql/src/semmle/javascript/security/performance/SuperlinearBackTracking.qll @@ -0,0 +1,142 @@ +/** + * Provides classes for working with regular expressions that can + * perform backtracking in superlinear time. + */ + +import javascript + +/** + * A regular expression term that permits unlimited repetitions. + */ +private class InfiniteRepetitionQuantifier extends RegExpQuantifier { + InfiniteRepetitionQuantifier() { + this instanceof RegExpPlus + or + this instanceof RegExpStar + or + this instanceof RegExpRange and not exists(this.(RegExpRange).getUpperBound()) + } +} + +/** + * Holds if `t` matches at least an epsilon symbol. + * + * That is, this term does not restrict the language of the enclosing regular expression. + * + * This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular. + */ +private predicate matchesEpsilon(RegExpTerm t) { + t instanceof RegExpStar + or + t instanceof RegExpOpt + or + t.(RegExpRange).getLowerBound() = 0 + or + exists(RegExpTerm child | + child = t.getAChild() and + matchesEpsilon(child) + | + t instanceof RegExpAlt or + t instanceof RegExpGroup or + t instanceof RegExpPlus or + t instanceof RegExpRange + ) + or + matchesEpsilon(t.(RegExpBackRef).getGroup()) + or + forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child)) +} + +/** + * Gets a term that matches the symbol immediately before `t` is done matching. + * + * Examples: + * + * - For `d` in `abc?de` this gets `b`, `c`, `c?` (in addition to `d`). + * - For `(bc|de)` in `a(bc|de)f` this gets `c` and `e` (in addition to `bc|de` and `(bc|de)`). + */ +private RegExpTerm getAMatchPredecessor(RegExpTerm t) { + result = t + or + exists(RegExpTerm recurse | result = getAMatchPredecessor(recurse) | + // wrappers depend on their children + recurse = t.getAChild() and + ( + t instanceof RegExpAlt + or + t instanceof RegExpGroup + or + t instanceof RegExpQuantifier + ) + or + recurse = t.(RegExpSequence).getLastChild() + or + recurse = t.(RegExpBackRef).getGroup() + or + // recurse past epsilon terms + matchesEpsilon(t) and recurse = t.getPredecessor() + ) +} + +private RegExpCharacterClassEscape unwrapCharacterClassEscape(RegExpTerm t) { + t = result or + t.(RegExpCharacterClass).getAChild() = result +} + +pragma[inline] +private predicate compatibleConstants(RegExpTerm t1, RegExpTerm t2) { + exists(string s1, string s2 | + s1 = t1.getAMatchedString() and s2 = t2.getAMatchedString() + or + unwrapCharacterClassEscape(t1).getValue() = s1 and + unwrapCharacterClassEscape(t2).getValue() = s2 + | + s1 = s2 + ) +} + +/** + * Holds if `s1` and `s2` possibly have a non-empty intersection. + * + * This is a simple, and under-approximate, version of + * ReDoS::compatible/2, as this predicate only handles some character + * classes and constant values. + */ +pragma[inline] +private predicate compatible(RegExpTerm s1, RegExpTerm s2) { + not s1.(RegExpCharacterClass).isInverted() and + not s2.(RegExpCharacterClass).isInverted() and + compatibleConstants(s1, s2) +} + +/** + * A term that may cause a regular expression engine to perform a + * polynomial number of match attempts, relative to the input length. + */ +class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier { + string reason; + + PolynomialBackTrackingTerm() { + // the regexp may fail to match ... + exists(RegExpTerm succ | this.getSuccessor+() = succ | not matchesEpsilon(succ)) and + ( + // ... and while failing, it will try to start matching at all positions of a long string + forall(RegExpTerm pred | pred = this.getPredecessor+() | matchesEpsilon(pred)) and + reason = "it can start matching anywhere" + or + exists(InfiniteRepetitionQuantifier pred | + pred = getAMatchPredecessor(this.getPredecessor()) and + compatible(pred.getAChild(), this.getAChild()) + | + reason = "it can start matching anywhere after the start of the preceeding '" + + pred.toString() + "'" + ) + ) and + not this.getParent*() instanceof RegExpSubPattern // too many corner cases + } + + /** + * Gets the reason for the number of match attempts. + */ + string getReason() { result = reason } +} diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected new file mode 100644 index 000000000000..98f5cb00ed4d --- /dev/null +++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected @@ -0,0 +1,139 @@ +| polynomial-redos.js:7:24:7:26 | \\s+ | it can start matching anywhere | +| polynomial-redos.js:8:17:8:18 | * | it can start matching anywhere | +| polynomial-redos.js:9:19:9:21 | \\s* | it can start matching anywhere | +| polynomial-redos.js:11:19:11:20 | .* | it can start matching anywhere | +| polynomial-redos.js:12:19:12:20 | .* | it can start matching anywhere | +| polynomial-redos.js:15:28:15:35 | [\\s\\S]*? | it can start matching anywhere after the start of the preceeding '\\s*' | +| polynomial-redos.js:18:17:18:22 | [0-9]* | it can start matching anywhere | +| polynomial-redos.js:18:83:18:100 | [\\u0600-\\u06FF\\/]+ | it can start matching anywhere | +| polynomial-redos.js:19:17:19:22 | [0-9]* | it can start matching anywhere | +| polynomial-redos.js:20:56:20:58 | \\d+ | it can start matching anywhere after the start of the preceeding '\\d*' | +| polynomial-redos.js:20:56:20:58 | \\d+ | it can start matching anywhere after the start of the preceeding '\\d+' | +| polynomial-redos.js:22:57:22:59 | \\d+ | it can start matching anywhere after the start of the preceeding '\\d*' | +| polynomial-redos.js:22:57:22:59 | \\d+ | it can start matching anywhere after the start of the preceeding '\\d+' | +| polynomial-redos.js:25:37:25:56 | [a-zA-Z0-9+\\/ \\t\\n]+ | it can start matching anywhere after the start of the preceeding '[ \\t]+' | +| polynomial-redos.js:27:14:27:22 | [A-Z]{2,} | it can start matching anywhere | +| polynomial-redos.js:30:19:30:22 | [?]+ | it can start matching anywhere | +| polynomial-redos.js:31:42:31:43 | -+ | it can start matching anywhere | +| polynomial-redos.js:32:45:32:47 | \\n* | it can start matching anywhere | +| polynomial-redos.js:33:17:33:20 | (.)* | it can start matching anywhere | +| regexplib/address.js:18:26:18:31 | [ \\w]* | it can start matching anywhere after the start of the preceeding '[ \\w]{3,}' | +| regexplib/address.js:20:144:20:147 | [ ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9 \\-.]{6,}' | +| regexplib/address.js:24:26:24:31 | [ \\w]* | it can start matching anywhere after the start of the preceeding '[ \\w]{3,}' | +| regexplib/address.js:27:3:27:5 | \\s* | it can start matching anywhere | +| regexplib/address.js:27:48:27:50 | \\s* | it can start matching anywhere | +| regexplib/address.js:27:93:27:95 | \\s* | it can start matching anywhere | +| regexplib/address.js:38:39:38:45 | [ 0-9]* | it can start matching anywhere after the start of the preceeding '[ \|\\.]*' | +| regexplib/address.js:51:235:51:239 | \\x20* | it can start matching anywhere after the start of the preceeding '\\x20*' | +| regexplib/address.js:51:631:51:635 | \\x20* | it can start matching anywhere after the start of the preceeding '\\x20*' | +| regexplib/address.js:51:796:51:798 | \\s+ | it can start matching anywhere after the start of the preceeding '\\s+' | +| regexplib/address.js:67:379:67:755 | [a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\' ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\']+' | +| regexplib/address.js:69:3:69:5 | \\s* | it can start matching anywhere | +| regexplib/address.js:69:48:69:50 | \\s* | it can start matching anywhere | +| regexplib/address.js:69:93:69:95 | \\s* | it can start matching anywhere | +| regexplib/address.js:75:235:75:239 | \\x20* | it can start matching anywhere after the start of the preceeding '\\x20*' | +| regexplib/address.js:75:631:75:635 | \\x20* | it can start matching anywhere after the start of the preceeding '\\x20*' | +| regexplib/address.js:75:796:75:798 | \\s+ | it can start matching anywhere after the start of the preceeding '\\s+' | +| regexplib/address.js:85:15:85:49 | ([0-9]\|[ ]\|[-]\|[\\(]\|[\\)]\|ext.\|[,])+ | it can start matching anywhere | +| regexplib/address.js:85:51:85:67 | ([ ]\|[:]\|\\t\|[-])* | it can start matching anywhere after the start of the preceeding '([0-9]\|[ ]\|[-]\|[\\(]\|[\\)]\|ext.\|[,])+' | +| regexplib/address.js:93:3:93:5 | \\s* | it can start matching anywhere | +| regexplib/address.js:93:48:93:50 | \\s* | it can start matching anywhere | +| regexplib/address.js:93:93:93:95 | \\s* | it can start matching anywhere | +| regexplib/address.js:95:379:95:755 | [a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\' ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿ\\.\\,\\-\\/\\']+' | +| regexplib/email.js:8:16:8:49 | [^ \\t\\(\\)\\<\\>@,;\\:\\\\\\"\\.\\[\\]\\r\\n]+ | it can start matching anywhere | +| regexplib/email.js:12:2:12:4 | \\w+ | it can start matching anywhere | +| regexplib/email.js:15:28:15:30 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' | +| regexplib/email.js:20:3:20:6 | \\w+? | it can start matching anywhere | +| regexplib/email.js:28:2:28:4 | \\w+ | it can start matching anywhere | +| regexplib/email.js:28:27:28:29 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' | +| regexplib/email.js:28:73:28:87 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere | +| regexplib/email.js:28:125:28:139 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere | +| regexplib/email.js:29:2:29:7 | [\\w-]+ | it can start matching anywhere | +| regexplib/markup.js:6:99:6:113 | [\\s\\w\\d\\)\\(\\,]* | it can start matching anywhere after the start of the preceeding '[\\d\\w]+' | +| regexplib/markup.js:19:2:19:12 | (+()\\s-]+\|\\*\|\\[.*?\\])+ | it can start matching anywhere | +| tst.js:66:16:66:31 | [\\w#:.~>+()\\s-]+ | it can start matching anywhere | +| tst.js:66:46:66:48 | \\s* | it can start matching anywhere after the start of the preceeding '[\\w#:.~>+()\\s-]+' | +| tst.js:74:14:74:21 | (b\|a?b)* | it can start matching anywhere | +| tst.js:77:14:77:21 | (a\|aa?)* | it can start matching anywhere | +| tst.js:80:14:80:20 | (.\|\\n)* | it can start matching anywhere | +| tst.js:83:14:83:20 | (.\|\\n)* | it can start matching anywhere | +| tst.js:89:25:89:32 | (a\|aa?)* | it can start matching anywhere | +| tst.js:92:14:92:21 | (a\|aa?)* | it can start matching anywhere | diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.ql b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.ql new file mode 100644 index 000000000000..592e5aad7a88 --- /dev/null +++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.ql @@ -0,0 +1,4 @@ +import semmle.javascript.security.performance.SuperlinearBackTracking + +from PolynomialBackTrackingTerm t +select t, t.getReason() diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.expected b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.expected new file mode 100644 index 000000000000..4d3a9a12da69 --- /dev/null +++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.expected @@ -0,0 +1,74 @@ +nodes +| polynomial-redos.js:5:6:5:32 | tainted | +| polynomial-redos.js:5:16:5:32 | req.query.tainted | +| polynomial-redos.js:5:16:5:32 | req.query.tainted | +| polynomial-redos.js:7:2:7:8 | tainted | +| polynomial-redos.js:7:2:7:8 | tainted | +| polynomial-redos.js:8:2:8:8 | tainted | +| polynomial-redos.js:8:2:8:8 | tainted | +| polynomial-redos.js:9:2:9:8 | tainted | +| polynomial-redos.js:9:2:9:8 | tainted | +| polynomial-redos.js:11:2:11:8 | tainted | +| polynomial-redos.js:11:2:11:8 | tainted | +| polynomial-redos.js:12:2:12:8 | tainted | +| polynomial-redos.js:12:2:12:8 | tainted | +| polynomial-redos.js:15:2:15:8 | tainted | +| polynomial-redos.js:15:2:15:8 | tainted | +| polynomial-redos.js:18:2:18:8 | tainted | +| polynomial-redos.js:18:2:18:8 | tainted | +| polynomial-redos.js:19:2:19:8 | tainted | +| polynomial-redos.js:19:2:19:8 | tainted | +| polynomial-redos.js:20:2:20:8 | tainted | +| polynomial-redos.js:20:2:20:8 | tainted | +| polynomial-redos.js:25:2:25:8 | tainted | +| polynomial-redos.js:25:2:25:8 | tainted | +| polynomial-redos.js:27:77:27:83 | tainted | +| polynomial-redos.js:27:77:27:83 | tainted | +| polynomial-redos.js:30:2:30:8 | tainted | +| polynomial-redos.js:30:2:30:8 | tainted | +| polynomial-redos.js:33:2:33:8 | tainted | +| polynomial-redos.js:33:2:33:8 | tainted | +edges +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:8:2:8:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:8:2:8:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:9:2:9:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:9:2:9:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:11:2:11:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:11:2:11:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:12:2:12:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:12:2:12:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:15:2:15:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:15:2:15:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:18:2:18:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:18:2:18:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:19:2:19:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:19:2:19:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:20:2:20:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:20:2:20:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:25:2:25:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:25:2:25:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:27:77:27:83 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:27:77:27:83 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:30:2:30:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:30:2:30:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:33:2:33:8 | tainted | +| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:33:2:33:8 | tainted | +| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted | +| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted | +#select +| polynomial-redos.js:7:2:7:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:7:2:7:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:7:24:7:26 | \\s+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:8:2:8:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:8:2:8:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:8:17:8:18 | * | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:9:2:9:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:9:2:9:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:9:19:9:21 | \\s* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:11:2:11:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:11:2:11:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:11:19:11:20 | .* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:12:2:12:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:12:2:12:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:12:19:12:20 | .* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:15:2:15:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:15:2:15:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:15:28:15:35 | [\\s\\S]*? | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:18:2:18:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:18:2:18:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:18:17:18:22 | [0-9]* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:18:2:18:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:18:2:18:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:18:83:18:100 | [\\u0600-\\u06FF\\/]+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:19:2:19:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:19:2:19:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:19:17:19:22 | [0-9]* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:20:2:20:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:20:2:20:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:20:56:20:58 | \\d+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:25:2:25:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:25:2:25:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:25:37:25:56 | [a-zA-Z0-9+\\/ \\t\\n]+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:27:77:27:83 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:27:77:27:83 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:27:14:27:22 | [A-Z]{2,} | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:30:2:30:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:30:2:30:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:30:19:30:22 | [?]+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | +| polynomial-redos.js:33:2:33:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:33:2:33:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:33:17:33:20 | (.)* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value | diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.qlref b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.qlref new file mode 100644 index 000000000000..e26045522d16 --- /dev/null +++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialReDoS.qlref @@ -0,0 +1 @@ +Performance/PolynomialReDoS.ql diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected b/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected index 6601731045f6..f31ed3e638be 100644 --- a/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected +++ b/javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected @@ -1,3 +1,5 @@ +| polynomial-redos.js:17:5:17:6 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. | +| polynomial-redos.js:41:52:41:63 | [\\x21-\\x7E]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '?'. | | regexplib/address.js:51:803:51:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. | | regexplib/address.js:75:803:75:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. | | regexplib/dates.js:66:133:66:139 | JANUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JANUARY'. | diff --git a/javascript/ql/test/query-tests/Performance/ReDoS/polynomial-redos.js b/javascript/ql/test/query-tests/Performance/ReDoS/polynomial-redos.js new file mode 100644 index 000000000000..4175bd48a50e --- /dev/null +++ b/javascript/ql/test/query-tests/Performance/ReDoS/polynomial-redos.js @@ -0,0 +1,49 @@ +var express = require('express'); +var app = express(); + +app.use(function(req, res) { + let tainted = req.query.tainted; + + tainted.replace(/^\s+|\s+$/g, ''); // NOT OK + tainted.split(/ *, */); // NOT OK + tainted.replace(/\s*\n\s*/g, ' '); // NOT OK + tainted.split('\n'); // OK + tainted.replace(/.*[/\\]/, ''); // NOT OK + tainted.replace(/.*\./, ''); // NOT OK + tainted.replace(/^.*[/\\]/, ''); // OK + tainted.replace(/^.*\./, ''); // OK + tainted.replace(/^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/); // NOT OK + tainted.replace(/^(`+)([\s\S]*?[^`])\1(?!`)/); // OK + /^(.*,)+(.+)?$/.test(tainted); // NOT OK - but only flagged by js/redos + tainted.match(/[0-9]*['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+|[\u0600-\u06FF\/]+(\s*?[\u0600-\u06FF]+){1,2}/i); // NOT OK + tainted.match(/[0-9]*['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{1,256}|[\u0600-\u06FF\/]{1,256}(\s*?[\u0600-\u06FF]{1,256}){1,2}/i); // NOT OK (even though it is a proposed fix for the above) + tainted.match(/^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$/); // NOT OK + if (tainted.length < 7000) { + tainted.match(/^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$/); // OK - but flagged + } + + tainted.match(/^([a-z0-9-]+)[ \t]+([a-zA-Z0-9+\/ \t\n]+[=]*)(.*)$/); // NOT OK + tainted.match(/^([a-z0-9-]+)[ \t\n]+([a-zA-Z0-9+\/][a-zA-Z0-9+\/ \t\n=]*)([^a-zA-Z0-9+\/ \t\n=].*)?$/); // OK + /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/.test(tainted); // NOT OK + /[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/.test(tainted); // OK + + tainted.replace(/[?]+.*$/g, ""); // OK - can not fail - but still flagged + tainted.replace(/\-\-+/g, "-").replace(/-+$/, ""); // OK - indirectly sanitized + tainted.replace(/\n\n\n+/g, "\n").replace(/\n*$/g, ""); // OK - indirectly sanitized + tainted.match(/(.)*solve\/challenges\/server-side(.)*/); // NOT OK + tainted.match(/(?![\s\S]*)/i); // OK + + tainted.match(/<.*class="([^"]+)".*>/); // NOT OK - but not flagged + tainted.match(/<.*style="([^"]+)".*>/); // NOT OK - but not flagged + tainted.match(/<.*href="([^"]+)".*>/); // NOT OK - but not flagged + + tainted.match(/^([^-]+)-([A-Za-z0-9+/]+(?:=?=?))([?\x21-\x7E]*)$/); // NOT OK - but not flagged + tainted.match(/^([^-]+)-([A-Za-z0-9+/=]{44,88})(\?[\x21-\x7E]*)*$/); // NOT OK (it is a fix for the above, but it introduces exponential complexity elsewhere) + + tainted.match(/^([a-z0-9-]+)[ \t]+([a-zA-Z0-9+\/]+[=]*)([\n \t]+([^\n]+))?$/); // NOT OK - but not flagged due to lack of support for inverted character classes + tainted.match(/^([a-z0-9-]+)[ \t]+([a-zA-Z0-9+\/]+[=]*)([ \t]+([^ \t][^\n]*[\n]*)?)?$/); // OK + + tainted.match(/^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)+$/); // NOT OK - but not flagged + tainted.match(/^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/); // OK + +});