diff --git a/change-notes/1.20/analysis-python.md b/change-notes/1.20/analysis-python.md index ecd6742b0dac..442697b61762 100644 --- a/change-notes/1.20/analysis-python.md +++ b/change-notes/1.20/analysis-python.md @@ -14,6 +14,8 @@ Removes false positives seen when using Python 3.6, but not when using earlier v | **Query** | **Tags** | **Purpose** | |-----------------------------|-----------|--------------------------------------------------------------------| | Default version of SSL/TLS may be insecure (`py/insecure-default-protocol`) | security, external/cwe/cwe-327 | Finds instances where an insecure default protocol may be used. Results are shown on LGTM by default. | +| Incomplete regular expression for hostnames (`py/incomplete-hostname-regexp`) | security, external/cwe/cwe-020 | Finds instances where a hostname is incompletely sanitized because a regular expression contains an unescaped character. Results are shown on LGTM by default. | +| Incomplete URL substring sanitization (`py/incomplete-url-substring-sanitization`) | security, external/cwe/cwe-020 | Finds instances where a URL is incompletely sanitized due to insufficient checks. Results are shown on LGTM by default. | | Overly permissive file permissions (`py/overly-permissive-file`) | security, external/cwe/cwe-732 | Finds instances where a file is created with overly permissive permissions. Results are not shown on LGTM by default. | | Use of insecure SSL/TLS version (`py/insecure-protocol`) | security, external/cwe/cwe-327 | Finds instances where a known insecure protocol has been specified. Results are shown on LGTM by default. | diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp index 80f4a8b44fdb..b23557aa557d 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp @@ -15,9 +15,9 @@

- However, it is notoriously error-prone to treat the URL as - a string and check if one of the allowed hosts is a substring of the - URL. Malicious URLs can bypass such security checks by embedding one + However, treating the URL as a string and checking if one of the + allowed hosts is a substring of the URL is very prone to errors. + Malicious URLs can bypass such security checks by embedding one of the allowed hosts in an unexpected location.

diff --git a/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp b/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp new file mode 100644 index 000000000000..b542ae252eb1 --- /dev/null +++ b/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -0,0 +1,69 @@ + + + + +

+ + Sanitizing untrusted URLs is an important technique for + preventing attacks such as request forgeries and malicious + redirections. Often, this is done by checking that the host of a URL + is in a set of allowed hosts. + +

+ +

+ + If a regular expression implements such a check, it is + easy to accidentally make the check too permissive by not escaping the + . meta-characters appropriately. + + Even if the check is not used in a security-critical + context, the incomplete check may still cause undesirable behaviors + when it accidentally succeeds. + +

+
+ + +

+ + Escape all meta-characters appropriately when constructing + regular expressions for security checks, pay special attention to the + . meta-character. + +

+
+ + + +

+ + The following example code checks that a URL redirection + will reach the example.com domain, or one of its + subdomains. + +

+ + + +

+ The unsafe check is easy to bypass because the unescaped + . allows for any character before + example.com, effectively allowing the redirect to go to + an attacker-controlled domain such as wwwXexample.com. + +

+

+ The safe check closes this vulnerability by escaping the . + so that URLs of the form wwwXexample.com are rejected. +

+ +
+ + +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql new file mode 100644 index 000000000000..11238db40b42 --- /dev/null +++ b/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -0,0 +1,44 @@ +/** + * @name Incomplete regular expression for hostnames + * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected. + * @kind problem + * @problem.severity warning + * @precision high + * @id py/incomplete-hostname-regexp + * @tags correctness + * security + * external/cwe/cwe-20 + */ + +import python +import semmle.python.regex + +private string commonTopLevelDomainRegex() { + result = "com|org|edu|gov|uk|net|io" +} + +/** + * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, + * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. + */ +bindingset[pattern] +predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { + hostPart = pattern + .regexpCapture("(?i).*" + + // an unescaped single `.` + "(? + + + +

    + + Sanitizing untrusted URLs is an important technique for + preventing attacks such as request forgeries and malicious + redirections. Usually, this is done by checking that the host of a URL + is in a set of allowed hosts. + +

    + +

    + However, treating the URL as a string and checking if one of the + allowed hosts is a substring of the URL is very prone to errors. + Malicious URLs can bypass such security checks by embedding one + of the allowed hosts in an unexpected location. + +

    + +

    + + Even if the substring check is not used in a + security-critical context, the incomplete check may still cause + undesirable behaviors when the check succeeds accidentally. + +

    +
    + + +

    + + Parse a URL before performing a check on its host value, + and ensure that the check handles arbitrary subdomain sequences + correctly. + +

    +
    + + + +

    + + The following example code checks that a URL redirection + will reach the example.com domain. + +

    + + + +

    + + The first two examples show unsafe checks that are easily bypassed. + In unsafe1 the attacker can simply add + example.com anywhere in the url. For example, + http://evil-example.net/example.com. +

    +

    + In unsafe2 the attacker must use a hostname ending in + example.com, but that is easy to do. For example, + http://benign-looking-prefix-example.com. + +

    + +

    + + The second two examples show safe checks. + In safe1, a white-list is used. Although fairly inflexible, + this is easy to get right and is most likely to be safe. +

    +

    + In safe2, urlparse is used to parse the URL, + then the hostname is checked to make sure it ends with .example.com. +

    + +
    + + +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/python/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql b/python/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql new file mode 100644 index 000000000000..bc4fdb3d437b --- /dev/null +++ b/python/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql @@ -0,0 +1,58 @@ +/** + * @name Incomplete URL substring sanitization + * @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing. + * @kind problem + * @problem.severity warning + * @precision high + * @id py/incomplete-url-substring-sanitization + * @tags correctness + * security + * external/cwe/cwe-20 + */ + + +import python +import semmle.python.regex + +private string commonTopLevelDomainRegex() { + result = "com|org|edu|gov|uk|net|io" +} + +predicate looksLikeUrl(StrConst s) { + exists(string text | + text = s.getText() + | + text.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + + commonTopLevelDomainRegex() +")(:[0-9]+)?/?") + or + // target is a HTTP URL to a domain on any TLD + text.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?") + ) +} + +predicate incomplete_sanitization(Expr sanitizer, StrConst url) { + looksLikeUrl(url) and + ( + sanitizer.(Compare).compares(url, any(In i), _) + or + call_to_startswith(sanitizer, url) + or + unsafe_call_to_endswith(sanitizer, url) + ) +} + +predicate call_to_startswith(Call sanitizer, StrConst url) { + sanitizer.getFunc().(Attribute).getName() = "startswith" + and + sanitizer.getArg(0) = url +} + +predicate unsafe_call_to_endswith(Call sanitizer, StrConst url) { + sanitizer.getFunc().(Attribute).getName() = "endswith" and + sanitizer.getArg(0) = url and + not url.getText().regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+") +} + +from Expr sanitizer, StrConst url +where incomplete_sanitization(sanitizer, url) +select sanitizer, "'$@' may be at an arbitrary position in the sanitized URL.", url, url.getText() diff --git a/python/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.py b/python/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.py new file mode 100644 index 000000000000..957f16fb72b4 --- /dev/null +++ b/python/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.py @@ -0,0 +1,19 @@ +from flask import Flask, request, redirect +import re + +app = Flask(__name__) + +UNSAFE_REGEX = re.compile("(www|beta).example.com/") +SAFE_REGEX = re.compile(r"(www|beta)\.example\.com/") + +@app.route('/some/path/bad') +def unsafe(request): + target = request.args.get('target', '') + if UNSAFE_REGEX.match(target): + return redirect(target) + +@app.route('/some/path/good') +def safe(request): + target = request.args.get('target', '') + if SAFE_REGEX.match(target): + return redirect(target) diff --git a/python/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization.py b/python/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization.py new file mode 100644 index 000000000000..937a23f806f3 --- /dev/null +++ b/python/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization.py @@ -0,0 +1,45 @@ +from flask import Flask, request, redirect +from urllib.parse import urlparse + +app = Flask(__name__) + +# Not safe, as "evil-example.net/example.com" would be accepted + +@app.route('/some/path/bad1') +def unsafe1(request): + target = request.args.get('target', '') + if "example.com" in target: + return redirect(target) + +# Not safe, as "benign-looking-prefix-example.com" would be accepted + +@app.route('/some/path/bad2') +def unsafe2(request): + target = request.args.get('target', '') + if target.endswith("example.com"): + return redirect(target) + + + +#Simplest and safest approach is to use a white-list + +@app.route('/some/path/good1') +def safe1(request): + whitelist = [ + "example.com/home", + "example.com/login", + ] + target = request.args.get('target', '') + if target in whitelist: + return redirect(target) + +#More complex example allowing sub-domains. + +@app.route('/some/path/good2') +def safe2(request): + target = request.args.get('target', '') + host = urlparse(target).hostname + #Note the '.' preceding example.com + if host and host.endswith(".example.com"): + return redirect(target) + diff --git a/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected new file mode 100644 index 000000000000..00e0fcb95a0e --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -0,0 +1 @@ +| hosttest.py:6:27:6:51 | Str | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | diff --git a/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref b/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref new file mode 100644 index 000000000000..e818d9472521 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref @@ -0,0 +1 @@ +Security/CWE-020/IncompleteHostnameRegExp.ql \ No newline at end of file diff --git a/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.expected b/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.expected new file mode 100644 index 000000000000..4b2ba67ecdab --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.expected @@ -0,0 +1,2 @@ +| urltest.py:9:8:9:30 | Compare | '$@' may be at an arbitrary position in the sanitized URL. | urltest.py:9:8:9:20 | Str | example.com | +| urltest.py:15:8:15:37 | Attribute() | '$@' may be at an arbitrary position in the sanitized URL. | urltest.py:15:24:15:36 | Str | example.com | diff --git a/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.qlref b/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.qlref new file mode 100644 index 000000000000..3fa6794419d7 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.qlref @@ -0,0 +1 @@ +Security/CWE-020/IncompleteUrlSubstringSanitization.ql \ No newline at end of file diff --git a/python/ql/test/query-tests/Security/CWE-020/hosttest.py b/python/ql/test/query-tests/Security/CWE-020/hosttest.py new file mode 100644 index 000000000000..957f16fb72b4 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/hosttest.py @@ -0,0 +1,19 @@ +from flask import Flask, request, redirect +import re + +app = Flask(__name__) + +UNSAFE_REGEX = re.compile("(www|beta).example.com/") +SAFE_REGEX = re.compile(r"(www|beta)\.example\.com/") + +@app.route('/some/path/bad') +def unsafe(request): + target = request.args.get('target', '') + if UNSAFE_REGEX.match(target): + return redirect(target) + +@app.route('/some/path/good') +def safe(request): + target = request.args.get('target', '') + if SAFE_REGEX.match(target): + return redirect(target) diff --git a/python/ql/test/query-tests/Security/CWE-020/urltest.py b/python/ql/test/query-tests/Security/CWE-020/urltest.py new file mode 100644 index 000000000000..d1f629794600 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-020/urltest.py @@ -0,0 +1,41 @@ +from flask import Flask, request, redirect +from urllib.parse import urlparse + +app = Flask(__name__) + +@app.route('/some/path/bad1') +def unsafe1(request): + target = request.args.get('target', '') + if "example.com" in target: + return redirect(target) + +@app.route('/some/path/bad2') +def unsafe2(request): + target = request.args.get('target', '') + if target.endswith("example.com"): + return redirect(target) + + + +#Simplest and safest approach is to use a white-list + +@app.route('/some/path/good1') +def safe1(request): + whitelist = [ + "example.com/home", + "example.com/login", + ] + target = request.args.get('target', '') + if target in whitelist: + return redirect(target) + +#More complex example allowing sub-domains. + +@app.route('/some/path/good2') +def safe2(request): + target = request.args.get('target', '') + host = urlparse(target).hostname + #Note the '.' preceding example.com + if host and host.endswith(".example.com"): + return redirect(target) +