diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index 652c89147a29..913695b2acd2 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -68,7 +68,8 @@ abstract class RegexString extends Expr { /** Whether there is a character class, between start (inclusive) and end (exclusive) */ predicate charSet(int start, int end) { exists(int inner_start, int inner_end | - this.char_set_start(start, inner_start) | + this.char_set_start(start, inner_start) and + not this.char_set_start(_, start) | end = inner_end + 1 and inner_end > inner_start and this.nonEscapedCharAt(inner_end) = "]" and not exists(int mid | this.nonEscapedCharAt(mid) = "]" | diff --git a/python/ql/test/library-tests/regex/Characters.expected b/python/ql/test/library-tests/regex/Characters.expected index f490522dc4c4..61d13b7bf59b 100644 --- a/python/ql/test/library-tests/regex/Characters.expected +++ b/python/ql/test/library-tests/regex/Characters.expected @@ -56,6 +56,11 @@ | \\A[+-]?\\d+ | 3 | 4 | | \\A[+-]?\\d+ | 4 | 5 | | \\A[+-]?\\d+ | 7 | 9 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 0 | 2 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 12 | 13 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 16 | 18 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 18 | 20 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 30 | 31 | | \\\|\\[\\][123]\|\\{\\} | 0 | 2 | | \\\|\\[\\][123]\|\\{\\} | 2 | 4 | | \\\|\\[\\][123]\|\\{\\} | 4 | 6 | diff --git a/python/ql/test/library-tests/regex/FirstLast.expected b/python/ql/test/library-tests/regex/FirstLast.expected index 52ec6df203a7..cdea10fac051 100644 --- a/python/ql/test/library-tests/regex/FirstLast.expected +++ b/python/ql/test/library-tests/regex/FirstLast.expected @@ -45,6 +45,9 @@ | \\A[+-]?\\d+ | first | 0 | 2 | | \\A[+-]?\\d+ | last | 7 | 9 | | \\A[+-]?\\d+ | last | 7 | 10 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | first | 0 | 2 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | last | 28 | 32 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | last | 28 | 33 | | \\\|\\[\\][123]\|\\{\\} | first | 0 | 2 | | \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 | | \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 | diff --git a/python/ql/test/library-tests/regex/GroupContents.expected b/python/ql/test/library-tests/regex/GroupContents.expected index 708579303024..c7c4ac97a1e3 100644 --- a/python/ql/test/library-tests/regex/GroupContents.expected +++ b/python/ql/test/library-tests/regex/GroupContents.expected @@ -10,7 +10,9 @@ | (?P[\\w]+)\| | 0 | 15 | (?P[\\w]+) | 9 | 14 | [\\w]+ | | (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ | | (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 2 | 16 | (?P[^[]*) | 10 | 15 | [^[]* | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 20 | 34 | (?P[^)]*) | 28 | 33 | [^)]* | | ^(^y\|^z)(u$\|v$)$ | 1 | 8 | (^y\|^z) | 2 | 7 | ^y\|^z | | ^(^y\|^z)(u$\|v$)$ | 8 | 15 | (u$\|v$) | 9 | 14 | u$\|v$ | | ^[A-Z_]+$(?[\\w]+)\| | 9 | 14 | false | | \\A[+-]?\\d+ | 2 | 7 | true | | \\A[+-]?\\d+ | 7 | 10 | false | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 10 | 15 | true | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | 28 | 33 | true | | ^[A-Z_]+$(?[^[]*)\\]\\((?P[^)]*) | char | 0 | 2 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char | 12 | 13 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char | 16 | 18 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char | 18 | 20 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char | 30 | 31 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char-set | 10 | 14 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | char-set | 28 | 32 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | non-empty group | 2 | 16 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | non-empty group | 20 | 34 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | qualified | 10 | 15 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | qualified | 28 | 33 | +| \\[(?P[^[]*)\\]\\((?P[^)]*) | sequence | 0 | 34 | | \\\|\\[\\][123]\|\\{\\} | char | 0 | 2 | | \\\|\\[\\][123]\|\\{\\} | char | 2 | 4 | | \\\|\\[\\][123]\|\\{\\} | char | 4 | 6 | diff --git a/python/ql/test/library-tests/regex/test.py b/python/ql/test/library-tests/regex/test.py index 44b2825dcc34..6e57b9f7c716 100644 --- a/python/ql/test/library-tests/regex/test.py +++ b/python/ql/test/library-tests/regex/test.py @@ -57,3 +57,6 @@ #Named group with caret and empty choice. re.compile(r'(?:(?P^(?:|x)))') + +#Misparsed on LGTM +re.compile(r"\[(?P[^[]*)\]\((?P[^)]*)") diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index 535765573944..f0967dc6eefe 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -136,3 +136,6 @@ #Named group with caret and empty choice. re.compile(r'(?:(?P^(?:|x)))') + +#Potentially mis-parsed character set +re.compile(r"\[(?P[^[]*)\]\((?P[^)]*)")