From e4b83855d9097c60e4784358809e5da4a97c8b4b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 19 Feb 2020 15:11:36 +0100 Subject: [PATCH 1/9] Python: Autoformat security/strings/External.qll --- .../python/security/strings/External.qll | 72 +++++++------------ 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index bd567f54f24f..4dcbe0d17e3c 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -2,45 +2,36 @@ import python import Basic private import Common -/** An extensible kind of taint representing an externally controlled string. +/** + * An extensible kind of taint representing an externally controlled string. */ abstract class ExternalStringKind extends StringKind { - bindingset[this] - ExternalStringKind() { - this = this - } + ExternalStringKind() { this = this } override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { result = StringKind.super.getTaintForFlowStep(fromnode, tonode) or - tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this + tonode.(SequenceNode).getElement(_) = fromnode and + result.(ExternalStringSequenceKind).getItem() = this or json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this or tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this } - } /** A kind of "taint", representing a sequence, with a "taint" member */ class ExternalStringSequenceKind extends SequenceKind { - - ExternalStringSequenceKind() { - this.getItem() instanceof ExternalStringKind - } - + ExternalStringSequenceKind() { this.getItem() instanceof ExternalStringKind } } -/** An hierachical dictionary or list where the entire structure is externally controlled +/** + * An hierachical dictionary or list where the entire structure is externally controlled * This is typically a parsed JSON object. */ class ExternalJsonKind extends TaintKind { - - ExternalJsonKind() { - this = "json[" + any(ExternalStringKind key) + "]" - } - + ExternalJsonKind() { this = "json[" + any(ExternalStringKind key) + "]" } /** Gets the taint kind for item in this sequence */ TaintKind getValue() { @@ -54,65 +45,52 @@ class ExternalJsonKind extends TaintKind { json_subscript_taint(tonode, fromnode, this, result) or result = this and copy_call(fromnode, tonode) - } + } override TaintKind getTaintOfMethodResult(string name) { name = "get" and result = this.getValue() - } - + } } /** A kind of "taint", representing a dictionary mapping str->"taint" */ class ExternalStringDictKind extends DictKind { - - ExternalStringDictKind() { - this.getValue() instanceof ExternalStringKind - } - + ExternalStringDictKind() { this.getValue() instanceof ExternalStringKind } } -/** A kind of "taint", representing a dictionary mapping strings to sequences of - * tainted strings */ - +/** + * A kind of "taint", representing a dictionary mapping strings to sequences of + * tainted strings + */ class ExternalStringSequenceDictKind extends DictKind { - ExternalStringSequenceDictKind() { - this.getValue() instanceof ExternalStringSequenceKind - } + ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind } } /* Helper for getTaintForStep() */ -pragma [noinline] -private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) { +pragma[noinline] +private predicate json_subscript_taint( + SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key +) { sub.isLoad() and sub.getValue() = obj and key = seq.getValue() } - private predicate json_load(ControlFlowNode fromnode, CallNode tonode) { exists(FunctionObject json_loads | ModuleObject::named("json").attr("loads") = json_loads and - json_loads.getACall() = tonode and tonode.getArg(0) = fromnode + json_loads.getACall() = tonode and + tonode.getArg(0) = fromnode ) } /** A kind of "taint", representing an open file-like object from an external source. */ class ExternalFileObject extends TaintKind { - - ExternalFileObject() { - this = "file[" + any(ExternalStringKind key) + "]" - } - + ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" } /** Gets the taint kind for the contents of this file */ - TaintKind getValue() { - this = "file[" + result + "]" - } + TaintKind getValue() { this = "file[" + result + "]" } override TaintKind getTaintOfMethodResult(string name) { name = "read" and result = this.getValue() } - } - - From 74345b1c05e496d4eb711f031b0890981841e77e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 19 Feb 2020 15:33:53 +0100 Subject: [PATCH 2/9] Python: Make library-tests/taint/strings tests more transparent Following the setup I invented for library-tests/taint/unpacking. TestStep is still a bit annoying, since the output is not easy to eyeball; but for now I guess we can live with it :) I honestly didn't get the point of DistinctStringKinds.ql, other than showing we can handle multiple taint kinds --- .../strings/DistinctStringKinds.expected | 16 ---- .../taint/strings/DistinctStringKinds.ql | 37 -------- .../library-tests/taint/strings/Taint.qll | 16 +++- .../taint/strings/TestNode.expected | 43 ---------- .../library-tests/taint/strings/TestNode.ql | 8 -- .../taint/strings/TestStep.expected | 84 +++++++++++++------ .../taint/strings/TestTaint.expected | 22 +++++ .../library-tests/taint/strings/TestTaint.ql | 18 ++++ .../test/library-tests/taint/strings/test.py | 27 +++--- 9 files changed, 129 insertions(+), 142 deletions(-) delete mode 100644 python/ql/test/library-tests/taint/strings/DistinctStringKinds.expected delete mode 100644 python/ql/test/library-tests/taint/strings/DistinctStringKinds.ql delete mode 100644 python/ql/test/library-tests/taint/strings/TestNode.expected delete mode 100644 python/ql/test/library-tests/taint/strings/TestNode.ql create mode 100644 python/ql/test/library-tests/taint/strings/TestTaint.expected create mode 100644 python/ql/test/library-tests/taint/strings/TestTaint.ql diff --git a/python/ql/test/library-tests/taint/strings/DistinctStringKinds.expected b/python/ql/test/library-tests/taint/strings/DistinctStringKinds.expected deleted file mode 100644 index e7c6590cb50a..000000000000 --- a/python/ql/test/library-tests/taint/strings/DistinctStringKinds.expected +++ /dev/null @@ -1,16 +0,0 @@ -| Taint exception.info | test.py:41 | test.py:41:22:41:26 | taint | p1 = exception.info | -| Taint exception.info | test.py:42 | test.py:42:12:42:22 | func() | p1 = exception.info | -| Taint exception.info | test.py:42 | test.py:42:17:42:21 | taint | p1 = exception.info | -| Taint exception.info | test.py:45 | test.py:45:12:45:33 | TAINTED_EXCEPTION_INFO | | -| Taint exception.info | test.py:46 | test.py:46:11:46:41 | cross_over() | | -| Taint exception.info | test.py:46 | test.py:46:37:46:40 | info | | -| Taint exception.info | test.py:48 | test.py:48:19:48:21 | arg | p0 = exception.info | -| Taint exception.info | test.py:49 | test.py:49:12:49:14 | arg | p0 = exception.info | -| Taint externally controlled string | test.py:41 | test.py:41:22:41:26 | taint | p1 = externally controlled string | -| Taint externally controlled string | test.py:42 | test.py:42:12:42:22 | func() | p1 = externally controlled string | -| Taint externally controlled string | test.py:42 | test.py:42:17:42:21 | taint | p1 = externally controlled string | -| Taint externally controlled string | test.py:48 | test.py:48:19:48:21 | arg | p0 = externally controlled string | -| Taint externally controlled string | test.py:49 | test.py:49:12:49:14 | arg | p0 = externally controlled string | -| Taint externally controlled string | test.py:52 | test.py:52:11:52:33 | TAINTED_EXTERNAL_STRING | | -| Taint externally controlled string | test.py:53 | test.py:53:11:53:41 | cross_over() | | -| Taint externally controlled string | test.py:53 | test.py:53:38:53:40 | ext | | diff --git a/python/ql/test/library-tests/taint/strings/DistinctStringKinds.ql b/python/ql/test/library-tests/taint/strings/DistinctStringKinds.ql deleted file mode 100644 index a1bf5ea5e1f6..000000000000 --- a/python/ql/test/library-tests/taint/strings/DistinctStringKinds.ql +++ /dev/null @@ -1,37 +0,0 @@ -import python -import semmle.python.security.TaintTracking - -import semmle.python.security.Exceptions -import semmle.python.security.strings.Untrusted - - -class ExceptionInfoSource extends TaintSource { - - ExceptionInfoSource() { this.(NameNode).getId() = "TAINTED_EXCEPTION_INFO" } - - override predicate isSourceOf(TaintKind kind) { - kind instanceof ExceptionInfo - } - - override string toString() { - result = "Exception info source" - } - -} - -class ExternalStringSource extends TaintSource { - - ExternalStringSource() { this.(NameNode).getId() = "TAINTED_EXTERNAL_STRING" } - - override predicate isSourceOf(TaintKind kind) { - kind instanceof ExternalStringKind - } - - override string toString() { - result = "Untrusted string source" - } - -} -from TaintedNode n -where n.getLocation().getFile().getShortName() = "test.py" -select "Taint " + n.getTaintKind(), n.getLocation().toString(), n.getAstNode(), n.getContext() diff --git a/python/ql/test/library-tests/taint/strings/Taint.qll b/python/ql/test/library-tests/taint/strings/Taint.qll index b5b2fb61a5e2..d045534e0ef5 100644 --- a/python/ql/test/library-tests/taint/strings/Taint.qll +++ b/python/ql/test/library-tests/taint/strings/Taint.qll @@ -1,11 +1,12 @@ import python import semmle.python.security.TaintTracking import semmle.python.security.strings.Untrusted +import semmle.python.security.Exceptions class SimpleSource extends TaintSource { - SimpleSource() { this.(NameNode).getId() = "TAINTED" } + SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" } override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind @@ -46,3 +47,16 @@ class DictSource extends TaintSource { } +class ExceptionInfoSource extends TaintSource { + + ExceptionInfoSource() { this.(NameNode).getId() = "TAINTED_EXCEPTION_INFO" } + + override predicate isSourceOf(TaintKind kind) { + kind instanceof ExceptionInfo + } + + override string toString() { + result = "Exception info source" + } + +} diff --git a/python/ql/test/library-tests/taint/strings/TestNode.expected b/python/ql/test/library-tests/taint/strings/TestNode.expected deleted file mode 100644 index 3e248be5f897..000000000000 --- a/python/ql/test/library-tests/taint/strings/TestNode.expected +++ /dev/null @@ -1,43 +0,0 @@ -| Taint externally controlled string | test.py:5 | test.py:5:22:5:28 | TAINTED | | -| Taint externally controlled string | test.py:6 | test.py:6:31:6:44 | tainted_string | | -| Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | -| Taint externally controlled string | test.py:8 | test.py:8:9:8:9 | a | | -| Taint externally controlled string | test.py:8 | test.py:8:9:8:18 | Attribute() | | -| Taint externally controlled string | test.py:9 | test.py:9:9:9:9 | b | | -| Taint externally controlled string | test.py:9 | test.py:9:9:9:14 | Subscript | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | -| Taint externally controlled string | test.py:13 | test.py:13:9:13:22 | tainted_string | | -| Taint externally controlled string | test.py:13 | test.py:13:9:13:31 | Attribute() | | -| Taint externally controlled string | test.py:14 | test.py:14:9:14:22 | tainted_string | | -| Taint externally controlled string | test.py:14 | test.py:14:9:14:29 | Attribute() | | -| Taint externally controlled string | test.py:15 | test.py:15:9:15:22 | tainted_string | | -| Taint externally controlled string | test.py:15 | test.py:15:9:15:25 | Subscript | | -| Taint externally controlled string | test.py:16 | test.py:16:9:16:22 | tainted_string | | -| Taint externally controlled string | test.py:16 | test.py:16:9:16:27 | Subscript | | -| Taint externally controlled string | test.py:17 | test.py:17:9:17:32 | reversed() | | -| Taint externally controlled string | test.py:17 | test.py:17:18:17:31 | tainted_string | | -| Taint externally controlled string | test.py:18 | test.py:18:9:18:28 | copy() | | -| Taint externally controlled string | test.py:18 | test.py:18:14:18:27 | tainted_string | | -| Taint externally controlled string | test.py:19 | test.py:19:9:19:22 | tainted_string | | -| Taint externally controlled string | test.py:19 | test.py:19:9:19:30 | Attribute() | | -| Taint externally controlled string | test.py:22 | test.py:22:22:22:28 | TAINTED | | -| Taint externally controlled string | test.py:23 | test.py:23:8:23:21 | tainted_string | | -| Taint externally controlled string | test.py:26 | test.py:26:23:26:36 | tainted_string | | -| Taint externally controlled string | test.py:29 | test.py:29:22:29:28 | TAINTED | | -| Taint externally controlled string | test.py:30 | test.py:30:8:30:21 | tainted_string | | -| Taint externally controlled string | test.py:30 | test.py:30:34:30:47 | tainted_string | | -| Taint externally controlled string | test.py:33 | test.py:33:23:33:36 | tainted_string | | -| Taint externally controlled string | test.py:36 | test.py:36:22:36:28 | TAINTED | | -| Taint externally controlled string | test.py:37 | test.py:37:9:37:27 | str() | | -| Taint externally controlled string | test.py:37 | test.py:37:13:37:26 | tainted_string | | -| Taint externally controlled string | test.py:38 | test.py:38:9:38:29 | bytes() | | -| Taint externally controlled string | test.py:38 | test.py:38:15:38:28 | tainted_string | | -| Taint externally controlled string | test.py:39 | test.py:39:9:39:46 | bytes() | | -| Taint externally controlled string | test.py:39 | test.py:39:15:39:28 | tainted_string | | -| Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | -| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | -| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | -| Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:9 | a | | -| Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:18 | Attribute() | | -| Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:9 | b | | -| Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:14 | Subscript | | diff --git a/python/ql/test/library-tests/taint/strings/TestNode.ql b/python/ql/test/library-tests/taint/strings/TestNode.ql deleted file mode 100644 index 23b6643f86a1..000000000000 --- a/python/ql/test/library-tests/taint/strings/TestNode.ql +++ /dev/null @@ -1,8 +0,0 @@ -import python -import semmle.python.security.TaintTracking -import Taint - - -from TaintedNode n -where n.getLocation().getFile().getShortName() = "test.py" -select "Taint " + n.getTaintKind(), n.getLocation().toString(), n.getCfgNode().getNode(), n.getContext() diff --git a/python/ql/test/library-tests/taint/strings/TestStep.expected b/python/ql/test/library-tests/taint/strings/TestStep.expected index 17373b5db798..eccb3efa010f 100644 --- a/python/ql/test/library-tests/taint/strings/TestStep.expected +++ b/python/ql/test/library-tests/taint/strings/TestStep.expected @@ -1,38 +1,70 @@ -| Taint externally controlled string | test.py:5 | test.py:5:22:5:28 | TAINTED | | --> | Taint externally controlled string | test.py:6 | test.py:6:31:6:44 | tainted_string | | +| Taint exception.info | test.py:44 | test.py:44:22:44:26 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | +| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:12:45:22 | func() | p1 = exception.info | +| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:52 | test.py:52:19:52:21 | arg | p0 = exception.info | +| Taint exception.info | test.py:48 | test.py:48:12:48:33 | TAINTED_EXCEPTION_INFO | | --> | Taint exception.info | test.py:49 | test.py:49:37:49:40 | info | | +| Taint exception.info | test.py:49 | test.py:49:11:49:41 | cross_over() | | --> | Taint exception.info | test.py:50 | test.py:50:10:50:12 | res | | +| Taint exception.info | test.py:49 | test.py:49:37:49:40 | info | | --> | Taint exception.info | test.py:44 | test.py:44:22:44:26 | taint | p1 = exception.info | +| Taint exception.info | test.py:49 | test.py:49:37:49:40 | info | | --> | Taint exception.info | test.py:49 | test.py:49:11:49:41 | cross_over() | | +| Taint exception.info | test.py:52 | test.py:52:19:52:21 | arg | p0 = exception.info | --> | Taint exception.info | test.py:53 | test.py:53:12:53:14 | arg | p0 = exception.info | +| Taint externally controlled string | test.py:5 | test.py:5:22:5:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:6 | test.py:6:31:6:44 | tainted_string | | | Taint externally controlled string | test.py:6 | test.py:6:31:6:44 | tainted_string | | --> | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | --> | Taint externally controlled string | test.py:8 | test.py:8:9:8:9 | a | | +| Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | --> | Taint externally controlled string | test.py:10 | test.py:10:10:10:10 | a | | | Taint externally controlled string | test.py:8 | test.py:8:9:8:18 | Attribute() | | --> | Taint externally controlled string | test.py:9 | test.py:9:9:9:9 | b | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:13 | test.py:13:9:13:22 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:14 | test.py:14:9:14:22 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:15 | test.py:15:9:15:22 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:16 | test.py:16:9:16:22 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:17 | test.py:17:18:17:31 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:18 | test.py:18:14:18:27 | tainted_string | | -| Taint externally controlled string | test.py:12 | test.py:12:22:12:28 | TAINTED | | --> | Taint externally controlled string | test.py:19 | test.py:19:9:19:22 | tainted_string | | -| Taint externally controlled string | test.py:13 | test.py:13:9:13:22 | tainted_string | | --> | Taint externally controlled string | test.py:13 | test.py:13:9:13:31 | Attribute() | | -| Taint externally controlled string | test.py:14 | test.py:14:9:14:22 | tainted_string | | --> | Taint externally controlled string | test.py:14 | test.py:14:9:14:29 | Attribute() | | -| Taint externally controlled string | test.py:15 | test.py:15:9:15:22 | tainted_string | | --> | Taint externally controlled string | test.py:15 | test.py:15:9:15:25 | Subscript | | -| Taint externally controlled string | test.py:16 | test.py:16:9:16:22 | tainted_string | | --> | Taint externally controlled string | test.py:16 | test.py:16:9:16:27 | Subscript | | -| Taint externally controlled string | test.py:17 | test.py:17:18:17:31 | tainted_string | | --> | Taint externally controlled string | test.py:17 | test.py:17:9:17:32 | reversed() | | -| Taint externally controlled string | test.py:18 | test.py:18:14:18:27 | tainted_string | | --> | Taint externally controlled string | test.py:18 | test.py:18:9:18:28 | copy() | | -| Taint externally controlled string | test.py:19 | test.py:19:9:19:22 | tainted_string | | --> | Taint externally controlled string | test.py:19 | test.py:19:9:19:30 | Attribute() | | -| Taint externally controlled string | test.py:22 | test.py:22:22:22:28 | TAINTED | | --> | Taint externally controlled string | test.py:23 | test.py:23:8:23:21 | tainted_string | | -| Taint externally controlled string | test.py:22 | test.py:22:22:22:28 | TAINTED | | --> | Taint externally controlled string | test.py:26 | test.py:26:23:26:36 | tainted_string | | -| Taint externally controlled string | test.py:29 | test.py:29:22:29:28 | TAINTED | | --> | Taint externally controlled string | test.py:30 | test.py:30:8:30:21 | tainted_string | | -| Taint externally controlled string | test.py:29 | test.py:29:22:29:28 | TAINTED | | --> | Taint externally controlled string | test.py:30 | test.py:30:34:30:47 | tainted_string | | -| Taint externally controlled string | test.py:29 | test.py:29:22:29:28 | TAINTED | | --> | Taint externally controlled string | test.py:33 | test.py:33:23:33:36 | tainted_string | | -| Taint externally controlled string | test.py:36 | test.py:36:22:36:28 | TAINTED | | --> | Taint externally controlled string | test.py:37 | test.py:37:13:37:26 | tainted_string | | -| Taint externally controlled string | test.py:36 | test.py:36:22:36:28 | TAINTED | | --> | Taint externally controlled string | test.py:38 | test.py:38:15:38:28 | tainted_string | | -| Taint externally controlled string | test.py:36 | test.py:36:22:36:28 | TAINTED | | --> | Taint externally controlled string | test.py:39 | test.py:39:15:39:28 | tainted_string | | -| Taint externally controlled string | test.py:37 | test.py:37:13:37:26 | tainted_string | | --> | Taint externally controlled string | test.py:37 | test.py:37:9:37:27 | str() | | -| Taint externally controlled string | test.py:38 | test.py:38:15:38:28 | tainted_string | | --> | Taint externally controlled string | test.py:38 | test.py:38:9:38:29 | bytes() | | -| Taint externally controlled string | test.py:39 | test.py:39:15:39:28 | tainted_string | | --> | Taint externally controlled string | test.py:39 | test.py:39:9:39:46 | bytes() | | +| Taint externally controlled string | test.py:8 | test.py:8:9:8:18 | Attribute() | | --> | Taint externally controlled string | test.py:10 | test.py:10:13:10:13 | b | | +| Taint externally controlled string | test.py:9 | test.py:9:9:9:14 | Subscript | | --> | Taint externally controlled string | test.py:10 | test.py:10:16:10:16 | c | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:14 | test.py:14:9:14:22 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:15 | test.py:15:9:15:22 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:16 | test.py:16:9:16:22 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:17 | test.py:17:9:17:22 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:18 | test.py:18:18:18:31 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:19 | test.py:19:14:19:27 | tainted_string | | +| Taint externally controlled string | test.py:13 | test.py:13:22:13:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:20 | test.py:20:9:20:22 | tainted_string | | +| Taint externally controlled string | test.py:14 | test.py:14:9:14:22 | tainted_string | | --> | Taint externally controlled string | test.py:14 | test.py:14:9:14:31 | Attribute() | | +| Taint externally controlled string | test.py:14 | test.py:14:9:14:31 | Attribute() | | --> | Taint externally controlled string | test.py:21 | test.py:21:10:21:10 | a | | +| Taint externally controlled string | test.py:15 | test.py:15:9:15:22 | tainted_string | | --> | Taint externally controlled string | test.py:15 | test.py:15:9:15:29 | Attribute() | | +| Taint externally controlled string | test.py:15 | test.py:15:9:15:29 | Attribute() | | --> | Taint externally controlled string | test.py:21 | test.py:21:13:21:13 | b | | +| Taint externally controlled string | test.py:16 | test.py:16:9:16:22 | tainted_string | | --> | Taint externally controlled string | test.py:16 | test.py:16:9:16:25 | Subscript | | +| Taint externally controlled string | test.py:16 | test.py:16:9:16:25 | Subscript | | --> | Taint externally controlled string | test.py:21 | test.py:21:16:21:16 | c | | +| Taint externally controlled string | test.py:17 | test.py:17:9:17:22 | tainted_string | | --> | Taint externally controlled string | test.py:17 | test.py:17:9:17:27 | Subscript | | +| Taint externally controlled string | test.py:17 | test.py:17:9:17:27 | Subscript | | --> | Taint externally controlled string | test.py:21 | test.py:21:19:21:19 | d | | +| Taint externally controlled string | test.py:18 | test.py:18:9:18:32 | reversed() | | --> | Taint externally controlled string | test.py:21 | test.py:21:22:21:22 | e | | +| Taint externally controlled string | test.py:18 | test.py:18:18:18:31 | tainted_string | | --> | Taint externally controlled string | test.py:18 | test.py:18:9:18:32 | reversed() | | +| Taint externally controlled string | test.py:19 | test.py:19:9:19:28 | copy() | | --> | Taint externally controlled string | test.py:21 | test.py:21:25:21:25 | f | | +| Taint externally controlled string | test.py:19 | test.py:19:14:19:27 | tainted_string | | --> | Taint externally controlled string | test.py:19 | test.py:19:9:19:28 | copy() | | +| Taint externally controlled string | test.py:20 | test.py:20:9:20:22 | tainted_string | | --> | Taint externally controlled string | test.py:20 | test.py:20:9:20:30 | Attribute() | | +| Taint externally controlled string | test.py:20 | test.py:20:9:20:30 | Attribute() | | --> | Taint externally controlled string | test.py:21 | test.py:21:28:21:28 | g | | +| Taint externally controlled string | test.py:24 | test.py:24:22:24:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:25 | test.py:25:8:25:21 | tainted_string | | +| Taint externally controlled string | test.py:24 | test.py:24:22:24:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:28 | test.py:28:14:28:27 | tainted_string | | +| Taint externally controlled string | test.py:31 | test.py:31:22:31:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:32 | test.py:32:8:32:21 | tainted_string | | +| Taint externally controlled string | test.py:31 | test.py:31:22:31:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:32 | test.py:32:34:32:47 | tainted_string | | +| Taint externally controlled string | test.py:31 | test.py:31:22:31:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:35 | test.py:35:14:35:27 | tainted_string | | +| Taint externally controlled string | test.py:38 | test.py:38:22:38:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:39 | test.py:39:13:39:26 | tainted_string | | +| Taint externally controlled string | test.py:38 | test.py:38:22:38:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:40 | test.py:40:15:40:28 | tainted_string | | +| Taint externally controlled string | test.py:38 | test.py:38:22:38:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:41 | test.py:41:15:41:28 | tainted_string | | +| Taint externally controlled string | test.py:39 | test.py:39:9:39:27 | str() | | --> | Taint externally controlled string | test.py:42 | test.py:42:10:42:10 | a | | +| Taint externally controlled string | test.py:39 | test.py:39:13:39:26 | tainted_string | | --> | Taint externally controlled string | test.py:39 | test.py:39:9:39:27 | str() | | +| Taint externally controlled string | test.py:40 | test.py:40:9:40:29 | bytes() | | --> | Taint externally controlled string | test.py:42 | test.py:42:13:42:13 | b | | +| Taint externally controlled string | test.py:40 | test.py:40:15:40:28 | tainted_string | | --> | Taint externally controlled string | test.py:40 | test.py:40:9:40:29 | bytes() | | +| Taint externally controlled string | test.py:41 | test.py:41:9:41:46 | bytes() | | --> | Taint externally controlled string | test.py:42 | test.py:42:16:42:16 | c | | +| Taint externally controlled string | test.py:41 | test.py:41:15:41:28 | tainted_string | | --> | Taint externally controlled string | test.py:41 | test.py:41:9:41:46 | bytes() | | +| Taint externally controlled string | test.py:44 | test.py:44:22:44:26 | taint | p1 = externally controlled string | --> | Taint externally controlled string | test.py:45 | test.py:45:17:45:21 | taint | p1 = externally controlled string | +| Taint externally controlled string | test.py:45 | test.py:45:17:45:21 | taint | p1 = externally controlled string | --> | Taint externally controlled string | test.py:45 | test.py:45:12:45:22 | func() | p1 = externally controlled string | +| Taint externally controlled string | test.py:45 | test.py:45:17:45:21 | taint | p1 = externally controlled string | --> | Taint externally controlled string | test.py:52 | test.py:52:19:52:21 | arg | p0 = externally controlled string | +| Taint externally controlled string | test.py:52 | test.py:52:19:52:21 | arg | p0 = externally controlled string | --> | Taint externally controlled string | test.py:53 | test.py:53:12:53:14 | arg | p0 = externally controlled string | +| Taint externally controlled string | test.py:56 | test.py:56:11:56:24 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | +| Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | --> | Taint externally controlled string | test.py:58 | test.py:58:10:58:12 | res | | +| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:44 | test.py:44:22:44:26 | taint | p1 = externally controlled string | +| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | --> | Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:9 | a | | +| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | --> | Taint json[externally controlled string] | test.py:10 | test.py:10:10:10:10 | a | | | Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:9 | a | | --> | Taint externally controlled string | test.py:8 | test.py:8:9:8:18 | Attribute() | | | Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:9 | a | | --> | Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:18 | Attribute() | | | Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:18 | Attribute() | | --> | Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:9 | b | | +| Taint json[externally controlled string] | test.py:8 | test.py:8:9:8:18 | Attribute() | | --> | Taint json[externally controlled string] | test.py:10 | test.py:10:13:10:13 | b | | | Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:9 | b | | --> | Taint externally controlled string | test.py:9 | test.py:9:9:9:14 | Subscript | | | Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:9 | b | | --> | Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:14 | Subscript | | +| Taint json[externally controlled string] | test.py:9 | test.py:9:9:9:14 | Subscript | | --> | Taint json[externally controlled string] | test.py:10 | test.py:10:16:10:16 | c | | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.expected b/python/ql/test/library-tests/taint/strings/TestTaint.expected new file mode 100644 index 000000000000..db6d601bb43b --- /dev/null +++ b/python/ql/test/library-tests/taint/strings/TestTaint.expected @@ -0,0 +1,22 @@ +| test.py:10 | test_json | a | externally controlled string | +| test.py:10 | test_json | a | json[externally controlled string] | +| test.py:10 | test_json | b | externally controlled string | +| test.py:10 | test_json | b | json[externally controlled string] | +| test.py:10 | test_json | c | externally controlled string | +| test.py:10 | test_json | c | json[externally controlled string] | +| test.py:21 | test_str | a | externally controlled string | +| test.py:21 | test_str | b | externally controlled string | +| test.py:21 | test_str | c | externally controlled string | +| test.py:21 | test_str | d | externally controlled string | +| test.py:21 | test_str | e | externally controlled string | +| test.py:21 | test_str | f | externally controlled string | +| test.py:21 | test_str | g | externally controlled string | +| test.py:26 | test_const_sanitizer1 | tainted_string | NO TAINT | +| test.py:28 | test_const_sanitizer1 | tainted_string | externally controlled string | +| test.py:33 | test_const_sanitizer2 | tainted_string | NO TAINT | +| test.py:35 | test_const_sanitizer2 | tainted_string | externally controlled string | +| test.py:42 | test_str2 | a | externally controlled string | +| test.py:42 | test_str2 | b | externally controlled string | +| test.py:42 | test_str2 | c | externally controlled string | +| test.py:50 | test_exc_info | res | exception.info | +| test.py:58 | test_untrusted | res | externally controlled string | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.ql b/python/ql/test/library-tests/taint/strings/TestTaint.ql new file mode 100644 index 000000000000..92657b1fef91 --- /dev/null +++ b/python/ql/test/library-tests/taint/strings/TestTaint.ql @@ -0,0 +1,18 @@ +import python +import semmle.python.security.TaintTracking +import Taint + +from Call call, Expr arg, string taint_string +where + call.getLocation().getFile().getShortName() = "test.py" and + call.getFunc().(Name).getId() = "test" and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "NO TAINT" + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) + ) +select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string diff --git a/python/ql/test/library-tests/taint/strings/test.py b/python/ql/test/library-tests/taint/strings/test.py index 1b2b439a648e..878200d947c5 100644 --- a/python/ql/test/library-tests/taint/strings/test.py +++ b/python/ql/test/library-tests/taint/strings/test.py @@ -2,41 +2,44 @@ from copy import copy def test_json(): - tainted_string = TAINTED + tainted_string = TAINTED_STRING tainted_json = json.loads(tainted_string) a = tainted_json["x"] b = a.get("y") c = b["z"] + test(a, b, c) def test_str(): - tainted_string = TAINTED + tainted_string = TAINTED_STRING a = tainted_string.ljust(8) b = tainted_string.copy() c = tainted_string[:] d = tainted_string[::2] e = reversed(tainted_string) f = copy(tainted_string) - h = tainted_string.strip() + g = tainted_string.strip() + test(a, b, c, d, e, f, g) def test_const_sanitizer1(): - tainted_string = TAINTED + tainted_string = TAINTED_STRING if tainted_string == "OK": - not_tainted(tainted_string) + test(tainted_string) # not tainted else: - still_tainted(tainted_string) + test(tainted_string) # still tainted def test_const_sanitizer2(): - tainted_string = TAINTED + tainted_string = TAINTED_STRING if tainted_string == "OK" or tainted_string == "ALSO_OK": - not_tainted(tainted_string) + test(tainted_string) # not tainted else: - still_tainted(tainted_string) + test(tainted_string) # still tainted def test_str2(): - tainted_string = TAINTED + tainted_string = TAINTED_STRING a = str(tainted_string) b = bytes(tainted_string) # This is an error in Python 3 c = bytes(tainted_string, encoding="utf8") # This is an error in Python 2 + test(a, b, c) def cross_over(func, taint): return func(taint) @@ -44,13 +47,15 @@ def cross_over(func, taint): def test_exc_info(): info = TAINTED_EXCEPTION_INFO res = cross_over(exc_info_call, info) + test(res) def exc_info_call(arg): return arg def test_untrusted(): - ext = TAINTED_EXTERNAL_STRING + ext = TAINTED_STRING res = cross_over(untrusted_call, ext) + test(res) def exc_untrusted_call(arg): return arg From fd270cc02cce94885f5f2ad1cdbecd14e716a277 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 19 Feb 2020 16:22:09 +0100 Subject: [PATCH 3/9] Python: Add basic taint support for urlsplit/urlparse --- .../python/security/strings/External.qll | 101 ++++++++++++++++++ .../library-tests/taint/namedtuple/Taint.qll | 27 +++++ .../taint/namedtuple/TestTaint.expected | 11 ++ .../taint/namedtuple/TestTaint.ql | 18 ++++ .../library-tests/taint/namedtuple/test.py | 33 ++++++ .../taint/strings/TestStep.expected | 6 ++ .../taint/strings/TestTaint.expected | 2 + .../test/library-tests/taint/strings/test.py | 8 ++ 8 files changed, 206 insertions(+) create mode 100644 python/ql/test/library-tests/taint/namedtuple/Taint.qll create mode 100644 python/ql/test/library-tests/taint/namedtuple/TestTaint.expected create mode 100644 python/ql/test/library-tests/taint/namedtuple/TestTaint.ql create mode 100644 python/ql/test/library-tests/taint/namedtuple/test.py diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index 4dcbe0d17e3c..564580c24651 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -18,6 +18,10 @@ abstract class ExternalStringKind extends StringKind { json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this or tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this + or + urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this + or + urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this } } @@ -65,6 +69,65 @@ class ExternalStringSequenceDictKind extends DictKind { ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind } } +/** TaintKind for the result of `urlsplit(tainted_string)` */ +class ExternalUrlSplitResult extends ExternalStringSequenceKind { + // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit + override TaintKind getTaintOfAttribute(string name) { + result = super.getTaintOfAttribute(name) + or + ( + // namedtuple field names + name = "scheme" or + name = "netloc" or + name = "path" or + name = "query" or + name = "fragment" or + // class methods + name = "username" or + name = "password" or + name = "hostname" + ) and + result instanceof ExternalStringKind + } + + override TaintKind getTaintOfMethodResult(string name) { + result = super.getTaintOfMethodResult(name) + or + name = "geturl" and + result instanceof ExternalStringKind + } +} + +/** TaintKind for the result of `urlparse(tainted_string)` */ +class ExternalUrlParseResult extends ExternalStringSequenceKind { + // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse + override TaintKind getTaintOfAttribute(string name) { + result = super.getTaintOfAttribute(name) + or + ( + // namedtuple field names + name = "scheme" or + name = "netloc" or + name = "path" or + name = "params" or + name = "query" or + name = "fragment" or + // class methods + name = "username" or + name = "password" or + name = "hostname" + ) and + result instanceof ExternalStringKind + } + + override TaintKind getTaintOfMethodResult(string name) { + result = super.getTaintOfMethodResult(name) + or + name = "geturl" and + result instanceof ExternalStringKind + } +} + /* Helper for getTaintForStep() */ pragma[noinline] private predicate json_subscript_taint( @@ -83,6 +146,44 @@ private predicate json_load(ControlFlowNode fromnode, CallNode tonode) { ) } +private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) { + // This could be implemented as `exists(FunctionValue` without the explicit six part, + // but then our tests will need to import +100 modules, so for now this slightly + // altered version gets to live on. + exists(Value urlsplit | + ( + urlsplit = Value::named("six.moves.urllib.parse.urlsplit") + or + // Python 2 + urlsplit = Value::named("urlparse.urlsplit") + or + // Python 3 + urlsplit = Value::named("urllib.parse.urlsplit") + ) and + tonode = urlsplit.getACall() and + tonode.getArg(0) = fromnode + ) +} + +private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) { + // This could be implemented as `exists(FunctionValue` without the explicit six part, + // but then our tests will need to import +100 modules, so for now this slightly + // altered version gets to live on. + exists(Value urlparse | + ( + urlparse = Value::named("six.moves.urllib.parse.urlparse") + or + // Python 2 + urlparse = Value::named("urlparse.urlparse") + or + // Python 3 + urlparse = Value::named("urllib.parse.urlparse") + ) and + tonode = urlparse.getACall() and + tonode.getArg(0) = fromnode + ) +} + /** A kind of "taint", representing an open file-like object from an external source. */ class ExternalFileObject extends TaintKind { ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" } diff --git a/python/ql/test/library-tests/taint/namedtuple/Taint.qll b/python/ql/test/library-tests/taint/namedtuple/Taint.qll new file mode 100644 index 000000000000..b97f65225f25 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/Taint.qll @@ -0,0 +1,27 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + +class SimpleSource extends TaintSource { + SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind } + + override string toString() { result = "taint source" } +} + +class ListSource extends TaintSource { + ListSource() { this.(NameNode).getId() = "TAINTED_LIST" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind } + + override string toString() { result = "list taint source" } +} + +class DictSource extends TaintSource { + DictSource() { this.(NameNode).getId() = "TAINTED_DICT" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind } + + override string toString() { result = "dict taint source" } +} diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected new file mode 100644 index 000000000000..c90d33199ba5 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected @@ -0,0 +1,11 @@ +| test.py:13 | test_basic | a | externally controlled string | +| test.py:13 | test_basic | b | externally controlled string | +| test.py:13 | test_basic | c | externally controlled string | +| test.py:13 | test_basic | d | externally controlled string | +| test.py:13 | test_basic | urlsplit_res | [externally controlled string] | +| test.py:20 | test_sanitizer | Attribute | externally controlled string | +| test.py:23 | test_sanitizer | Subscript | externally controlled string | +| test.py:33 | test_namedtuple | a | NO TAINT | +| test.py:33 | test_namedtuple | b | NO TAINT | +| test.py:33 | test_namedtuple | c | NO TAINT | +| test.py:33 | test_namedtuple | d | NO TAINT | diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.ql b/python/ql/test/library-tests/taint/namedtuple/TestTaint.ql new file mode 100644 index 000000000000..92657b1fef91 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.ql @@ -0,0 +1,18 @@ +import python +import semmle.python.security.TaintTracking +import Taint + +from Call call, Expr arg, string taint_string +where + call.getLocation().getFile().getShortName() = "test.py" and + call.getFunc().(Name).getId() = "test" and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "NO TAINT" + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) + ) +select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string diff --git a/python/ql/test/library-tests/taint/namedtuple/test.py b/python/ql/test/library-tests/taint/namedtuple/test.py new file mode 100644 index 000000000000..1c07bcfdc607 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/test.py @@ -0,0 +1,33 @@ +from six.moves.urllib.parse import urlsplit + +# Currently we don't have support for namedtuples in general, but do have special support +# for `urlsplit` (and `urlparse`) + +def test_basic(): + tainted_string = TAINTED_STRING + urlsplit_res = urlsplit(tainted_string) + a = urlsplit_res.netloc # field access + b = urlsplit_res.hostname # property + c = urlsplit_res[3] # indexing + _, _, d, _, _ = urlsplit(tainted_string) # unpacking + test(a, b, c, d, urlsplit_res) + +def test_sanitizer(): + tainted_string = TAINTED_STRING + urlsplit_res = urlsplit(tainted_string) + + if urlsplit_res.netloc == "OK": + test(urlsplit_res.netloc) # TODO: FP, should not be tainted here + + if urlsplit_res[2] == "OK": + test(urlsplit_res[0]) # TODO: FP, should not be tainted here + +def test_namedtuple(): + tainted_string = TAINTED_STRING + Point = namedtuple('Point', ['x', 'y']) + p = Point('safe', tainted_string) + a = p.x + b = p.y + c = p[0] + d = p[1] + test(a, b, c, d) # TODO: FN, at least p.y and p[1] should be tainted diff --git a/python/ql/test/library-tests/taint/strings/TestStep.expected b/python/ql/test/library-tests/taint/strings/TestStep.expected index eccb3efa010f..6efa8fb6edfb 100644 --- a/python/ql/test/library-tests/taint/strings/TestStep.expected +++ b/python/ql/test/library-tests/taint/strings/TestStep.expected @@ -1,3 +1,5 @@ +| Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:10:69:21 | urlsplit_res | | +| Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:24:69:35 | urlparse_res | | | Taint exception.info | test.py:44 | test.py:44:22:44:26 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:12:45:22 | func() | p1 = exception.info | | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:52 | test.py:52:19:52:21 | arg | p0 = exception.info | @@ -56,6 +58,10 @@ | Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | --> | Taint externally controlled string | test.py:58 | test.py:58:10:58:12 | res | | | Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:44 | test.py:44:22:44:26 | taint | p1 = externally controlled string | | Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | +| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | +| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | +| Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | +| Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.expected b/python/ql/test/library-tests/taint/strings/TestTaint.expected index db6d601bb43b..ff7372a8ce67 100644 --- a/python/ql/test/library-tests/taint/strings/TestTaint.expected +++ b/python/ql/test/library-tests/taint/strings/TestTaint.expected @@ -20,3 +20,5 @@ | test.py:42 | test_str2 | c | externally controlled string | | test.py:50 | test_exc_info | res | exception.info | | test.py:58 | test_untrusted | res | externally controlled string | +| test.py:69 | test_urlsplit_urlparse | urlparse_res | [externally controlled string] | +| test.py:69 | test_urlsplit_urlparse | urlsplit_res | [externally controlled string] | diff --git a/python/ql/test/library-tests/taint/strings/test.py b/python/ql/test/library-tests/taint/strings/test.py index 878200d947c5..207f52c807c3 100644 --- a/python/ql/test/library-tests/taint/strings/test.py +++ b/python/ql/test/library-tests/taint/strings/test.py @@ -59,3 +59,11 @@ def test_untrusted(): def exc_untrusted_call(arg): return arg + +from six.moves.urllib.parse import urlsplit, urlparse + +def test_urlsplit_urlparse(): + tainted_string = TAINTED_STRING + urlsplit_res = urlsplit(tainted_string) + urlparse_res = urlparse(tainted_string) + test(urlsplit_res, urlparse_res) From 31ff652cb39a1fd58fb08226942524bbf76ef2a8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 21 Feb 2020 15:18:53 +0100 Subject: [PATCH 4/9] Python: Make Sanitizer available for urlsplit taint It isn't used by default, it has to *actively* be enabled. --- .../python/security/strings/External.qll | 59 +++++++++++++++++++ .../library-tests/taint/namedtuple/Taint.qll | 20 +++++++ .../taint/namedtuple/TestTaint.expected | 4 +- .../library-tests/taint/namedtuple/test.py | 4 +- 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index 564580c24651..dc2a64ec2a3f 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -195,3 +195,62 @@ class ExternalFileObject extends TaintKind { name = "read" and result = this.getValue() } } + +/** + * Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until + * we have better support for namedtuples. + * + * Will clear **all** taint on a test of the kind. That is, on the true edge of any matching test, + * all fields/indexes will be cleared of taint. + * + * Handles: + * - `if splitres.netloc == "KNOWN_VALUE"` + * - `if splitres[0] == "KNOWN_VALUE"` + */ +class UrlsplitUrlparseTempSanitizer extends Sanitizer { + // TODO: remove this once we have better support for named tuples + + UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" } + + override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { + ( + taint instanceof ExternalUrlSplitResult + or + taint instanceof ExternalUrlParseResult + ) and + exists(ControlFlowNode foobar | + foobar.(SubscriptNode).getObject() = test.getInput().getAUse() + or + foobar.(AttrNode).getObject() = test.getInput().getAUse() + | + clears_taint(_, foobar, test.getTest(), test.getSense()) + ) + } + + private predicate clears_taint(ControlFlowNode final_test, ControlFlowNode tainted, ControlFlowNode test, boolean sense) { + test_equality_with_const(final_test, tainted, sense) + or + test.(UnaryExprNode).getNode().getOp() instanceof Not and + exists(ControlFlowNode nested_test | + nested_test = test.(UnaryExprNode).getOperand() and + clears_taint(final_test, tainted, nested_test, sense.booleanNot()) + ) + } + + /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */ + private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode operand, boolean sense) { + exists(ControlFlowNode const, Cmpop op | + const.getNode() instanceof StrConst + | + ( + cmp.operands(const, op, operand) + or + cmp.operands(operand, op, const) + ) and ( + op instanceof Eq and sense = true + or + op instanceof NotEq and sense = false + ) + ) + } +} diff --git a/python/ql/test/library-tests/taint/namedtuple/Taint.qll b/python/ql/test/library-tests/taint/namedtuple/Taint.qll index b97f65225f25..9d19a54488c7 100644 --- a/python/ql/test/library-tests/taint/namedtuple/Taint.qll +++ b/python/ql/test/library-tests/taint/namedtuple/Taint.qll @@ -25,3 +25,23 @@ class DictSource extends TaintSource { override string toString() { result = "dict taint source" } } + +class TestConfig extends TaintTracking::Configuration { + TestConfig() { this = "TestConfig" } + + override predicate isSanitizer(Sanitizer sanitizer) { + sanitizer instanceof UrlsplitUrlparseTempSanitizer + } + + override predicate isSource(TaintTracking::Source source) { + source instanceof SimpleSource + or + source instanceof ListSource + or + source instanceof DictSource + } + + override predicate isSink(TaintTracking::Sink sink) { + none() + } +} diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected index c90d33199ba5..72d4c5c4e17b 100644 --- a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected @@ -3,8 +3,8 @@ | test.py:13 | test_basic | c | externally controlled string | | test.py:13 | test_basic | d | externally controlled string | | test.py:13 | test_basic | urlsplit_res | [externally controlled string] | -| test.py:20 | test_sanitizer | Attribute | externally controlled string | -| test.py:23 | test_sanitizer | Subscript | externally controlled string | +| test.py:20 | test_sanitizer | Attribute | NO TAINT | +| test.py:23 | test_sanitizer | Subscript | NO TAINT | | test.py:33 | test_namedtuple | a | NO TAINT | | test.py:33 | test_namedtuple | b | NO TAINT | | test.py:33 | test_namedtuple | c | NO TAINT | diff --git a/python/ql/test/library-tests/taint/namedtuple/test.py b/python/ql/test/library-tests/taint/namedtuple/test.py index 1c07bcfdc607..7c7120d37496 100644 --- a/python/ql/test/library-tests/taint/namedtuple/test.py +++ b/python/ql/test/library-tests/taint/namedtuple/test.py @@ -17,10 +17,10 @@ def test_sanitizer(): urlsplit_res = urlsplit(tainted_string) if urlsplit_res.netloc == "OK": - test(urlsplit_res.netloc) # TODO: FP, should not be tainted here + test(urlsplit_res.netloc) if urlsplit_res[2] == "OK": - test(urlsplit_res[0]) # TODO: FP, should not be tainted here + test(urlsplit_res[0]) def test_namedtuple(): tainted_string = TAINTED_STRING From 798db91f7153fb225e0bd5a22c90dd5e1cb7f14d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 21 Feb 2020 15:26:07 +0100 Subject: [PATCH 5/9] Python: Add more urlsplit tests --- .../library-tests/taint/namedtuple/TestTaint.expected | 11 +++++++---- python/ql/test/library-tests/taint/namedtuple/test.py | 9 +++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected index 72d4c5c4e17b..471801f775df 100644 --- a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected @@ -5,7 +5,10 @@ | test.py:13 | test_basic | urlsplit_res | [externally controlled string] | | test.py:20 | test_sanitizer | Attribute | NO TAINT | | test.py:23 | test_sanitizer | Subscript | NO TAINT | -| test.py:33 | test_namedtuple | a | NO TAINT | -| test.py:33 | test_namedtuple | b | NO TAINT | -| test.py:33 | test_namedtuple | c | NO TAINT | -| test.py:33 | test_namedtuple | d | NO TAINT | +| test.py:26 | test_sanitizer | Attribute | NO TAINT | +| test.py:29 | test_sanitizer | Attribute | externally controlled string | +| test.py:32 | test_sanitizer | Attribute | externally controlled string | +| test.py:42 | test_namedtuple | a | NO TAINT | +| test.py:42 | test_namedtuple | b | NO TAINT | +| test.py:42 | test_namedtuple | c | NO TAINT | +| test.py:42 | test_namedtuple | d | NO TAINT | diff --git a/python/ql/test/library-tests/taint/namedtuple/test.py b/python/ql/test/library-tests/taint/namedtuple/test.py index 7c7120d37496..d0b9c1e92125 100644 --- a/python/ql/test/library-tests/taint/namedtuple/test.py +++ b/python/ql/test/library-tests/taint/namedtuple/test.py @@ -22,6 +22,15 @@ def test_sanitizer(): if urlsplit_res[2] == "OK": test(urlsplit_res[0]) + if urlsplit_res.netloc == "OK": + test(urlsplit_res.path) # FN + + if urlsplit_res.netloc in ["OK"]: + test(urlsplit_res.netloc) # FP + + if urlsplit_res.netloc in ["OK", non_constant()]: + test(urlsplit_res.netloc) # should be tainted + def test_namedtuple(): tainted_string = TAINTED_STRING Point = namedtuple('Point', ['x', 'y']) From bfa7553095947e0053c92f1f82378e2e2e829f62 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 21 Feb 2020 16:03:29 +0100 Subject: [PATCH 6/9] Python: urlsplit sanitizer handles `in [KNOWN_VALUE]` --- .../python/security/strings/External.qll | 25 ++++++++++++++++--- .../taint/namedtuple/TestTaint.expected | 2 +- .../library-tests/taint/namedtuple/test.py | 2 +- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index dc2a64ec2a3f..a8b17b1f6866 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -230,6 +230,8 @@ class UrlsplitUrlparseTempSanitizer extends Sanitizer { private predicate clears_taint(ControlFlowNode final_test, ControlFlowNode tainted, ControlFlowNode test, boolean sense) { test_equality_with_const(final_test, tainted, sense) or + test_in_const_seq(final_test, tainted, sense) + or test.(UnaryExprNode).getNode().getOp() instanceof Not and exists(ControlFlowNode nested_test | nested_test = test.(UnaryExprNode).getOperand() and @@ -238,19 +240,34 @@ class UrlsplitUrlparseTempSanitizer extends Sanitizer { } /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */ - private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode operand, boolean sense) { + private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode tainted, boolean sense) { exists(ControlFlowNode const, Cmpop op | const.getNode() instanceof StrConst | ( - cmp.operands(const, op, operand) + cmp.operands(const, op, tainted) or - cmp.operands(operand, op, const) - ) and ( + cmp.operands(tainted, op, const) + ) and + ( op instanceof Eq and sense = true or op instanceof NotEq and sense = false ) ) } + + /** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */ + private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) { + exists(SequenceNode const_seq, Cmpop op | + forall(ControlFlowNode elem | elem = const_seq.getAnElement() | elem.getNode() instanceof StrConst) + | + cmp.operands(tainted, op, const_seq) and + ( + op instanceof In and sense = true + or + op instanceof NotIn and sense = false + ) + ) + } } diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected index 471801f775df..11c8b1268a7e 100644 --- a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected @@ -6,7 +6,7 @@ | test.py:20 | test_sanitizer | Attribute | NO TAINT | | test.py:23 | test_sanitizer | Subscript | NO TAINT | | test.py:26 | test_sanitizer | Attribute | NO TAINT | -| test.py:29 | test_sanitizer | Attribute | externally controlled string | +| test.py:29 | test_sanitizer | Attribute | NO TAINT | | test.py:32 | test_sanitizer | Attribute | externally controlled string | | test.py:42 | test_namedtuple | a | NO TAINT | | test.py:42 | test_namedtuple | b | NO TAINT | diff --git a/python/ql/test/library-tests/taint/namedtuple/test.py b/python/ql/test/library-tests/taint/namedtuple/test.py index d0b9c1e92125..f3d0b778e1b3 100644 --- a/python/ql/test/library-tests/taint/namedtuple/test.py +++ b/python/ql/test/library-tests/taint/namedtuple/test.py @@ -26,7 +26,7 @@ def test_sanitizer(): test(urlsplit_res.path) # FN if urlsplit_res.netloc in ["OK"]: - test(urlsplit_res.netloc) # FP + test(urlsplit_res.netloc) if urlsplit_res.netloc in ["OK", non_constant()]: test(urlsplit_res.netloc) # should be tainted From 400a8ffae5339c19af66ad90ae9b26643df0c991 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 26 Feb 2020 14:08:10 +0100 Subject: [PATCH 7/9] Python: Use slightly better name than foobar I intended to rename before committing, but woops --- python/ql/src/semmle/python/security/strings/External.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index a8b17b1f6866..40dafff0f629 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -218,12 +218,12 @@ class UrlsplitUrlparseTempSanitizer extends Sanitizer { or taint instanceof ExternalUrlParseResult ) and - exists(ControlFlowNode foobar | - foobar.(SubscriptNode).getObject() = test.getInput().getAUse() + exists(ControlFlowNode full_use | + full_use.(SubscriptNode).getObject() = test.getInput().getAUse() or - foobar.(AttrNode).getObject() = test.getInput().getAUse() + full_use.(AttrNode).getObject() = test.getInput().getAUse() | - clears_taint(_, foobar, test.getTest(), test.getSense()) + clears_taint(_, full_use, test.getTest(), test.getSense()) ) } From 0b31cb17167ab23d3e95eb2f2f3056d67750cc36 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 26 Feb 2020 14:08:47 +0100 Subject: [PATCH 8/9] Python: Show that we have initial taint in urlsplit test --- .../taint/namedtuple/TestTaint.expected | 19 ++++++++++--------- .../library-tests/taint/namedtuple/test.py | 2 ++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected index 11c8b1268a7e..62b589299ddb 100644 --- a/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected +++ b/python/ql/test/library-tests/taint/namedtuple/TestTaint.expected @@ -3,12 +3,13 @@ | test.py:13 | test_basic | c | externally controlled string | | test.py:13 | test_basic | d | externally controlled string | | test.py:13 | test_basic | urlsplit_res | [externally controlled string] | -| test.py:20 | test_sanitizer | Attribute | NO TAINT | -| test.py:23 | test_sanitizer | Subscript | NO TAINT | -| test.py:26 | test_sanitizer | Attribute | NO TAINT | -| test.py:29 | test_sanitizer | Attribute | NO TAINT | -| test.py:32 | test_sanitizer | Attribute | externally controlled string | -| test.py:42 | test_namedtuple | a | NO TAINT | -| test.py:42 | test_namedtuple | b | NO TAINT | -| test.py:42 | test_namedtuple | c | NO TAINT | -| test.py:42 | test_namedtuple | d | NO TAINT | +| test.py:19 | test_sanitizer | Attribute | externally controlled string | +| test.py:22 | test_sanitizer | Attribute | NO TAINT | +| test.py:25 | test_sanitizer | Subscript | NO TAINT | +| test.py:28 | test_sanitizer | Attribute | NO TAINT | +| test.py:31 | test_sanitizer | Attribute | NO TAINT | +| test.py:34 | test_sanitizer | Attribute | externally controlled string | +| test.py:44 | test_namedtuple | a | NO TAINT | +| test.py:44 | test_namedtuple | b | NO TAINT | +| test.py:44 | test_namedtuple | c | NO TAINT | +| test.py:44 | test_namedtuple | d | NO TAINT | diff --git a/python/ql/test/library-tests/taint/namedtuple/test.py b/python/ql/test/library-tests/taint/namedtuple/test.py index f3d0b778e1b3..ec6304f50733 100644 --- a/python/ql/test/library-tests/taint/namedtuple/test.py +++ b/python/ql/test/library-tests/taint/namedtuple/test.py @@ -16,6 +16,8 @@ def test_sanitizer(): tainted_string = TAINTED_STRING urlsplit_res = urlsplit(tainted_string) + test(urlsplit_res.netloc) # should be tainted + if urlsplit_res.netloc == "OK": test(urlsplit_res.netloc) From 771dfecf6d2658796fd00c10a3d44b53f9787ef8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 26 Feb 2020 14:10:30 +0100 Subject: [PATCH 9/9] Python: Add sanitized edges for urlsplit test --- .../library-tests/taint/namedtuple/SanitizedEdges.expected | 7 +++++++ .../test/library-tests/taint/namedtuple/SanitizedEdges.ql | 6 ++++++ 2 files changed, 13 insertions(+) create mode 100644 python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.expected create mode 100644 python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.ql diff --git a/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.expected b/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.expected new file mode 100644 index 000000000000..0adf64dfd5d0 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.expected @@ -0,0 +1,7 @@ +| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:21 | Pi(urlsplit_res_0) [true] | +| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:24 | Pi(urlsplit_res_3) [true] | +| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:27 | Pi(urlsplit_res_6) [true] | +| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:30 | Pi(urlsplit_res_9) [true] | +| string equality sanitizer | externally controlled string | test.py:21 | Pi(urlsplit_res_0) [true] | +| string equality sanitizer | externally controlled string | test.py:24 | Pi(urlsplit_res_3) [true] | +| string equality sanitizer | externally controlled string | test.py:27 | Pi(urlsplit_res_6) [true] | diff --git a/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.ql b/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.ql new file mode 100644 index 000000000000..d523f79a9638 --- /dev/null +++ b/python/ql/test/library-tests/taint/namedtuple/SanitizedEdges.ql @@ -0,0 +1,6 @@ +import python +import Taint + +from Sanitizer s, TaintKind taint, PyEdgeRefinement test +where s.sanitizingEdge(taint, test) +select s, taint, test.getTest().getLocation().toString(), test.getRepresentation()