From c479a77d55ba58b98905bf291ac1579c398e2dda Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Apr 2020 10:28:29 +0200 Subject: [PATCH 1/5] Python: Refactor ExternalFileObject to use field Instead of string matching. This brings it in line with what CollectionKind, SequenceKind, and DictKind does. --- python/ql/src/semmle/python/security/strings/External.qll | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index 6a7261c4be3f..7ee41bd1ac49 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -183,10 +183,12 @@ private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) { /** A kind of "taint", representing an open file-like object from an external source. */ class ExternalFileObject extends TaintKind { - ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" } + ExternalStringKind valueKind; + + ExternalFileObject() { this = "file[" + valueKind + "]" } /** Gets the taint kind for the contents of this file */ - TaintKind getValue() { this = "file[" + result + "]" } + TaintKind getValue() { result = valueKind } override TaintKind getTaintOfMethodResult(string name) { name = "read" and result = this.getValue() From 7385ea50242ead574028e6be0c42d4cf4115d664 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Apr 2020 10:36:51 +0200 Subject: [PATCH 2/5] Python: Add tests for ExternalFileObject --- python/ql/test/library-tests/taint/strings/Taint.qll | 8 ++++++++ .../test/library-tests/taint/strings/TestStep.expected | 6 ++++++ .../library-tests/taint/strings/TestTaint.expected | 5 +++++ python/ql/test/library-tests/taint/strings/test.py | 10 ++++++++++ 4 files changed, 29 insertions(+) diff --git a/python/ql/test/library-tests/taint/strings/Taint.qll b/python/ql/test/library-tests/taint/strings/Taint.qll index 6c39426d1611..62dba92a45dc 100644 --- a/python/ql/test/library-tests/taint/strings/Taint.qll +++ b/python/ql/test/library-tests/taint/strings/Taint.qll @@ -34,3 +34,11 @@ class ExceptionInfoSource extends TaintSource { override string toString() { result = "Exception info source" } } + +class ExternalFileObjectSource extends TaintSource { + ExternalFileObjectSource() { this.(NameNode).getId() = "TAINTED_FILE" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalFileObject } + + override string toString() { result = "Tainted file source" } +} diff --git a/python/ql/test/library-tests/taint/strings/TestStep.expected b/python/ql/test/library-tests/taint/strings/TestStep.expected index 6efa8fb6edfb..af31aec81d99 100644 --- a/python/ql/test/library-tests/taint/strings/TestStep.expected +++ b/python/ql/test/library-tests/taint/strings/TestStep.expected @@ -62,6 +62,12 @@ | Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | | Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:74 | test.py:74:9:74:20 | tainted_file | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:75 | test.py:75:9:75:20 | tainted_file | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:76 | test.py:76:9:76:20 | tainted_file | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:77 | test.py:77:9:77:20 | tainted_file | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:78 | test.py:78:27:78:38 | tainted_file | | +| Taint file[externally controlled string] | test.py:75 | test.py:75:9:75:20 | tainted_file | | --> | Taint externally controlled string | test.py:75 | test.py:75:9:75:27 | Attribute() | | | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.expected b/python/ql/test/library-tests/taint/strings/TestTaint.expected index ff7372a8ce67..3b428997caba 100644 --- a/python/ql/test/library-tests/taint/strings/TestTaint.expected +++ b/python/ql/test/library-tests/taint/strings/TestTaint.expected @@ -22,3 +22,8 @@ | test.py:58 | test_untrusted | res | externally controlled string | | test.py:69 | test_urlsplit_urlparse | urlparse_res | [externally controlled string] | | test.py:69 | test_urlsplit_urlparse | urlsplit_res | [externally controlled string] | +| test.py:74 | test_tainted_file | tainted_file | file[externally controlled string] | +| test.py:75 | test_tainted_file | Attribute() | externally controlled string | +| test.py:76 | test_tainted_file | Attribute() | NO TAINT | +| test.py:77 | test_tainted_file | Attribute() | NO TAINT | +| test.py:78 | test_tainted_file | ListComp | NO TAINT | diff --git a/python/ql/test/library-tests/taint/strings/test.py b/python/ql/test/library-tests/taint/strings/test.py index 207f52c807c3..0967e8335add 100644 --- a/python/ql/test/library-tests/taint/strings/test.py +++ b/python/ql/test/library-tests/taint/strings/test.py @@ -67,3 +67,13 @@ def test_urlsplit_urlparse(): urlsplit_res = urlsplit(tainted_string) urlparse_res = urlparse(tainted_string) test(urlsplit_res, urlparse_res) + +def test_tainted_file(): + tainted_file = TAINTED_FILE + test( + tainted_file, + tainted_file.read(), + tainted_file.readline(), + tainted_file.readlines(), + [line for line in tainted_file], + ) From 86630f1d6c390743c068de2d405d8f7b7dc7180e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Apr 2020 10:37:16 +0200 Subject: [PATCH 3/5] Python: Handle readline, readlines for ExternalFileObject --- python/ql/src/semmle/python/security/strings/External.qll | 4 +++- python/ql/test/library-tests/taint/strings/TestStep.expected | 2 ++ python/ql/test/library-tests/taint/strings/TestTaint.expected | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index 7ee41bd1ac49..e25e58ecde7b 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -191,7 +191,9 @@ class ExternalFileObject extends TaintKind { TaintKind getValue() { result = valueKind } override TaintKind getTaintOfMethodResult(string name) { - name = "read" and result = this.getValue() + name in ["read", "readline"] and result = this.getValue() + or + name = "readlines" and result.(SequenceKind).getItem() = this.getValue() } } diff --git a/python/ql/test/library-tests/taint/strings/TestStep.expected b/python/ql/test/library-tests/taint/strings/TestStep.expected index af31aec81d99..b6e7852f3852 100644 --- a/python/ql/test/library-tests/taint/strings/TestStep.expected +++ b/python/ql/test/library-tests/taint/strings/TestStep.expected @@ -68,6 +68,8 @@ | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:77 | test.py:77:9:77:20 | tainted_file | | | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:78 | test.py:78:27:78:38 | tainted_file | | | Taint file[externally controlled string] | test.py:75 | test.py:75:9:75:20 | tainted_file | | --> | Taint externally controlled string | test.py:75 | test.py:75:9:75:27 | Attribute() | | +| Taint file[externally controlled string] | test.py:76 | test.py:76:9:76:20 | tainted_file | | --> | Taint externally controlled string | test.py:76 | test.py:76:9:76:31 | Attribute() | | +| Taint file[externally controlled string] | test.py:77 | test.py:77:9:77:20 | tainted_file | | --> | Taint [externally controlled string] | test.py:77 | test.py:77:9:77:32 | Attribute() | | | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.expected b/python/ql/test/library-tests/taint/strings/TestTaint.expected index 3b428997caba..6f8f86b1e951 100644 --- a/python/ql/test/library-tests/taint/strings/TestTaint.expected +++ b/python/ql/test/library-tests/taint/strings/TestTaint.expected @@ -24,6 +24,6 @@ | test.py:69 | test_urlsplit_urlparse | urlsplit_res | [externally controlled string] | | test.py:74 | test_tainted_file | tainted_file | file[externally controlled string] | | test.py:75 | test_tainted_file | Attribute() | externally controlled string | -| test.py:76 | test_tainted_file | Attribute() | NO TAINT | -| test.py:77 | test_tainted_file | Attribute() | NO TAINT | +| test.py:76 | test_tainted_file | Attribute() | externally controlled string | +| test.py:77 | test_tainted_file | Attribute() | [externally controlled string] | | test.py:78 | test_tainted_file | ListComp | NO TAINT | From 06edd076b639927dc4702930bb379a4787b138ba Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Apr 2020 14:11:28 +0200 Subject: [PATCH 4/5] Python: Enable taint when iterating over ExternalFileObject --- python/ql/src/semmle/python/security/strings/External.qll | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/ql/src/semmle/python/security/strings/External.qll b/python/ql/src/semmle/python/security/strings/External.qll index e25e58ecde7b..701e5c5017af 100644 --- a/python/ql/src/semmle/python/security/strings/External.qll +++ b/python/ql/src/semmle/python/security/strings/External.qll @@ -195,6 +195,8 @@ class ExternalFileObject extends TaintKind { or name = "readlines" and result.(SequenceKind).getItem() = this.getValue() } + + override TaintKind getTaintForIteration() { result = this.getValue() } } /** From fe50811bbfd6a87cd5edf1e99738f1022901b0f7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Apr 2020 14:10:17 +0200 Subject: [PATCH 5/5] Python: In taint test, list comprehension => for loop Apparently they're not the same thing :( --- python/ql/test/library-tests/taint/strings/TestStep.expected | 4 +++- python/ql/test/library-tests/taint/strings/TestTaint.expected | 2 +- python/ql/test/library-tests/taint/strings/test.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/ql/test/library-tests/taint/strings/TestStep.expected b/python/ql/test/library-tests/taint/strings/TestStep.expected index b6e7852f3852..cc391bb2f172 100644 --- a/python/ql/test/library-tests/taint/strings/TestStep.expected +++ b/python/ql/test/library-tests/taint/strings/TestStep.expected @@ -62,14 +62,16 @@ | Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | | Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | +| Taint externally controlled string | test.py:79 | test.py:79:5:79:29 | For | | --> | Taint externally controlled string | test.py:80 | test.py:80:14:80:17 | line | | | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:74 | test.py:74:9:74:20 | tainted_file | | | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:75 | test.py:75:9:75:20 | tainted_file | | | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:76 | test.py:76:9:76:20 | tainted_file | | | Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:77 | test.py:77:9:77:20 | tainted_file | | -| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:78 | test.py:78:27:78:38 | tainted_file | | +| Taint file[externally controlled string] | test.py:72 | test.py:72:20:72:31 | TAINTED_FILE | | --> | Taint file[externally controlled string] | test.py:79 | test.py:79:17:79:28 | tainted_file | | | Taint file[externally controlled string] | test.py:75 | test.py:75:9:75:20 | tainted_file | | --> | Taint externally controlled string | test.py:75 | test.py:75:9:75:27 | Attribute() | | | Taint file[externally controlled string] | test.py:76 | test.py:76:9:76:20 | tainted_file | | --> | Taint externally controlled string | test.py:76 | test.py:76:9:76:31 | Attribute() | | | Taint file[externally controlled string] | test.py:77 | test.py:77:9:77:20 | tainted_file | | --> | Taint [externally controlled string] | test.py:77 | test.py:77:9:77:32 | Attribute() | | +| Taint file[externally controlled string] | test.py:79 | test.py:79:17:79:28 | tainted_file | | --> | Taint externally controlled string | test.py:79 | test.py:79:5:79:29 | For | | | Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | | | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | | diff --git a/python/ql/test/library-tests/taint/strings/TestTaint.expected b/python/ql/test/library-tests/taint/strings/TestTaint.expected index 6f8f86b1e951..54340f2b1a74 100644 --- a/python/ql/test/library-tests/taint/strings/TestTaint.expected +++ b/python/ql/test/library-tests/taint/strings/TestTaint.expected @@ -26,4 +26,4 @@ | test.py:75 | test_tainted_file | Attribute() | externally controlled string | | test.py:76 | test_tainted_file | Attribute() | externally controlled string | | test.py:77 | test_tainted_file | Attribute() | [externally controlled string] | -| test.py:78 | test_tainted_file | ListComp | NO TAINT | +| test.py:80 | test_tainted_file | line | externally controlled string | diff --git a/python/ql/test/library-tests/taint/strings/test.py b/python/ql/test/library-tests/taint/strings/test.py index 0967e8335add..b2bd7d7d4e0e 100644 --- a/python/ql/test/library-tests/taint/strings/test.py +++ b/python/ql/test/library-tests/taint/strings/test.py @@ -75,5 +75,6 @@ def test_tainted_file(): tainted_file.read(), tainted_file.readline(), tainted_file.readlines(), - [line for line in tainted_file], ) + for line in tainted_file: + test(line)