From 4bfd55f1af6338689bbac86bb3b933bae75f8397 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Oct 2020 14:00:25 +0200 Subject: [PATCH 1/3] Python: Show problem with os.path modeling This is not a very good test for showing that we don't handle direct imports, but it was the best I had available without inventing something new. It's very fragile, since any of these would propagate taint (due to handling all `join` calls as if the qualifier was a string): ospath_alias.join(ts) ospath_alias.join(ts, "foo", "bar") But this test DOES serve the purpose of illustrating that my fix works :D --- .../defaultAdditionalTaintStep/TestTaint.expected | 3 ++- .../tainttracking/defaultAdditionalTaintStep/test_string.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected index 28dea1f38510..8c6debe56105 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected @@ -137,9 +137,10 @@ | test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) | | test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) | | test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) | -| test_string.py:158 | ok | test_os_path_join | os.path.join(..) | | test_string.py:159 | ok | test_os_path_join | os.path.join(..) | | test_string.py:160 | ok | test_os_path_join | os.path.join(..) | +| test_string.py:161 | ok | test_os_path_join | os.path.join(..) | +| test_string.py:162 | fail | test_os_path_join | ospath_alias.join(..) | | test_unpacking.py:16 | ok | unpacking | a | | test_unpacking.py:16 | ok | unpacking | b | | test_unpacking.py:16 | ok | unpacking | c | diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py index a7422f0ad4c2..5c80a8212f34 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py @@ -152,12 +152,14 @@ def binary_decode_encode(): def test_os_path_join(): import os + import os.path as ospath_alias print("\n# test_os_path_join") ts = TAINTED_STRING ensure_tainted( os.path.join(ts, "foo", "bar"), os.path.join(ts), os.path.join("foo", "bar", ts), + ospath_alias.join("foo", "bar", ts), ) From 76c9b8c49fcd17e38598e01684527f5b6a98e5b7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Oct 2020 14:37:23 +0200 Subject: [PATCH 2/3] Python: Expose importNode instead of importModule/importMember Since predicate name `import` is not allowed, I adopted `importNode` as it sort of matches what `exprNode` does. --- Due to only using `importMember` in `os_attr` we previously didn't handle `import os.path as alias` :| I did creat a hotfix for this (https://github.com/github/codeql/pull/4446), but in doing so I realized the core of the problem: We're exposing ourselves to making these kinds of mistakes by having BOTH importModule and importMember, and we don't really gain anything from doing this! We do loose the ability to easily only modeling `from mod import val` and not `import mod.val`, but I don't think that will ever be relevant. This change will also make us to recognize some invalid code, for example in import os.system as runtime_error we would now model that `runtime_error` is a reference to the `os.system` function (although the actual import would result in a runtime error). Overall these are tradeoffs I'm willing to make, as it does makes things simpler from a QL modeling point of view, and THAT sounds nice :+1: --- .../dataflow/internal/DataFlowUtil.qll | 21 +------------------ .../semmle/python/frameworks/Flask.qll | 4 ++-- .../semmle/python/frameworks/Stdlib.qll | 14 ++++++------- .../import-helper/ImportHelper.expected | 7 ------- .../dataflow/import-helper/ImportHelper.ql | 6 +----- .../TestTaint.expected | 2 +- .../dataflow/typetracking/moduleattr.ql | 2 +- 7 files changed, 13 insertions(+), 43 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll index 762ce7fb9218..4f198bae6821 100644 --- a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll +++ b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll @@ -36,10 +36,8 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } * Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module * `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo` * to refer to the module. - * - * Also see `DataFlow::importMember` */ -Node importModule(string name) { +Node importNode(string name) { exists(Variable var, Import imp, Alias alias | alias = imp.getAName() and alias.getAsname() = var.getAStore() and @@ -72,20 +70,3 @@ Node importModule(string name) { // reference to `foo.bar`, as desired. result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name) } - -/** - * Gets a EssaNode that holds the value imported by using fully qualified name in - *`from import `. - * - * Also see `DataFlow::importModule`. - */ -EssaNode importMember(string moduleName, string memberName) { - exists(Variable var, Import imp, Alias alias, ImportMember member | - alias = imp.getAName() and - member = alias.getValue() and - moduleName = member.getModule().(ImportExpr).getImportedModuleName() and - memberName = member.getName() and - alias.getAsname() = var.getAStore() and - result.getVar().(AssignmentDefinition).getSourceVariable() = var - ) -} diff --git a/python/ql/src/experimental/semmle/python/frameworks/Flask.qll b/python/ql/src/experimental/semmle/python/frameworks/Flask.qll index 5b4079331d70..371f3425470f 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Flask.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Flask.qll @@ -15,7 +15,7 @@ private module Flask { /** Gets a reference to the `flask` module. */ DataFlow::Node flask(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importModule("flask") + result = DataFlow::importNode("flask") or exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t)) } @@ -27,7 +27,7 @@ private module Flask { /** Gets a reference to the `flask.request` object. */ DataFlow::Node request(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importMember("flask", "request") + result = DataFlow::importNode("flask.request") or t.startInAttr("request") and result = flask() diff --git a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll index 5fa36c67e094..20f17a709f0e 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll @@ -17,7 +17,7 @@ private module Stdlib { /** Gets a reference to the `os` module. */ private DataFlow::Node os(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importModule("os") + result = DataFlow::importNode("os") or exists(DataFlow::TypeTracker t2 | result = os(t2).track(t2, t)) } @@ -42,10 +42,10 @@ private module Stdlib { "path"] and ( t.start() and - result = DataFlow::importMember("os", attr_name) + result = DataFlow::importNode("os." + attr_name) or t.startInAttr(attr_name) and - result = DataFlow::importModule("os") + result = DataFlow::importNode("os") ) or // Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)` @@ -85,7 +85,7 @@ private module Stdlib { /** Gets a reference to the `os.path.join` function. */ private DataFlow::Node join(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importMember("os.path", "join") + result = DataFlow::importNode("os.path.join") or t.startInAttr("join") and result = os::path() @@ -190,7 +190,7 @@ private module Stdlib { /** Gets a reference to the `subprocess` module. */ private DataFlow::Node subprocess(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importModule("subprocess") + result = DataFlow::importNode("subprocess") or exists(DataFlow::TypeTracker t2 | result = subprocess(t2).track(t2, t)) } @@ -208,10 +208,10 @@ private module Stdlib { attr_name in ["Popen", "call", "check_call", "check_output", "run"] and ( t.start() and - result = DataFlow::importMember("subprocess", attr_name) + result = DataFlow::importNode("subprocess." + attr_name) or t.startInAttr(attr_name) and - result = DataFlow::importModule("subprocess") + result = DataFlow::importNode("subprocess") ) or // Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)` diff --git a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected index 2085902c6cfe..6ebece73b2f5 100644 --- a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected +++ b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected @@ -1,4 +1,3 @@ -importModule | test1.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | | test1.py:1:8:1:12 | GSSA Variable mypkg | mypkg | | test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg | @@ -32,9 +31,3 @@ importModule | test7.py:5:8:5:16 | GSSA Variable mypkg | mypkg | | test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:9:19:9:21 | GSSA Variable foo | mypkg.foo | -importMember -| test2.py:1:19:1:21 | GSSA Variable foo | mypkg | foo | -| test2.py:1:24:1:26 | GSSA Variable bar | mypkg | bar | -| test5.py:9:26:9:29 | GSSA Variable _bar | mypkg | bar | -| test7.py:1:19:1:21 | GSSA Variable foo | mypkg | foo | -| test7.py:9:19:9:21 | GSSA Variable foo | mypkg | foo | diff --git a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.ql b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.ql index 68d794f16367..418f03494fd5 100644 --- a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.ql +++ b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.ql @@ -1,8 +1,4 @@ import python import experimental.dataflow.DataFlow -query predicate importModule(DataFlow::Node res, string name) { res = DataFlow::importModule(name) } - -query predicate importMember(DataFlow::Node res, string moduleName, string memberName) { - res = DataFlow::importMember(moduleName, memberName) -} +query predicate importNode(DataFlow::Node res, string name) { res = DataFlow::importNode(name) } diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected index 8c6debe56105..f43a7102fc6c 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected @@ -140,7 +140,7 @@ | test_string.py:159 | ok | test_os_path_join | os.path.join(..) | | test_string.py:160 | ok | test_os_path_join | os.path.join(..) | | test_string.py:161 | ok | test_os_path_join | os.path.join(..) | -| test_string.py:162 | fail | test_os_path_join | ospath_alias.join(..) | +| test_string.py:162 | ok | test_os_path_join | ospath_alias.join(..) | | test_unpacking.py:16 | ok | unpacking | a | | test_unpacking.py:16 | ok | unpacking | b | | test_unpacking.py:16 | ok | unpacking | c | diff --git a/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql b/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql index 15616d918609..8314b724724a 100644 --- a/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql +++ b/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql @@ -4,7 +4,7 @@ import experimental.dataflow.TypeTracker DataFlow::Node module_tracker(TypeTracker t) { t.start() and - result = DataFlow::importModule("module") + result = DataFlow::importNode("module") or exists(TypeTracker t2 | result = module_tracker(t2).track(t2, t)) } From 2c5996f6944a6ecc7f06d1caeea070365c41cbbd Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 13 Oct 2020 17:21:21 +0200 Subject: [PATCH 3/3] Python: Refactor subprocess_attr type-tracker Co-authored-by: Taus --- python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll index 20f17a709f0e..23869dc08242 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll @@ -211,7 +211,7 @@ private module Stdlib { result = DataFlow::importNode("subprocess." + attr_name) or t.startInAttr(attr_name) and - result = DataFlow::importNode("subprocess") + result = subprocess() ) or // Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`