From b11b5784b24b1ba74333d5a33b25b5727057d868 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 27 Aug 2020 16:08:51 +0200 Subject: [PATCH 01/10] Python: Adopt more complete tests from old dataflow impl The ones in test/experimental/dataflow/[consistency,regression]/test.py were a copy of test/library-tests/taint/dataflow/test.py. However, test/library-tests/taint/dataflow/test.py only contains a subset of test/library-tests/taint/config/test.py, that only contains a subset of test/library-tests/taint/general/test.py This commit updates the experimental dataflow tests to be a copy of the test/library-tests/taint/general/test.py file. There seem to have been a few changes to the file after it was copied, in `test_truth` and `test_early_exit`. I have not reproduced those changes. --- .../experimental/dataflow/consistency/test.py | 70 ++++++++++++++++++- .../experimental/dataflow/regression/test.py | 70 ++++++++++++++++++- 2 files changed, 136 insertions(+), 4 deletions(-) diff --git a/python/ql/test/experimental/dataflow/consistency/test.py b/python/ql/test/experimental/dataflow/consistency/test.py index 22c5e2e0fc79..447af8483f78 100644 --- a/python/ql/test/experimental/dataflow/consistency/test.py +++ b/python/ql/test/experimental/dataflow/consistency/test.py @@ -104,6 +104,26 @@ def test16(): t = module.dangerous_func() SINK(t) +class C(object): pass + +def x_sink(arg): + SINK(arg.x) + +def test17(): + t = C() + t.x = module.dangerous + SINK(t.x) + +def test18(): + t = C() + t.x = module.dangerous + t = hub(t) + x_sink(t) + +def test19(): + t = CUSTOM_SOURCE + t = hub(TAINT_FROM_ARG(t)) + CUSTOM_SINK(t) def test20(cond): if cond: @@ -163,9 +183,55 @@ def test_truth(): if t: SINK(t) else: - SINK(t) # Regression: FP here + SINK(t) + if not t: + SINK(t) + else: + SINK(t) + +def test_early_exit(): + t = FALSEY if not t: - SINK(t) # Regression: FP here + return + t + +def flow_through_type_test_if_no_class(): + t = SOURCE + if isinstance(t, str): + 
SINK(t) else: SINK(t) +def flow_in_iteration(): + t = ITERABLE_SOURCE + for i in t: + i + return i + +def flow_in_generator(): + seq = [SOURCE] + for i in seq: + yield i + +def flow_from_generator(): + for x in flow_in_generator(): + SINK(x) + +def const_eq_clears_taint(): + tainted = SOURCE + if tainted == "safe": + SINK(tainted) # safe + SINK(tainted) # unsafe + +def const_eq_clears_taint2(): + tainted = SOURCE + if tainted != "safe": + return + SINK(tainted) # safe + +def non_const_eq_preserves_taint(x): + tainted = SOURCE + if tainted == tainted: + SINK(tainted) # unsafe + if tainted == x: + SINK(tainted) # unsafe diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index 233c010a5bae..08d2597e99e2 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -100,6 +100,26 @@ def test16(): t = module.dangerous_func() SINK(t) +class C(object): pass + +def x_sink(arg): + SINK(arg.x) + +def test17(): + t = C() + t.x = module.dangerous + SINK(t.x) + +def test18(): + t = C() + t.x = module.dangerous + t = hub(t) + x_sink(t) + +def test19(): + t = CUSTOM_SOURCE + t = hub(TAINT_FROM_ARG(t)) + CUSTOM_SINK(t) def test20(cond): if cond: @@ -159,9 +179,55 @@ def test_truth(): if t: SINK(t) else: - SINK(t) # Regression: FP here + SINK(t) + if not t: + SINK(t) + else: + SINK(t) + +def test_early_exit(): + t = FALSEY if not t: - SINK(t) # Regression: FP here + return + t + +def flow_through_type_test_if_no_class(): + t = SOURCE + if isinstance(t, str): + SINK(t) else: SINK(t) +def flow_in_iteration(): + t = ITERABLE_SOURCE + for i in t: + i + return i + +def flow_in_generator(): + seq = [SOURCE] + for i in seq: + yield i + +def flow_from_generator(): + for x in flow_in_generator(): + SINK(x) + +def const_eq_clears_taint(): + tainted = SOURCE + if tainted == "safe": + SINK(tainted) # safe + SINK(tainted) # unsafe + +def 
const_eq_clears_taint2(): + tainted = SOURCE + if tainted != "safe": + return + SINK(tainted) # safe + +def non_const_eq_preserves_taint(x): + tainted = SOURCE + if tainted == tainted: + SINK(tainted) # unsafe + if tainted == x: + SINK(tainted) # unsafe From f1e11f1efd4ae23833cbc7587598d39045e981e1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 27 Aug 2020 16:17:12 +0200 Subject: [PATCH 02/10] Python: updated expected output from new shared dataflow tests I did not verify whether these changes are OK or not, simply ran and accepted the tests. --- .../consistency/dataflow-consistency.expected | 84 ++++++++++++++----- .../dataflow/regression/dataflow.expected | 21 +++-- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected index 1d9e912351a5..84c9a1cf39d9 100644 --- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected +++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected @@ -1,8 +1,16 @@ uniqueEnclosingCallable | test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable const_eq_clears_taint | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable const_eq_clears_taint2 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_from_generator | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_in_generator | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_in_iteration | Node should have one enclosing callable but has 0. 
| +| test.py:0:0:0:0 | GSSA Variable flow_through_type_test_if_no_class | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable non_const_eq_preserves_taint | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test_early_exit | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | | test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | @@ -51,25 +59,51 @@ uniqueEnclosingCallable | test.py:99:5:99:10 | GSSA Variable test15 | Node should have one enclosing callable but has 0. | | test.py:103:1:103:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | | test.py:103:5:103:10 | GSSA Variable test16 | Node should have one enclosing callable but has 0. | -| test.py:108:1:108:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:108:5:108:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. | -| test.py:118:1:118:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:118:5:118:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. | +| test.py:107:1:107:16 | ControlFlowNode for ClassExpr | Node should have one enclosing callable but has 0. | +| test.py:107:7:107:7 | GSSA Variable C | Node should have one enclosing callable but has 0. | +| test.py:109:1:109:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. 
| +| test.py:109:5:109:10 | GSSA Variable x_sink | Node should have one enclosing callable but has 0. | +| test.py:112:1:112:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:112:5:112:10 | GSSA Variable test17 | Node should have one enclosing callable but has 0. | +| test.py:117:1:117:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:117:5:117:10 | GSSA Variable test18 | Node should have one enclosing callable but has 0. | +| test.py:123:1:123:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:123:5:123:10 | GSSA Variable test19 | Node should have one enclosing callable but has 0. | | test.py:128:1:128:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:128:5:128:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. | -| test.py:139:20:139:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. | -| test.py:139:33:139:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | -| test.py:140:1:140:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. | -| test.py:140:1:140:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | -| test.py:140:6:140:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. | -| test.py:142:1:142:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:142:5:142:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | -| test.py:146:1:146:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:146:5:146:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. 
| -| test.py:151:1:151:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:151:5:151:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | -| test.py:161:1:161:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:161:5:161:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +| test.py:128:5:128:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. | +| test.py:138:1:138:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:138:5:138:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. | +| test.py:148:1:148:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:148:5:148:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. | +| test.py:159:20:159:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. | +| test.py:159:33:159:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:160:1:160:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. | +| test.py:160:1:160:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:160:6:160:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. | +| test.py:162:1:162:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:162:5:162:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | +| test.py:166:1:166:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:166:5:166:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. 
| +| test.py:171:1:171:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:171:5:171:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | +| test.py:181:1:181:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:181:5:181:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +| test.py:192:1:192:22 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:192:5:192:19 | GSSA Variable test_early_exit | Node should have one enclosing callable but has 0. | +| test.py:198:1:198:41 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:198:5:198:38 | GSSA Variable flow_through_type_test_if_no_class | Node should have one enclosing callable but has 0. | +| test.py:205:1:205:24 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:205:5:205:21 | GSSA Variable flow_in_iteration | Node should have one enclosing callable but has 0. | +| test.py:211:1:211:24 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:211:5:211:21 | GSSA Variable flow_in_generator | Node should have one enclosing callable but has 0. | +| test.py:216:1:216:26 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:216:5:216:23 | GSSA Variable flow_from_generator | Node should have one enclosing callable but has 0. | +| test.py:220:1:220:28 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:220:5:220:25 | GSSA Variable const_eq_clears_taint | Node should have one enclosing callable but has 0. | +| test.py:226:1:226:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. 
| +| test.py:226:5:226:26 | GSSA Variable const_eq_clears_taint2 | Node should have one enclosing callable but has 0. | +| test.py:232:1:232:36 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:232:5:232:32 | GSSA Variable non_const_eq_preserves_taint | Node should have one enclosing callable but has 0. | uniqueType uniqueNodeLocation missingLocation @@ -86,8 +120,9 @@ uniquePostUpdate postIsInSameCallable reverseRead storeIsPostUpdate -| test.py:152:9:152:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. | -| test.py:153:9:153:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. | +| test.py:172:9:172:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. | +| test.py:173:9:173:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. | +| test.py:212:11:212:18 | ControlFlowNode for List | Store targets should be PostUpdateNodes. | argHasPostUpdate | test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | | test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | @@ -101,5 +136,10 @@ argHasPostUpdate | test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | | test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | | test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | -| test.py:158:15:158:15 | ControlFlowNode for l | ArgumentNode is missing PostUpdateNode. | -| test.py:159:15:159:15 | ControlFlowNode for d | ArgumentNode is missing PostUpdateNode. | +| test.py:120:13:120:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:121:12:121:12 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:125:13:125:29 | ControlFlowNode for TAINT_FROM_ARG() | ArgumentNode is missing PostUpdateNode. 
| +| test.py:178:15:178:15 | ControlFlowNode for l | ArgumentNode is missing PostUpdateNode. | +| test.py:179:15:179:15 | ControlFlowNode for d | ArgumentNode is missing PostUpdateNode. | +| test.py:200:19:200:19 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:200:22:200:24 | ControlFlowNode for str | ArgumentNode is missing PostUpdateNode. | diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected index 0d5dca4d4d13..396c74b3b5d5 100644 --- a/python/ql/test/experimental/dataflow/regression/dataflow.expected +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -7,10 +7,17 @@ | test.py:62:13:62:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | | test.py:67:13:67:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | | test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t | -| test.py:108:13:108:18 | ControlFlowNode for SOURCE | test.py:112:14:112:14 | ControlFlowNode for t | -| test.py:139:10:139:15 | ControlFlowNode for SOURCE | test.py:140:14:140:14 | ControlFlowNode for t | -| test.py:143:9:143:14 | ControlFlowNode for SOURCE | test.py:145:10:145:10 | ControlFlowNode for s | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:162:14:162:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:164:14:164:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:166:14:166:14 | ControlFlowNode for t | +| test.py:128:13:128:18 | ControlFlowNode for SOURCE | test.py:132:14:132:14 | ControlFlowNode for t | +| test.py:159:10:159:15 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | +| test.py:163:9:163:14 | ControlFlowNode 
for SOURCE | test.py:165:10:165:10 | ControlFlowNode for s | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:180:14:180:14 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:182:14:182:14 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:184:14:184:14 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:186:14:186:14 | ControlFlowNode for t | +| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:197:14:197:14 | ControlFlowNode for t | +| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:199:14:199:14 | ControlFlowNode for t | +| test.py:217:15:217:20 | ControlFlowNode for SOURCE | test.py:219:14:219:20 | ControlFlowNode for tainted | +| test.py:217:15:217:20 | ControlFlowNode for SOURCE | test.py:220:10:220:16 | ControlFlowNode for tainted | +| test.py:223:15:223:20 | ControlFlowNode for SOURCE | test.py:226:10:226:16 | ControlFlowNode for tainted | +| test.py:229:15:229:20 | ControlFlowNode for SOURCE | test.py:231:14:231:20 | ControlFlowNode for tainted | +| test.py:229:15:229:20 | ControlFlowNode for SOURCE | test.py:233:14:233:20 | ControlFlowNode for tainted | From 654c4f39ac0283c8092346bfa1a688fdb2149bfa Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 27 Aug 2020 16:32:26 +0200 Subject: [PATCH 03/10] Python: Add missing `module.py` to consistency/regression tests --- .../dataflow/consistency/dataflow-consistency.expected | 8 ++++++++ .../test/experimental/dataflow/consistency/module.py | 10 ++++++++++ .../experimental/dataflow/regression/dataflow.expected | 1 + .../ql/test/experimental/dataflow/regression/module.py | 10 ++++++++++ 4 files changed, 29 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/consistency/module.py create mode 100644 python/ql/test/experimental/dataflow/regression/module.py diff --git 
a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected index 84c9a1cf39d9..36afae405e04 100644 --- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected +++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected @@ -1,4 +1,12 @@ uniqueEnclosingCallable +| module.py:1:1:1:9 | GSSA Variable dangerous | Node should have one enclosing callable but has 0. | +| module.py:1:13:1:18 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. | +| module.py:2:1:2:4 | GSSA Variable safe | Node should have one enclosing callable but has 0. | +| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. | +| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. | +| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. | +| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable const_eq_clears_taint | Node should have one enclosing callable but has 0. 
| diff --git a/python/ql/test/experimental/dataflow/consistency/module.py b/python/ql/test/experimental/dataflow/consistency/module.py new file mode 100644 index 000000000000..7766a61fa180 --- /dev/null +++ b/python/ql/test/experimental/dataflow/consistency/module.py @@ -0,0 +1,10 @@ +dangerous = SOURCE +safe = "safe" + + +def dangerous_func(): + return SOURCE + + +safe2 = SOURCE +safe2 = "safe" diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected index 396c74b3b5d5..2281a9011427 100644 --- a/python/ql/test/experimental/dataflow/regression/dataflow.expected +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -1,3 +1,4 @@ +| module.py:6:12:6:17 | ControlFlowNode for SOURCE | test.py:101:10:101:10 | ControlFlowNode for t | | test.py:3:10:3:15 | ControlFlowNode for SOURCE | test.py:3:10:3:15 | ControlFlowNode for SOURCE | | test.py:6:9:6:14 | ControlFlowNode for SOURCE | test.py:7:10:7:10 | ControlFlowNode for s | | test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | diff --git a/python/ql/test/experimental/dataflow/regression/module.py b/python/ql/test/experimental/dataflow/regression/module.py new file mode 100644 index 000000000000..7766a61fa180 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/module.py @@ -0,0 +1,10 @@ +dangerous = SOURCE +safe = "safe" + + +def dangerous_func(): + return SOURCE + + +safe2 = SOURCE +safe2 = "safe" From 5f3eda0a22acde7be7edf6f05b70653ccf715dfa Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 31 Aug 2020 09:06:13 +0200 Subject: [PATCH 04/10] Python: Annotate test file Also add test of custom flow --- .../regression/custom_dataflow.expected | 1 + .../dataflow/regression/custom_dataflow.ql | 29 +++++++++++ .../experimental/dataflow/regression/test.py | 48 +++++++++---------- 3 files changed, 54 insertions(+), 24 deletions(-) create 
mode 100644 python/ql/test/experimental/dataflow/regression/custom_dataflow.expected create mode 100644 python/ql/test/experimental/dataflow/regression/custom_dataflow.ql diff --git a/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected b/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected new file mode 100644 index 000000000000..0ae109d52aea --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected @@ -0,0 +1 @@ +| test.py:126:13:126:25 | ControlFlowNode for CUSTOM_SOURCE | test.py:130:21:130:21 | ControlFlowNode for t | diff --git a/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql new file mode 100644 index 000000000000..5644425a7012 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql @@ -0,0 +1,29 @@ +/** + * This query is meant to catch the flows from `CUSTOM_SOURCE` to `CUSTOM_SINK`. + * + * This should be compared to + * python/ql/test/library-tests/taint/dataflow/Dataflow.ql + * A first goal is to have identical results; after that we + * hope to remove the false positive. 
+ */ + +import experimental.dataflow.DataFlow + +class CustomTestConfiguration extends DataFlow::Configuration { + CustomTestConfiguration() { this = "CustomTestConfiguration" } + + override predicate isSource(DataFlow::Node node) { + node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "CUSTOM_SOURCE" + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() in ["CUSTOM_SINK", "CUSTOM_SINK_F"] and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where exists(CustomTestConfiguration cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index 08d2597e99e2..dc19f6b9e785 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -30,7 +30,7 @@ def test6(cond): else: t = SOURCE if cond: - SINK(t) + SINK_F(t) def test7(cond): if cond: @@ -50,7 +50,7 @@ def sink3(cond, arg): if cond: sink(arg) -def test8(cond): +def test8(cond): # This flow is shadowed by previous tests, perhaps do a path query t = source2() sink2(t) @@ -80,21 +80,21 @@ def test11(): def test12(): t = "safe" t = hub(t) - SINK(t) + SINK_F(t) import module def test13(): t = module.dangerous - SINK(t) + SINK(t) # Flow not found def test14(): t = module.safe - SINK(t) + SINK_F(t) def test15(): t = module.safe2 - SINK(t) + SINK_F(t) def test16(): t = module.dangerous_func() @@ -108,13 +108,13 @@ def x_sink(arg): def test17(): t = C() t.x = module.dangerous - SINK(t.x) + SINK(t.x) # Flow not found def test18(): t = C() t.x = module.dangerous t = hub(t) - x_sink(t) + x_sink(t) # Flow not found def test19(): t = CUSTOM_SOURCE @@ -137,23 +137,23 @@ def test21(cond): else: t = SOURCE if not cond: - CUSTOM_SINK(t) + CUSTOM_SINK_F(t) else: - SINK(t) + SINK_F(t) def test22(cond): if 
cond: t = CUSTOM_SOURCE else: t = SOURCE - t = TAINT_FROM_ARG(t) + t = TAINT_FROM_ARG(t) # Blocks data flow if cond: CUSTOM_SINK(t) else: SINK(t) from module import dangerous as unsafe -SINK(unsafe) +SINK(unsafe) # Flow not found def test23(): with SOURCE as t: @@ -161,16 +161,16 @@ def test23(): def test24(): s = SOURCE - SANITIZE(s) - SINK(s) + SANITIZE(s) # Does not block data flow + SINK_F(s) def test_update_extend(x, y): l = [SOURCE] d = {"key" : SOURCE} x.extend(l) y.update(d) - SINK(x[0]) - SINK(y["key"]) + SINK(x[0]) # Flow not found + SINK(y["key"]) # Flow not found l2 = list(l) d2 = dict(d) @@ -179,9 +179,9 @@ def test_truth(): if t: SINK(t) else: - SINK(t) + SINK_F(t) # False positive if not t: - SINK(t) + SINK_F(t) # False positive else: SINK(t) @@ -194,12 +194,12 @@ def test_early_exit(): def flow_through_type_test_if_no_class(): t = SOURCE if isinstance(t, str): - SINK(t) + SINK(t) # Flows's both here.. else: - SINK(t) + SINK(t) # ..and here def flow_in_iteration(): - t = ITERABLE_SOURCE + t = ITERABLE_SOURCE # Seems to not be sunk anywhere for i in t: i return i @@ -211,19 +211,19 @@ def flow_in_generator(): def flow_from_generator(): for x in flow_in_generator(): - SINK(x) + SINK(x) # Flow not found def const_eq_clears_taint(): tainted = SOURCE if tainted == "safe": - SINK(tainted) # safe + SINK(tainted) # safe # FP SINK(tainted) # unsafe def const_eq_clears_taint2(): tainted = SOURCE if tainted != "safe": return - SINK(tainted) # safe + SINK(tainted) # safe # FP def non_const_eq_preserves_taint(x): tainted = SOURCE From 4977790617e3c947852f7f7cb72e8c4f617b7eb8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Sep 2020 13:46:47 +0200 Subject: [PATCH 05/10] Python: dataflow regression tests: fix source2 --- .../test/experimental/dataflow/regression/dataflow.expected | 6 +++--- python/ql/test/experimental/dataflow/regression/test.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected index 2281a9011427..d573ec5d17f1 100644 --- a/python/ql/test/experimental/dataflow/regression/dataflow.expected +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -10,10 +10,10 @@ | test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t | | test.py:128:13:128:18 | ControlFlowNode for SOURCE | test.py:132:14:132:14 | ControlFlowNode for t | | test.py:159:10:159:15 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | -| test.py:163:9:163:14 | ControlFlowNode for SOURCE | test.py:165:10:165:10 | ControlFlowNode for s | +| test.py:163:9:163:14 | ControlFlowNode for SOURCE | test.py:165:12:165:12 | ControlFlowNode for s | | test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:180:14:180:14 | ControlFlowNode for t | -| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:182:14:182:14 | ControlFlowNode for t | -| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:184:14:184:14 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:182:16:182:16 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:184:16:184:16 | ControlFlowNode for t | | test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:186:14:186:14 | ControlFlowNode for t | | test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:197:14:197:14 | ControlFlowNode for t | | test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:199:14:199:14 | ControlFlowNode for t | diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index dc19f6b9e785..dbfc6485f4e7 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -40,8 +40,8 @@ def test7(cond): if 
cond: SINK(t) -def source2(arg): - return source(arg) +def source2(): + return source() def sink2(arg): sink(arg) @@ -50,7 +50,7 @@ def sink3(cond, arg): if cond: sink(arg) -def test8(cond): # This flow is shadowed by previous tests, perhaps do a path query +def test8(cond): t = source2() sink2(t) From 552637a446698aa16e441e3d646687dba17b3f42 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Sep 2020 13:50:24 +0200 Subject: [PATCH 06/10] Python: dataflow regression tests: fix flow_in_iteration --- python/ql/test/experimental/dataflow/regression/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index dbfc6485f4e7..0908bb557499 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -201,8 +201,8 @@ def flow_through_type_test_if_no_class(): def flow_in_iteration(): t = ITERABLE_SOURCE # Seems to not be sunk anywhere for i in t: - i - return i + SINK(i) + SINK(i) def flow_in_generator(): seq = [SOURCE] From bf3a266f581ccbe14497dc50b7ecd85a243c1890 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Sep 2020 13:51:00 +0200 Subject: [PATCH 07/10] Python: dataflow regression tests: remove taint tracking tests they will be reintroduced in another PR --- .../experimental/dataflow/regression/test.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index 0908bb557499..40fe30c8499f 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -212,22 +212,3 @@ def flow_in_generator(): def flow_from_generator(): for x in flow_in_generator(): SINK(x) # Flow not found - -def const_eq_clears_taint(): - tainted = SOURCE - if tainted 
== "safe": - SINK(tainted) # safe # FP - SINK(tainted) # unsafe - -def const_eq_clears_taint2(): - tainted = SOURCE - if tainted != "safe": - return - SINK(tainted) # safe # FP - -def non_const_eq_preserves_taint(x): - tainted = SOURCE - if tainted == tainted: - SINK(tainted) # unsafe - if tainted == x: - SINK(tainted) # unsafe From b958c3b833706e4344cc0640e673079d2ffabae0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Sep 2020 11:13:32 +0200 Subject: [PATCH 08/10] Python: Update comment for test8 --- python/ql/test/experimental/dataflow/regression/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index 40fe30c8499f..fcf7573b452a 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -50,7 +50,7 @@ def sink3(cond, arg): if cond: sink(arg) -def test8(cond): +def test8(cond): # This test currently adds nothing, as we only track SOURCE -> SINK, and previous tests already add flow from line 10 to line 13 t = source2() sink2(t) From aad51af4cea6aad2e56cf17fc7faa4c7378ebb69 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 3 Sep 2020 11:25:41 +0200 Subject: [PATCH 09/10] Python: use concrete iterable source --- .../test/experimental/dataflow/regression/dataflow.expected | 5 ----- python/ql/test/experimental/dataflow/regression/test.py | 6 +++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected index d573ec5d17f1..e7b3b140e505 100644 --- a/python/ql/test/experimental/dataflow/regression/dataflow.expected +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -17,8 +17,3 @@ | test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:186:14:186:14 | 
ControlFlowNode for t | | test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:197:14:197:14 | ControlFlowNode for t | | test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:199:14:199:14 | ControlFlowNode for t | -| test.py:217:15:217:20 | ControlFlowNode for SOURCE | test.py:219:14:219:20 | ControlFlowNode for tainted | -| test.py:217:15:217:20 | ControlFlowNode for SOURCE | test.py:220:10:220:16 | ControlFlowNode for tainted | -| test.py:223:15:223:20 | ControlFlowNode for SOURCE | test.py:226:10:226:16 | ControlFlowNode for tainted | -| test.py:229:15:229:20 | ControlFlowNode for SOURCE | test.py:231:14:231:20 | ControlFlowNode for tainted | -| test.py:229:15:229:20 | ControlFlowNode for SOURCE | test.py:233:14:233:20 | ControlFlowNode for tainted | diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index fcf7573b452a..dbcfd4c4584e 100644 --- a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -199,10 +199,10 @@ def flow_through_type_test_if_no_class(): SINK(t) # ..and here def flow_in_iteration(): - t = ITERABLE_SOURCE # Seems to not be sunk anywhere + t = [SOURCE] for i in t: - SINK(i) - SINK(i) + SINK(i) # Flow not found + SINK(i) # Flow not found def flow_in_generator(): seq = [SOURCE] From 29bf98ad26016d774a78a7d48d5833752236adf1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Sep 2020 15:03:53 +0200 Subject: [PATCH 10/10] Python: Fix CUSTOM_SOURCE dataflow regression test --- .../test/experimental/dataflow/regression/custom_dataflow.ql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql index 5644425a7012..ac154d77f199 100644 --- a/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql +++ 
b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql @@ -7,19 +7,20 @@ * hope to remove the false positive. */ +import python import experimental.dataflow.DataFlow class CustomTestConfiguration extends DataFlow::Configuration { CustomTestConfiguration() { this = "CustomTestConfiguration" } override predicate isSource(DataFlow::Node node) { - node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "CUSTOM_SOURCE" + node.asCfgNode().(NameNode).getId() = "CUSTOM_SOURCE" } override predicate isSink(DataFlow::Node node) { exists(CallNode call | call.getFunction().(NameNode).getId() in ["CUSTOM_SINK", "CUSTOM_SINK_F"] and - node.(DataFlow::CfgNode).getNode() = call.getAnArg() + node.asCfgNode() = call.getAnArg() ) } }