diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected index 135ebd187792..ef84b8816a3c 100644 --- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected +++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected @@ -1,8 +1,24 @@ uniqueEnclosingCallable +| module.py:1:1:1:9 | GSSA Variable dangerous | Node should have one enclosing callable but has 0. | +| module.py:1:13:1:18 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. | +| module.py:2:1:2:4 | GSSA Variable safe | Node should have one enclosing callable but has 0. | +| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. | +| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. | +| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. | +| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable const_eq_clears_taint | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable const_eq_clears_taint2 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_from_generator | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_in_generator | Node should have one enclosing callable but has 0. 
| +| test.py:0:0:0:0 | GSSA Variable flow_in_iteration | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable flow_through_type_test_if_no_class | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable non_const_eq_preserves_taint | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test_early_exit | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | | test.py:0:0:0:0 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | | test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | @@ -51,25 +67,51 @@ uniqueEnclosingCallable | test.py:99:5:99:10 | GSSA Variable test15 | Node should have one enclosing callable but has 0. | | test.py:103:1:103:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | | test.py:103:5:103:10 | GSSA Variable test16 | Node should have one enclosing callable but has 0. | -| test.py:108:1:108:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:108:5:108:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. | -| test.py:118:1:118:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:118:5:118:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. | +| test.py:107:1:107:16 | ControlFlowNode for ClassExpr | Node should have one enclosing callable but has 0. | +| test.py:107:7:107:7 | GSSA Variable C | Node should have one enclosing callable but has 0. 
| +| test.py:109:1:109:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:109:5:109:10 | GSSA Variable x_sink | Node should have one enclosing callable but has 0. | +| test.py:112:1:112:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:112:5:112:10 | GSSA Variable test17 | Node should have one enclosing callable but has 0. | +| test.py:117:1:117:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:117:5:117:10 | GSSA Variable test18 | Node should have one enclosing callable but has 0. | +| test.py:123:1:123:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:123:5:123:10 | GSSA Variable test19 | Node should have one enclosing callable but has 0. | | test.py:128:1:128:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:128:5:128:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. | -| test.py:139:20:139:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. | -| test.py:139:33:139:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | -| test.py:140:1:140:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. | -| test.py:140:1:140:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | -| test.py:140:6:140:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. | -| test.py:142:1:142:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:142:5:142:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | -| test.py:146:1:146:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. 
| -| test.py:146:5:146:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | -| test.py:151:1:151:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:151:5:151:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | -| test.py:161:1:161:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | -| test.py:161:5:161:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +| test.py:128:5:128:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. | +| test.py:138:1:138:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:138:5:138:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. | +| test.py:148:1:148:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:148:5:148:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. | +| test.py:159:20:159:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. | +| test.py:159:33:159:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:160:1:160:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. | +| test.py:160:1:160:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:160:6:160:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. | +| test.py:162:1:162:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:162:5:162:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | +| test.py:166:1:166:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. 
| +| test.py:166:5:166:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | +| test.py:171:1:171:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:171:5:171:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | +| test.py:181:1:181:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:181:5:181:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +| test.py:192:1:192:22 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:192:5:192:19 | GSSA Variable test_early_exit | Node should have one enclosing callable but has 0. | +| test.py:198:1:198:41 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:198:5:198:38 | GSSA Variable flow_through_type_test_if_no_class | Node should have one enclosing callable but has 0. | +| test.py:205:1:205:24 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:205:5:205:21 | GSSA Variable flow_in_iteration | Node should have one enclosing callable but has 0. | +| test.py:211:1:211:24 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:211:5:211:21 | GSSA Variable flow_in_generator | Node should have one enclosing callable but has 0. | +| test.py:216:1:216:26 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:216:5:216:23 | GSSA Variable flow_from_generator | Node should have one enclosing callable but has 0. | +| test.py:220:1:220:28 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:220:5:220:25 | GSSA Variable const_eq_clears_taint | Node should have one enclosing callable but has 0. 
| +| test.py:226:1:226:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:226:5:226:26 | GSSA Variable const_eq_clears_taint2 | Node should have one enclosing callable but has 0. | +| test.py:232:1:232:36 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:232:5:232:32 | GSSA Variable non_const_eq_preserves_taint | Node should have one enclosing callable but has 0. | uniqueType uniqueNodeLocation missingLocation diff --git a/python/ql/test/experimental/dataflow/consistency/module.py b/python/ql/test/experimental/dataflow/consistency/module.py new file mode 100644 index 000000000000..7766a61fa180 --- /dev/null +++ b/python/ql/test/experimental/dataflow/consistency/module.py @@ -0,0 +1,10 @@ +dangerous = SOURCE +safe = "safe" + + +def dangerous_func(): + return SOURCE + + +safe2 = SOURCE +safe2 = "safe" diff --git a/python/ql/test/experimental/dataflow/consistency/test.py b/python/ql/test/experimental/dataflow/consistency/test.py index 22c5e2e0fc79..447af8483f78 100644 --- a/python/ql/test/experimental/dataflow/consistency/test.py +++ b/python/ql/test/experimental/dataflow/consistency/test.py @@ -104,6 +104,26 @@ def test16(): t = module.dangerous_func() SINK(t) +class C(object): pass + +def x_sink(arg): + SINK(arg.x) + +def test17(): + t = C() + t.x = module.dangerous + SINK(t.x) + +def test18(): + t = C() + t.x = module.dangerous + t = hub(t) + x_sink(t) + +def test19(): + t = CUSTOM_SOURCE + t = hub(TAINT_FROM_ARG(t)) + CUSTOM_SINK(t) def test20(cond): if cond: @@ -163,9 +183,55 @@ def test_truth(): if t: SINK(t) else: - SINK(t) # Regression: FP here + SINK(t) + if not t: + SINK(t) + else: + SINK(t) + +def test_early_exit(): + t = FALSEY if not t: - SINK(t) # Regression: FP here + return + t + +def flow_through_type_test_if_no_class(): + t = SOURCE + if isinstance(t, str): + SINK(t) else: SINK(t) +def flow_in_iteration(): + t = ITERABLE_SOURCE + for i in t: + i + 
return i + +def flow_in_generator(): + seq = [SOURCE] + for i in seq: + yield i + +def flow_from_generator(): + for x in flow_in_generator(): + SINK(x) + +def const_eq_clears_taint(): + tainted = SOURCE + if tainted == "safe": + SINK(tainted) # safe + SINK(tainted) # unsafe + +def const_eq_clears_taint2(): + tainted = SOURCE + if tainted != "safe": + return + SINK(tainted) # safe + +def non_const_eq_preserves_taint(x): + tainted = SOURCE + if tainted == tainted: + SINK(tainted) # unsafe + if tainted == x: + SINK(tainted) # unsafe diff --git a/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected b/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected new file mode 100644 index 000000000000..0ae109d52aea --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/custom_dataflow.expected @@ -0,0 +1 @@ +| test.py:126:13:126:25 | ControlFlowNode for CUSTOM_SOURCE | test.py:130:21:130:21 | ControlFlowNode for t | diff --git a/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql new file mode 100644 index 000000000000..ac154d77f199 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/custom_dataflow.ql @@ -0,0 +1,30 @@ +/** + * This query is meant to catch the flows from `CUSTOM_SOURCE` to `CUSTOM_SINK`. + * + * This should be compared to + * python/ql/test/library-tests/taint/dataflow/Dataflow.ql + * A first goal is to have identical results; after that we + * hope to remove the false positive. 
+ */ + +import python +import experimental.dataflow.DataFlow + +class CustomTestConfiguration extends DataFlow::Configuration { + CustomTestConfiguration() { this = "CustomTestConfiguration" } + + override predicate isSource(DataFlow::Node node) { + node.asCfgNode().(NameNode).getId() = "CUSTOM_SOURCE" + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() in ["CUSTOM_SINK", "CUSTOM_SINK_F"] and + node.asCfgNode() = call.getAnArg() + ) + } +} + +from DataFlow::Node source, DataFlow::Node sink +where exists(CustomTestConfiguration cfg | cfg.hasFlow(source, sink)) +select source, sink diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected index 0d5dca4d4d13..e7b3b140e505 100644 --- a/python/ql/test/experimental/dataflow/regression/dataflow.expected +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -1,3 +1,4 @@ +| module.py:6:12:6:17 | ControlFlowNode for SOURCE | test.py:101:10:101:10 | ControlFlowNode for t | | test.py:3:10:3:15 | ControlFlowNode for SOURCE | test.py:3:10:3:15 | ControlFlowNode for SOURCE | | test.py:6:9:6:14 | ControlFlowNode for SOURCE | test.py:7:10:7:10 | ControlFlowNode for s | | test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | @@ -7,10 +8,12 @@ | test.py:62:13:62:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | | test.py:67:13:67:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | | test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t | -| test.py:108:13:108:18 | ControlFlowNode for SOURCE | test.py:112:14:112:14 | ControlFlowNode for t | -| test.py:139:10:139:15 | ControlFlowNode for SOURCE | test.py:140:14:140:14 | ControlFlowNode for t | -| test.py:143:9:143:14 | ControlFlowNode for SOURCE | 
test.py:145:10:145:10 | ControlFlowNode for s | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:162:14:162:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:164:14:164:14 | ControlFlowNode for t | -| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:166:14:166:14 | ControlFlowNode for t | +| test.py:128:13:128:18 | ControlFlowNode for SOURCE | test.py:132:14:132:14 | ControlFlowNode for t | +| test.py:159:10:159:15 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | +| test.py:163:9:163:14 | ControlFlowNode for SOURCE | test.py:165:12:165:12 | ControlFlowNode for s | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:180:14:180:14 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:182:16:182:16 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:184:16:184:16 | ControlFlowNode for t | +| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:186:14:186:14 | ControlFlowNode for t | +| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:197:14:197:14 | ControlFlowNode for t | +| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:199:14:199:14 | ControlFlowNode for t | diff --git a/python/ql/test/experimental/dataflow/regression/module.py b/python/ql/test/experimental/dataflow/regression/module.py new file mode 100644 index 000000000000..7766a61fa180 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/module.py @@ -0,0 +1,10 @@ +dangerous = SOURCE +safe = "safe" + + +def dangerous_func(): + return SOURCE + + +safe2 = SOURCE +safe2 = "safe" diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py index 233c010a5bae..dbcfd4c4584e 100644 --- 
a/python/ql/test/experimental/dataflow/regression/test.py +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -30,7 +30,7 @@ def test6(cond): else: t = SOURCE if cond: - SINK(t) + SINK_F(t) def test7(cond): if cond: @@ -40,8 +40,8 @@ def test7(cond): if cond: SINK(t) -def source2(arg): - return source(arg) +def source2(): + return source() def sink2(arg): sink(arg) @@ -50,7 +50,7 @@ def sink3(cond, arg): if cond: sink(arg) -def test8(cond): +def test8(cond): # This test currently adds nothing, as we only track SOURCE -> SINK, and previous tests already add flow from line 10 to line 13 t = source2() sink2(t) @@ -80,26 +80,46 @@ def test11(): def test12(): t = "safe" t = hub(t) - SINK(t) + SINK_F(t) import module def test13(): t = module.dangerous - SINK(t) + SINK(t) # Flow not found def test14(): t = module.safe - SINK(t) + SINK_F(t) def test15(): t = module.safe2 - SINK(t) + SINK_F(t) def test16(): t = module.dangerous_func() SINK(t) +class C(object): pass + +def x_sink(arg): + SINK(arg.x) + +def test17(): + t = C() + t.x = module.dangerous + SINK(t.x) # Flow not found + +def test18(): + t = C() + t.x = module.dangerous + t = hub(t) + x_sink(t) # Flow not found + +def test19(): + t = CUSTOM_SOURCE + t = hub(TAINT_FROM_ARG(t)) + CUSTOM_SINK(t) def test20(cond): if cond: @@ -117,23 +137,23 @@ def test21(cond): else: t = SOURCE if not cond: - CUSTOM_SINK(t) + CUSTOM_SINK_F(t) else: - SINK(t) + SINK_F(t) def test22(cond): if cond: t = CUSTOM_SOURCE else: t = SOURCE - t = TAINT_FROM_ARG(t) + t = TAINT_FROM_ARG(t) # Blocks data flow if cond: CUSTOM_SINK(t) else: SINK(t) from module import dangerous as unsafe -SINK(unsafe) +SINK(unsafe) # Flow not found def test23(): with SOURCE as t: @@ -141,16 +161,16 @@ def test23(): def test24(): s = SOURCE - SANITIZE(s) - SINK(s) + SANITIZE(s) # Does not block data flow + SINK_F(s) def test_update_extend(x, y): l = [SOURCE] d = {"key" : SOURCE} x.extend(l) y.update(d) - SINK(x[0]) - SINK(y["key"]) + SINK(x[0]) # Flow not 
found + SINK(y["key"]) # Flow not found l2 = list(l) d2 = dict(d) @@ -159,9 +179,36 @@ def test_truth(): if t: SINK(t) else: - SINK(t) # Regression: FP here + SINK_F(t) # False positive if not t: - SINK(t) # Regression: FP here + SINK_F(t) # False positive else: SINK(t) +def test_early_exit(): + t = FALSEY + if not t: + return + t + +def flow_through_type_test_if_no_class(): + t = SOURCE + if isinstance(t, str): + SINK(t) # Flows both here.. + else: + SINK(t) # ..and here + +def flow_in_iteration(): + t = [SOURCE] + for i in t: + SINK(i) # Flow not found + SINK(i) # Flow not found + +def flow_in_generator(): + seq = [SOURCE] + for i in seq: + yield i + +def flow_from_generator(): + for x in flow_in_generator(): + SINK(x) # Flow not found