From 3b7e29bed6dbaf7d5d2af59144d904965e1be849 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 20 Dec 2023 12:06:52 +0100 Subject: [PATCH 1/4] Python: add test for crosstalk --- .../ApiGraphs/py3/test_crosstalk.expected | 4 ++++ .../library-tests/ApiGraphs/py3/test_crosstalk.py | 13 +++++++++++++ .../library-tests/ApiGraphs/py3/test_crosstalk.ql | 8 ++++++++ 3 files changed, 25 insertions(+) create mode 100644 python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected create mode 100644 python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.py create mode 100644 python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.ql diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected new file mode 100644 index 000000000000..e45d1430ca50 --- /dev/null +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected @@ -0,0 +1,4 @@ +| test_crosstalk.py:8:16:8:18 | ControlFlowNode for f() | bar | +| test_crosstalk.py:8:16:8:18 | ControlFlowNode for f() | baz | +| test_crosstalk.py:13:16:13:18 | ControlFlowNode for g() | bar | +| test_crosstalk.py:13:16:13:18 | ControlFlowNode for g() | baz | diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.py b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.py new file mode 100644 index 000000000000..c58d5770031b --- /dev/null +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.py @@ -0,0 +1,13 @@ + +def outer(): + from foo import bar, baz + + def inner_bar(): + f = bar + g = baz + return f() + + def inner_baz(): + f = bar + g = baz + return g() \ No newline at end of file diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.ql b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.ql new file mode 100644 index 000000000000..0367e8548357 --- /dev/null +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.ql @@ -0,0 +1,8 @@ +import python +import semmle.python.ApiGraphs + +from API::CallNode callNode, string member +where + callNode = API::moduleImport("foo").getMember(member).getACall() and + callNode.getLocation().getFile().getBaseName() = "test_crosstalk.py" +select callNode, member From 169d7a3c9827684e5a888503a3adaa0c1bbc4294 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 19 Dec 2023 22:55:41 +0100 Subject: [PATCH 2/4] Python: Add scope entry definition nodes otherwise we confuse captured variables in the single scope entry cfg node. Now we have one for each defined variable. --- .../dataflow/new/internal/DataFlowPrivate.qll | 5 +++- .../dataflow/new/internal/DataFlowPublic.qll | 29 +++++++++++++++++-- .../dataflow/new/internal/LocalSources.qll | 4 +-- .../new/internal/TypeTrackingImpl.qll | 2 +- .../dataflow/coverage/localFlow.expected | 10 +++---- .../EnclosingCallable.expected | 1 - .../dataflow/typetracking/moduleattr.expected | 2 +- .../dataflow/typetracking/tracked.ql | 2 +- .../ApiGraphs/py3/test_crosstalk.expected | 2 -- 9 files changed, 41 insertions(+), 16 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index f1f9668e8563..50c095c8c656 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -352,7 +352,10 @@ module LocalFlow { // nodeFrom is `y` on first line // nodeTo is `y` on second line exists(EssaDefinition def | - nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and + nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() + or + nodeFrom.(ScopeEntryDefinitionNode).getDefinition() = def + | AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode()) ) or diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index f5731f44ab02..35eae2f99cb8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -27,9 +27,12 @@ newtype TNode = isExpressionNode(node) or node.getNode() instanceof Pattern - or - node = any(ScopeEntryDefinition def | not def.getScope() instanceof Module).getDefiningNode() } or + /** + * A node corresponding to a scope entry definition. That is, the value of a variable + * as it enters a scope. + */ + TScopeEntryDefinitionNode(ScopeEntryDefinition def) { not def.getScope() instanceof Module } or /** * A synthetic node representing the value of an object before a state change. * @@ -257,6 +260,28 @@ class ExprNode extends CfgNode { /** Gets a node corresponding to expression `e`. */ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e } +/** + * A node corresponding to a scope entry definition. That is, the value of a variable + * as it enters a scope. + */ +class ScopeEntryDefinitionNode extends Node, TScopeEntryDefinitionNode { + ScopeEntryDefinition def; + + ScopeEntryDefinitionNode() { this = TScopeEntryDefinitionNode(def) } + + /** Gets the `ScopeEntryDefinition` associated with this node. */ + ScopeEntryDefinition getDefinition() { result = def } + + /** Gets the source variable represented by this node. */ + SsaSourceVariable getVariable() { result = def.getSourceVariable() } + + override Location getLocation() { result = def.getLocation() } + + override Scope getScope() { result = def.getScope() } + + override string toString() { result = "Entry definition for " + this.getVariable().toString() } +} + /** * The value of a parameter at function entry, viewed as a node in a data * flow graph. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index ac45700bf470..34b137b35115 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -71,7 +71,7 @@ class LocalSourceNode extends Node { or // We include all scope entry definitions, as these act as the local source within the scope they // enter. - this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode() + this instanceof ScopeEntryDefinitionNode or this instanceof ParameterNode } @@ -167,7 +167,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode { LocalSourceNodeNotModuleVariableNode() { this instanceof ExprNode or - this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode() + this instanceof ScopeEntryDefinitionNode } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 4c3538d75f73..1a9bdb5202ee 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -251,7 +251,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { e.getSourceVariable() = var and var.hasDefiningNode(def) | - nodeTo.asCfgNode() = e.getDefiningNode() and + nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and nodeFrom.asCfgNode() = def.getValue() and var.getScope().getScope*() = nodeFrom.getScope() ) diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.expected b/python/ql/test/experimental/dataflow/coverage/localFlow.expected index 9712b9939f08..1cb2cab49fae 100644 --- a/python/ql/test/experimental/dataflow/coverage/localFlow.expected +++ b/python/ql/test/experimental/dataflow/coverage/localFlow.expected @@ -1,12 +1,12 @@ -| test.py:41:1:41:33 | Entry node for Function test_tuple_with_local_flow | test.py:42:10:42:18 | ControlFlowNode for NONSOURCE | -| test.py:41:1:41:33 | Entry node for Function test_tuple_with_local_flow | test.py:42:21:42:26 | ControlFlowNode for SOURCE | -| test.py:41:1:41:33 | Entry node for Function test_tuple_with_local_flow | test.py:44:5:44:8 | ControlFlowNode for SINK | +| test.py:41:1:41:33 | Entry definition for SsaSourceVariable NONSOURCE | test.py:42:10:42:18 | ControlFlowNode for NONSOURCE | +| test.py:41:1:41:33 | Entry definition for SsaSourceVariable SINK | test.py:44:5:44:8 | ControlFlowNode for SINK | +| test.py:41:1:41:33 | Entry definition for SsaSourceVariable SOURCE | test.py:42:21:42:26 | ControlFlowNode for SOURCE | | test.py:42:5:42:5 | ControlFlowNode for x | test.py:43:9:43:9 | ControlFlowNode for x | | test.py:42:10:42:26 | ControlFlowNode for Tuple | test.py:42:5:42:5 | ControlFlowNode for x | | test.py:43:5:43:5 | ControlFlowNode for y | test.py:44:10:44:10 | ControlFlowNode for y | | test.py:43:9:43:12 | ControlFlowNode for Subscript | test.py:43:5:43:5 | ControlFlowNode for y | -| test.py:208:1:208:53 | Entry node for Function test_nested_comprehension_deep_with_local_flow | test.py:209:25:209:30 | ControlFlowNode for SOURCE | -| test.py:208:1:208:53 | Entry node for Function test_nested_comprehension_deep_with_local_flow | test.py:210:5:210:8 | ControlFlowNode for SINK | +| test.py:208:1:208:53 | Entry definition for SsaSourceVariable SINK | test.py:210:5:210:8 | ControlFlowNode for SINK | +| test.py:208:1:208:53 | Entry definition for SsaSourceVariable SOURCE | test.py:209:25:209:30 | ControlFlowNode for SOURCE | | test.py:209:5:209:5 | ControlFlowNode for x | test.py:210:10:210:10 | ControlFlowNode for x | | test.py:209:9:209:68 | ControlFlowNode for .0 | test.py:209:9:209:68 | ControlFlowNode for .0 | | test.py:209:9:209:68 | ControlFlowNode for ListComp | test.py:209:5:209:5 | ControlFlowNode for x | diff --git a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected index a2f7e02fd4ec..3bd4cd81d54f 100644 --- a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected +++ b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected @@ -18,7 +18,6 @@ | generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 | | generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 | | generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp | -| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | Entry node for Function listcomp | | generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield | | generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x | | generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x | diff --git a/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected b/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected index baa29e053ce2..ff9673aaaea8 100644 --- a/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected +++ b/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected @@ -6,6 +6,6 @@ module_attr_tracker | import_as_attr.py:1:28:1:35 | ControlFlowNode for attr_ref | | import_as_attr.py:3:1:3:1 | ControlFlowNode for x | | import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref | -| import_as_attr.py:5:1:5:10 | Entry node for Function fun | +| import_as_attr.py:5:1:5:10 | Entry definition for SsaSourceVariable attr_ref | | import_as_attr.py:6:5:6:5 | ControlFlowNode for y | | import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref | diff --git a/python/ql/test/experimental/dataflow/typetracking/tracked.ql b/python/ql/test/experimental/dataflow/typetracking/tracked.ql index e1e707612211..56e0c19eed3c 100644 --- a/python/ql/test/experimental/dataflow/typetracking/tracked.ql +++ b/python/ql/test/experimental/dataflow/typetracking/tracked.ql @@ -26,7 +26,7 @@ module TrackedTest implements TestSig { not e.getLocation().getStartLine() = 0 and // We do not wish to annotate scope entry definitions, // as they do not appear in the source code. - not e.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode() and + not e instanceof DataFlow::ScopeEntryDefinitionNode and tag = "tracked" and location = e.getLocation() and value = t.getAttr() and diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected index e45d1430ca50..58698e5ec9d6 100644 --- a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected @@ -1,4 +1,2 @@ | test_crosstalk.py:8:16:8:18 | ControlFlowNode for f() | bar | -| test_crosstalk.py:8:16:8:18 | ControlFlowNode for f() | baz | -| test_crosstalk.py:13:16:13:18 | ControlFlowNode for g() | bar | | test_crosstalk.py:13:16:13:18 | ControlFlowNode for g() | baz | From 07c88dc0be38177d2c9d13d79eb8daced4f2d27a Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 20 Dec 2023 10:26:07 +0100 Subject: [PATCH 3/4] Python: remove unnecessary post-processing also, it is slightly incorrect... --- .../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 50c095c8c656..0043aa3b2fda 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -484,8 +484,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { * or at runtime when callables in the module are called. */ predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) { - IncludePostUpdateFlow::step/2>::step(nodeFrom, - nodeTo) + LocalFlow::localFlowStep(nodeFrom, nodeTo) } private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) { From 7749b8e60e4b86b2183381f8ff00d1ceb3b1e2b3 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 20 Dec 2023 12:53:37 +0100 Subject: [PATCH 4/4] Python: add change-note --- .../2023-12-20-add-scope-entry-definition-nodes.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 python/ql/lib/change-notes/2023-12-20-add-scope-entry-definition-nodes.md diff --git a/python/ql/lib/change-notes/2023-12-20-add-scope-entry-definition-nodes.md b/python/ql/lib/change-notes/2023-12-20-add-scope-entry-definition-nodes.md new file mode 100644 index 000000000000..f2fca008e44c --- /dev/null +++ b/python/ql/lib/change-notes/2023-12-20-add-scope-entry-definition-nodes.md @@ -0,0 +1,5 @@ +--- +category: fix +--- + +- We would previously confuse all captured variables into a single scope entry node. Now they each get their own node so they can be tracked properly.