From b905a3d5e3daeb906553db6f141c20db23e52ce4 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Tue, 6 Oct 2020 16:36:29 +0200 Subject: [PATCH 1/9] Python: Attribute access API --- .../src/experimental/dataflow/TypeTracker.qll | 17 +- .../dataflow/internal/Attributes.qll | 208 ++++++++++++++++++ .../dataflow/internal/DataFlowPublic.qll | 1 + .../dataflow/typetracking/attribute_tests.py | 70 ++++++ 4 files changed, 289 insertions(+), 7 deletions(-) create mode 100644 python/ql/src/experimental/dataflow/internal/Attributes.qll diff --git a/python/ql/src/experimental/dataflow/TypeTracker.qll b/python/ql/src/experimental/dataflow/TypeTracker.qll index 4491279a971e..fc14251211f2 100644 --- a/python/ql/src/experimental/dataflow/TypeTracker.qll +++ b/python/ql/src/experimental/dataflow/TypeTracker.qll @@ -6,7 +6,7 @@ private import internal.DataFlowPrivate /** Any string that may appear as the name of an attribute or access path. */ class AttributeName extends string { - AttributeName() { this = any(Attribute a).getName() } + AttributeName() { this = any(AttrRef a).getAttributeName() } } /** Either an attribute name, or the empty string (representing no attribute). */ @@ -115,11 +115,10 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) { * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called. */ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) { - exists(AttributeAssignment a, Node var | - a.getName() = attr and - simpleLocalFlowStep*(nodeTo, var) and - var.asVar() = a.getInput() and - nodeFrom.asCfgNode() = a.getValue() + exists(AttrWrite a | + a.getAttributeName() = attr and + nodeFrom = a.getValue() and + simpleLocalFlowStep*(nodeTo, a.getObject()) ) } @@ -127,7 +126,11 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) { * Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`. */ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) { - exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode()) + exists(AttrRead a | + attr = a.getAttributeName() and + nodeFrom = a.getObject() and + nodeTo = a + ) } /** diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll new file mode 100644 index 000000000000..ec8725f8c2ae --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -0,0 +1,208 @@ +/** This module provides an API for attribute reads and writes. */ + +import DataFlowPublic +private import DataFlowPrivate + +/** + * A data flow node that reads or writes an attribute of an object. + * + * This abstract base class only knows about the base object on which the attribute is being + * accessed, and the attribute itself, if it is statically inferrable. + */ +abstract class AttrRef extends Node { + /** + * Gets the data flow node corresponding to the object whose attribute is being read or written. + */ + abstract Node getObject(); + + /** + * Gets the expression control flow node that defines the attribute being accessed. This is + * usually an identifier or literal. + */ + abstract ExprNode getAttributeNameExpr(); + + /** Holds if this attribute reference may access an attribute named `attrName`. */ + predicate mayHaveAttributeName(string attrName) { none() } + + /** Gets the name of the attribute being read or written, if it can be determined statically. */ + abstract string getAttributeName(); +} + +/** + * A data flow node that writes an attribute of an object. This includes + * - Simple attribute writes: `object.attr = value` + * - Dynamic attribute writes: `setattr(object, attr, value)` + * - Fields written during class initialization: `class MyClass: attr = value` + */ +abstract class AttrWrite extends AttrRef { + /** Gets the data flow node corresponding to the value that is written to the attribute. */ + abstract Node getValue(); +} + +/** A simple attribute assignment: `object.attr = value`. */ +private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode { + DefinitionNode attr_node; + + AttributeAssignmentAsAttrWrite() { this = TCfgNode(attr_node) and attr_node instanceof AttrNode } + + override Node getValue() { result = TCfgNode(attr_node.(DefinitionNode).getValue()) } + + override Node getObject() { result = TCfgNode(attr_node.(AttrNode).getObject()) } + + override ExprNode getAttributeNameExpr() { + // Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be + // identifiers, and are therefore represented directly as strings. + // Use `getAttributeName` to access the name of the attribute. + none() + } + + override string getAttributeName() { result = attr_node.(AttrNode).getName() } +} + +import semmle.python.types.Builtins + +/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */ +private class BuiltInCallNode extends CallNode { + string name; + + BuiltInCallNode() { + // TODO disallow instances where `setattr` may refer to an in-scope variable of that name. + exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and + name = any(Builtin b).getName() + } + + /** Gets the name of the built-in function that is called at this `CallNode` */ + string getBuiltinName() { result = name } +} + +/** + * Represents a call to the built-ins that handle dynamic inspection and modification of + * attributes: `getattr`, `setattr`, and `hasattr`. + */ +private class BuiltinAttrCallNode extends BuiltInCallNode { + BuiltinAttrCallNode() { + name = "setattr" or + name = "getattr" or + name = "hasattr" + } + + /** Gets the control flow node for object on which the attribute is accessed. */ + ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] } + + /** + * Gets the control flow node for the value that is being written to the attribute. + * Only relevant for `setattr` calls. + */ + ControlFlowNode getValue() { + // only valid for `setattr` + name = "setattr" and + result in [this.getArg(2), this.getArgByName("value")] + } + + /** Gets the control flow node that defines the name of the attribute being accessed. */ + ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] } +} + +/** Represents calls to the built-in `setattr`. */ +private class SetAttrCallNode extends BuiltinAttrCallNode { + SetAttrCallNode() { name = "setattr" } +} + +/** Represents calls to the built-in `getattr`. */ +private class GetAttrCallNode extends BuiltinAttrCallNode { + GetAttrCallNode() { name = "getattr" } +} + +/** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)` */ +private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { + SetAttrCallNode setattr_call; + + SetAttrCallAsAttrWrite() { this = TCfgNode(setattr_call) } + + override Node getValue() { result = TCfgNode(setattr_call.getValue()) } + + override Node getObject() { result = TCfgNode(setattr_call.getObject()) } + + override ExprNode getAttributeNameExpr() { result = TCfgNode(setattr_call.getName()) } + + override string getAttributeName() { + // TODO track this back using local flow + exists(StrConst s, Node nodeFrom | + s = nodeFrom.asExpr() and + simpleLocalFlowStep*(nodeFrom, this.getAttributeNameExpr()) and + result = s.getText() + ) + } +} + +/** + * An attribute assignment via a class field, e.g. + * ```python + * class MyClass: + * attr = value + * ``` + * is treated as equivalent to `MyClass.attr = value`. + */ +private class ClassDefinitionAsAttrWrite extends AttrWrite, Node { + ClassExpr cls; + DefinitionNode attr_node; + + ClassDefinitionAsAttrWrite() { + attr_node instanceof NameNode and + this.asCfgNode() = attr_node and + attr_node.getScope() = cls.getInnerScope() + } + + override Node getValue() { result = TCfgNode(attr_node.getValue()) } + + override Node getObject() { result = TCfgNode(cls.getAFlowNode()) } + + override ExprNode getAttributeNameExpr() { none() } + + override string getAttributeName() { result = attr_node.(NameNode).getId() } +} + +/** + * A read of an attribute on an object. This includes + * - Simple attribute reads: `object.attr` + * - Dynamic attribute reads using `getattr`: `getattr(object, attr)` + * - Qualified imports: `from module import attr as name` + */ +abstract class AttrRead extends AttrRef, Node { } + +/** A simple attribute read, e.g. `object.attr` */ +private class AttributeReadAsAttrRead extends AttrRead, CfgNode { + AttrNode attr_node; + + AttributeReadAsAttrRead() { this = TCfgNode(attr_node) } + + override Node getObject() { result = TCfgNode(attr_node.getObject()) } + + override ExprNode getAttributeNameExpr() { + // Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be + // identifiers, and are therefore represented directly as strings. + // Use `getAttributeName` to access the name of the attribute. + none() + } + + override string getAttributeName() { result = attr_node.getName() } +} + +/** An attribute read using `getattr`: `getattr(object, attr)` */ +private class GetAttrCallAsAttrRead extends AttrRead, CfgNode { + GetAttrCallNode getattr_call; + + GetAttrCallAsAttrRead() { this.asCfgNode() = getattr_call } + + override Node getObject() { result = TCfgNode(getattr_call.getObject()) } + + override ExprNode getAttributeNameExpr() { result = TCfgNode(getattr_call.getName()) } + + override string getAttributeName() { + exists(StrConst s, Node nodeFrom | + s = nodeFrom.asExpr() and + simpleLocalFlowStep*(nodeFrom, this.getAttributeNameExpr()) and + result = s.getText() + ) + } +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll index fb68fe3b1f14..9cb9d115b226 100644 --- a/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll +++ b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll @@ -5,6 +5,7 @@ private import python private import DataFlowPrivate import experimental.dataflow.TypeTracker +import Attributes private import semmle.python.essa.SsaCompute /** diff --git a/python/ql/test/experimental/dataflow/typetracking/attribute_tests.py b/python/ql/test/experimental/dataflow/typetracking/attribute_tests.py index 7d88489842f9..5e8e87f8ae32 100644 --- a/python/ql/test/experimental/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/experimental/dataflow/typetracking/attribute_tests.py @@ -29,3 +29,73 @@ def test_incompatible_types(): expects_int(x) # $int=field $f+:str=field x.field = str("Hello") # $f+:int=field $str=field $f+:int $str expects_string(x) # $f+:int=field $str=field + + +# Attributes assigned statically to a class + +class MyClass: # $tracked=field + field = tracked # $tracked + +lookup = MyClass.field # $tracked $tracked=field +instance = MyClass() # $tracked=field +lookup2 = instance.field # $f-:tracked + +## Dynamic attribute access + +# Via `getattr`/`setattr` + +def setattr_immediate_write(): + x = SomeClass() # $tracked=foo + setattr(x,"foo", tracked) # $tracked $tracked=foo + y = x.foo # $tracked $tracked=foo + do_stuff(y) # $tracked + +def getattr_immediate_read(): + x = SomeClass() # $tracked=foo + x.foo = tracked # $tracked $tracked=foo + y = getattr(x,"foo") # $tracked $tracked=foo + do_stuff(y) # $tracked + +def setattr_indirect_write(): + attr = "foo" + x = SomeClass() # $tracked=foo + setattr(x, attr, tracked) # $tracked $tracked=foo + y = x.foo # $tracked $tracked=foo + do_stuff(y) # $tracked + +def getattr_indirect_read(): + attr = "foo" + x = SomeClass() # $tracked=foo + x.foo = tracked # $tracked $tracked=foo + y = getattr(x, attr) #$tracked $tracked=foo + do_stuff(y) # $tracked + +# Via `__dict__` -- not currently implemented. + +def dunder_dict_immediate_write(): + x = SomeClass() # $f-:tracked=foo + x.__dict__["foo"] = tracked # $tracked $f-:tracked=foo + y = x.foo # $f-:tracked $f-:tracked=foo + do_stuff(y) # $f-:tracked + +def dunder_dict_immediate_read(): + x = SomeClass() # $tracked=foo + x.foo = tracked # $tracked $tracked=foo + y = x.__dict__["foo"] # $f-:tracked $tracked=foo + do_stuff(y) # $f-:tracked + +def dunder_dict_indirect_write(): + attr = "foo" + x = SomeClass() # $f-:tracked=foo + x.__dict__[attr] = tracked # $tracked $f-:tracked=foo + y = x.foo # $f-:tracked $f-:tracked=foo + do_stuff(y) # $f-:tracked + +def dunder_dict_indirect_read(): + attr = "foo" + x = SomeClass() # $tracked=foo + x.foo = tracked # $tracked $tracked=foo + y = x.__dict__[attr] # $f-:tracked $tracked=foo + do_stuff(y) # $f-:tracked + + From e9ecc00b370ba137ae550f86ea7483401367e6e5 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Thu, 8 Oct 2020 14:53:54 +0200 Subject: [PATCH 2/9] Python: Implement and use `mayHaveAttributeName` --- python/ql/src/experimental/dataflow/TypeTracker.qll | 4 ++-- .../src/experimental/dataflow/internal/Attributes.qll | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/dataflow/TypeTracker.qll b/python/ql/src/experimental/dataflow/TypeTracker.qll index fc14251211f2..a5e3493c12d0 100644 --- a/python/ql/src/experimental/dataflow/TypeTracker.qll +++ b/python/ql/src/experimental/dataflow/TypeTracker.qll @@ -116,7 +116,7 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) { */ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) { exists(AttrWrite a | - a.getAttributeName() = attr and + a.mayHaveAttributeName(attr) and nodeFrom = a.getValue() and simpleLocalFlowStep*(nodeTo, a.getObject()) ) @@ -127,7 +127,7 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) { */ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) { exists(AttrRead a | - attr = a.getAttributeName() and + a.mayHaveAttributeName(attr) and nodeFrom = a.getObject() and nodeTo = a ) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index ec8725f8c2ae..4f0259003a4e 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -1,5 +1,6 @@ /** This module provides an API for attribute reads and writes. */ +import DataFlowUtil import DataFlowPublic private import DataFlowPrivate @@ -22,7 +23,14 @@ abstract class AttrRef extends Node { abstract ExprNode getAttributeNameExpr(); /** Holds if this attribute reference may access an attribute named `attrName`. */ - predicate mayHaveAttributeName(string attrName) { none() } + predicate mayHaveAttributeName(string attrName) { + attrName = this.getAttributeName() + or + exists(Node nodeFrom | + localFlow(nodeFrom, this.getAttributeNameExpr()) and + attrName = nodeFrom.asExpr().(StrConst).getText() + ) + } /** Gets the name of the attribute being read or written, if it can be determined statically. */ abstract string getAttributeName(); From 31596ef56988d1f97fcc13bf551bc84bde5e0af7 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Thu, 8 Oct 2020 14:55:27 +0200 Subject: [PATCH 3/9] Python: Clean up and extend built-in call node classes --- .../src/experimental/dataflow/internal/Attributes.qll | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index 4f0259003a4e..7317300a825f 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -74,7 +74,7 @@ private class BuiltInCallNode extends CallNode { string name; BuiltInCallNode() { - // TODO disallow instances where `setattr` may refer to an in-scope variable of that name. + // TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name. exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and name = any(Builtin b).getName() } @@ -85,14 +85,10 @@ private class BuiltInCallNode extends CallNode { /** * Represents a call to the built-ins that handle dynamic inspection and modification of - * attributes: `getattr`, `setattr`, and `hasattr`. + * attributes: `getattr`, `setattr`, `hasattr`, and `delattr`. */ private class BuiltinAttrCallNode extends BuiltInCallNode { - BuiltinAttrCallNode() { - name = "setattr" or - name = "getattr" or - name = "hasattr" - } + BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] } /** Gets the control flow node for object on which the attribute is accessed. */ ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] } From ceb249680ec909086ef8e841489315583134cb20 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Thu, 8 Oct 2020 15:00:14 +0200 Subject: [PATCH 4/9] Python: Reuse existing `node` fields Also changes `x = TCfgNode(y)` to `x.asCfgNode() = y` where applicable. --- .../dataflow/internal/Attributes.qll | 83 +++++++++++-------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index 7317300a825f..9fd1adf83f65 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -47,15 +47,24 @@ abstract class AttrWrite extends AttrRef { abstract Node getValue(); } +/** + * Represents a control flow node for a simple attribute assignment. That is, + * ```python + * object.attr = value + * ``` + * Also gives access to the `value` being written, by extending `DefinitionNode`. + */ +private class AttributeAssignmentNode extends DefinitionNode, AttrNode, DataFlowCfgNode { + override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() } +} + /** A simple attribute assignment: `object.attr = value`. */ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode { - DefinitionNode attr_node; + override AttributeAssignmentNode node; - AttributeAssignmentAsAttrWrite() { this = TCfgNode(attr_node) and attr_node instanceof AttrNode } + override Node getValue() { result.asCfgNode() = node.getValue() } - override Node getValue() { result = TCfgNode(attr_node.(DefinitionNode).getValue()) } - - override Node getObject() { result = TCfgNode(attr_node.(AttrNode).getObject()) } + override Node getObject() { result.asCfgNode() = node.getObject() } override ExprNode getAttributeNameExpr() { // Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be @@ -64,13 +73,13 @@ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode { none() } - override string getAttributeName() { result = attr_node.(AttrNode).getName() } + override string getAttributeName() { result = node.getName() } } import semmle.python.types.Builtins /** Represents `CallNode`s that may refer to calls to built-in functions or classes. */ -private class BuiltInCallNode extends CallNode { +private class BuiltInCallNode extends CallNode, DataFlowCfgNode { string name; BuiltInCallNode() { @@ -119,15 +128,13 @@ private class GetAttrCallNode extends BuiltinAttrCallNode { /** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)` */ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { - SetAttrCallNode setattr_call; - - SetAttrCallAsAttrWrite() { this = TCfgNode(setattr_call) } + override SetAttrCallNode node; - override Node getValue() { result = TCfgNode(setattr_call.getValue()) } + override Node getValue() { result.asCfgNode() = node.getValue() } - override Node getObject() { result = TCfgNode(setattr_call.getObject()) } + override Node getObject() { result.asCfgNode() = node.getObject() } - override ExprNode getAttributeNameExpr() { result = TCfgNode(setattr_call.getName()) } + override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() } override string getAttributeName() { // TODO track this back using local flow @@ -139,6 +146,18 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { } } +/** + * Represents an attribute of a class that is assigned statically during class definition. For instance + * ```python + * class MyClass: + * attr = value + * ... + * ``` + * Instances of this class correspond to the `NameNode` for `attr`, and also gives access to `value` by + * virtue of being a `DefinitionNode`. + */ +private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode, DataFlowCfgNode { } + /** * An attribute assignment via a class field, e.g. * ```python @@ -147,23 +166,19 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { * ``` * is treated as equivalent to `MyClass.attr = value`. */ -private class ClassDefinitionAsAttrWrite extends AttrWrite, Node { +private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode { ClassExpr cls; - DefinitionNode attr_node; + override ClassAttributeAssignmentNode node; - ClassDefinitionAsAttrWrite() { - attr_node instanceof NameNode and - this.asCfgNode() = attr_node and - attr_node.getScope() = cls.getInnerScope() - } + ClassDefinitionAsAttrWrite() { node.getScope() = cls.getInnerScope() } - override Node getValue() { result = TCfgNode(attr_node.getValue()) } + override Node getValue() { result.asCfgNode() = node.getValue() } - override Node getObject() { result = TCfgNode(cls.getAFlowNode()) } + override Node getObject() { result.asCfgNode() = cls.getAFlowNode() } override ExprNode getAttributeNameExpr() { none() } - override string getAttributeName() { result = attr_node.(NameNode).getId() } + override string getAttributeName() { result = node.getId() } } /** @@ -174,13 +189,17 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, Node { */ abstract class AttrRead extends AttrRef, Node { } +/** + * A convenience class for embedding `AttrNode` into `DataFlowCfgNode`, as the former is not + * obviously a subtype of the latter. + */ +private class DataFlowAttrNode extends AttrNode, DataFlowCfgNode { } + /** A simple attribute read, e.g. `object.attr` */ private class AttributeReadAsAttrRead extends AttrRead, CfgNode { - AttrNode attr_node; + override DataFlowAttrNode node; - AttributeReadAsAttrRead() { this = TCfgNode(attr_node) } - - override Node getObject() { result = TCfgNode(attr_node.getObject()) } + override Node getObject() { result.asCfgNode() = node.getObject() } override ExprNode getAttributeNameExpr() { // Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be @@ -189,18 +208,16 @@ private class AttributeReadAsAttrRead extends AttrRead, CfgNode { none() } - override string getAttributeName() { result = attr_node.getName() } + override string getAttributeName() { result = node.getName() } } /** An attribute read using `getattr`: `getattr(object, attr)` */ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode { - GetAttrCallNode getattr_call; - - GetAttrCallAsAttrRead() { this.asCfgNode() = getattr_call } + override GetAttrCallNode node; - override Node getObject() { result = TCfgNode(getattr_call.getObject()) } + override Node getObject() { result.asCfgNode() = node.getObject() } - override ExprNode getAttributeNameExpr() { result = TCfgNode(getattr_call.getName()) } + override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() } override string getAttributeName() { exists(StrConst s, Node nodeFrom | From df447c0af9ec7d62b428ebdfb5d21efcad2c1203 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Thu, 8 Oct 2020 15:01:24 +0200 Subject: [PATCH 5/9] Python: Remove flow from `getAttributeName` --- .../experimental/dataflow/internal/Attributes.qll | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index 9fd1adf83f65..ad8256c9a5a4 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -137,12 +137,7 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() } override string getAttributeName() { - // TODO track this back using local flow - exists(StrConst s, Node nodeFrom | - s = nodeFrom.asExpr() and - simpleLocalFlowStep*(nodeFrom, this.getAttributeNameExpr()) and - result = s.getText() - ) + result = this.getAttributeNameExpr().asExpr().(StrConst).getText() } } @@ -220,10 +215,6 @@ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode { override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() } override string getAttributeName() { - exists(StrConst s, Node nodeFrom | - s = nodeFrom.asExpr() and - simpleLocalFlowStep*(nodeFrom, this.getAttributeNameExpr()) and - result = s.getText() - ) + result = this.getAttributeNameExpr().asExpr().(StrConst).getText() } } From d46453caaa6dd28ab0ed5a183fa077e4e17f4c61 Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Thu, 8 Oct 2020 18:08:55 +0200 Subject: [PATCH 6/9] Python: Support named imports as attribute reads Required a small change in `DataFlow::importModule` to get the desired behaviour (cf. the type trackers defined in `moduleattr.ql`, but this should be harmless. The node that is added doesn't have any flow anywhere. --- .../dataflow/internal/Attributes.qll | 28 +++++++++++++++++++ .../dataflow/internal/DataFlowUtil.qll | 10 +++++-- .../import-helper/ImportHelper.expected | 20 +++++++++++++ .../dataflow/typetracking/import_as_attr.py | 9 ++++++ .../dataflow/typetracking/moduleattr.expected | 10 +++++++ .../dataflow/typetracking/moduleattr.ql | 23 +++++++++++++++ 6 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 python/ql/test/experimental/dataflow/typetracking/import_as_attr.py create mode 100644 python/ql/test/experimental/dataflow/typetracking/moduleattr.expected create mode 100644 python/ql/test/experimental/dataflow/typetracking/moduleattr.ql diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index ad8256c9a5a4..b7fb893d0019 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -218,3 +218,31 @@ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode { result = this.getAttributeNameExpr().asExpr().(StrConst).getText() } } + +/** + * A convenience class for embedding `ImportMemberNode` into `DataFlowCfgNode`, as the former is not + * obviously a subtype of the latter. + */ +private class DataFlowImportMemberNode extends ImportMemberNode, DataFlowCfgNode { } + +/** + * Represents a named import as an attribute read. That is, + * ```python + * from module import attr as attr_ref + * ``` + * is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly. + */ +private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode { + override DataFlowImportMemberNode node; + + override Node getObject() { result.asCfgNode() = node.getModule(_) } + + override ExprNode getAttributeNameExpr() { + // The name of an imported attribute doesn't exist as a `Node` in the control flow graph, as it + // can only ever be an identifier, and is therefore represented directly as a string. + // Use `getAttributeName` to access the name of the attribute. + none() + } + + override string getAttributeName() { exists(node.getModule(result)) } +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll index c868e1762ecc..b18614572a7b 100644 --- a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll +++ b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll @@ -36,7 +36,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } * * Also see `DataFlow::importMember` */ -EssaNode importModule(string name) { +Node importModule(string name) { exists(Variable var, Import imp, Alias alias | alias = imp.getAName() and alias.getAsname() = var.getAStore() and @@ -45,8 +45,14 @@ EssaNode importModule(string name) { or name = alias.getValue().(ImportExpr).getImportedModuleName() ) and - result.getVar().(AssignmentDefinition).getSourceVariable() = var + result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var ) + or + // In `from module import attr`, we want to consider `module` to be an expression that refers to a + // module of that name, as this allows us to refer to attributes of this module, even if it's + // never imported directly. Note that there crucially isn't any _flow_ from `module` to references + // to that same identifier. + result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name) } /** diff --git a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected index 521bb780a56a..2085902c6cfe 100644 --- a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected +++ b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected @@ -1,16 +1,36 @@ importModule +| test1.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | | test1.py:1:8:1:12 | GSSA Variable mypkg | mypkg | +| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg | +| test2.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg | | test2.py:1:19:1:21 | GSSA Variable foo | mypkg.foo | | test2.py:1:24:1:26 | GSSA Variable bar | mypkg.bar | +| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg | +| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo | +| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg | +| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar | | test3.py:2:8:2:16 | GSSA Variable mypkg | mypkg | +| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg | +| test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test4.py:1:21:1:24 | GSSA Variable _foo | mypkg.foo | +| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg | +| test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar | | test4.py:2:21:2:24 | GSSA Variable _bar | mypkg.bar | +| test5.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | | test5.py:1:8:1:12 | GSSA Variable mypkg | mypkg | +| test5.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg | | test5.py:9:26:9:29 | GSSA Variable _bar | mypkg.bar | +| test6.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | | test6.py:1:8:1:12 | GSSA Variable mypkg | mypkg | +| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg | +| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test6.py:5:8:5:16 | GSSA Variable mypkg | mypkg | +| test7.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:1:19:1:21 | GSSA Variable foo | mypkg.foo | +| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg | +| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test7.py:5:8:5:16 | GSSA Variable mypkg | mypkg | +| test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:9:19:9:21 | GSSA Variable foo | mypkg.foo | importMember | test2.py:1:19:1:21 | GSSA Variable foo | mypkg | foo | diff --git a/python/ql/test/experimental/dataflow/typetracking/import_as_attr.py b/python/ql/test/experimental/dataflow/typetracking/import_as_attr.py new file mode 100644 index 000000000000..1e2085f126dc --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking/import_as_attr.py @@ -0,0 +1,9 @@ +from module import attr as attr_ref + +x = attr_ref + +def fun(): + y = attr_ref + +# The following should _not_ be a reference to the above module, since we don't actually import it. +z = module diff --git a/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected b/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected new file mode 100644 index 000000000000..adfc8c5a3795 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking/moduleattr.expected @@ -0,0 +1,10 @@ +module_tracker +| import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr | +module_attr_tracker +| import_as_attr.py:0:0:0:0 | ModuleVariableNode for Global Variable attr_ref in Module import_as_attr | +| import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember | +| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref | +| import_as_attr.py:3:1:3:1 | GSSA Variable x | +| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref | +| import_as_attr.py:6:5:6:5 | SSA variable y | +| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref | diff --git a/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql b/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql new file mode 100644 index 000000000000..15616d918609 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql @@ -0,0 +1,23 @@ +import python +import experimental.dataflow.DataFlow +import experimental.dataflow.TypeTracker + +DataFlow::Node module_tracker(TypeTracker t) { + t.start() and + result = DataFlow::importModule("module") + or + exists(TypeTracker t2 | result = module_tracker(t2).track(t2, t)) +} + +query DataFlow::Node module_tracker() { result = module_tracker(DataFlow::TypeTracker::end()) } + +DataFlow::Node module_attr_tracker(TypeTracker t) { + t.startInAttr("attr") and + result = module_tracker() + or + exists(TypeTracker t2 | result = module_attr_tracker(t2).track(t2, t)) +} + +query DataFlow::Node module_attr_tracker() { + result = module_attr_tracker(DataFlow::TypeTracker::end()) +} From 60eec7b1363c0182d54c567b4495a48646b993ca Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 8 Oct 2020 18:14:20 +0200 Subject: [PATCH 7/9] Python: Update python/ql/src/experimental/dataflow/internal/Attributes.qll Co-authored-by: Rasmus Wriedt Larsen --- python/ql/src/experimental/dataflow/internal/Attributes.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index b7fb893d0019..ba380dcb06c5 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -17,7 +17,7 @@ abstract class AttrRef extends Node { abstract Node getObject(); /** - * Gets the expression control flow node that defines the attribute being accessed. This is + * Gets the expression node that defines the attribute being accessed, if any. This is * usually an identifier or literal. */ abstract ExprNode getAttributeNameExpr(); From b07c7abacc34b71d4426cd12cb55853d565277da Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Mon, 12 Oct 2020 13:49:08 +0200 Subject: [PATCH 8/9] Python: Clear up attribute name access QLDoc --- .../experimental/dataflow/internal/Attributes.qll | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index ba380dcb06c5..d47233166edb 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -22,7 +22,11 @@ abstract class AttrRef extends Node { */ abstract ExprNode getAttributeNameExpr(); - /** Holds if this attribute reference may access an attribute named `attrName`. */ + /** + * Holds if this attribute reference may access an attribute named `attrName`. + * Uses local data flow to track potential attribute names, which may lead to imprecision. If more + * precision is needed, consider using `getAttributeName` instead. + */ predicate mayHaveAttributeName(string attrName) { attrName = this.getAttributeName() or @@ -32,7 +36,11 @@ abstract class AttrRef extends Node { ) } - /** Gets the name of the attribute being read or written, if it can be determined statically. */ + /** + * Gets the name of the attribute being read or written. Only holds if the attribute name has + * been uniquely determined statically. For attributes that are computed dynamically, + * consider using `mayHaveAttributeName` instead. + */ abstract string getAttributeName(); } From 3288cf1a75a1d19817821a02ce9732e580ac68af Mon Sep 17 00:00:00 2001 From: Taus Brock-Nannestad Date: Mon, 12 Oct 2020 16:38:21 +0200 Subject: [PATCH 9/9] Python: Hopefully final changes to documentation. --- .../dataflow/internal/Attributes.qll | 6 ++-- .../dataflow/internal/DataFlowUtil.qll | 28 +++++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/python/ql/src/experimental/dataflow/internal/Attributes.qll b/python/ql/src/experimental/dataflow/internal/Attributes.qll index d47233166edb..6ba1f7e9cc07 100644 --- a/python/ql/src/experimental/dataflow/internal/Attributes.qll +++ b/python/ql/src/experimental/dataflow/internal/Attributes.qll @@ -37,9 +37,9 @@ abstract class AttrRef extends Node { } /** - * Gets the name of the attribute being read or written. Only holds if the attribute name has - * been uniquely determined statically. For attributes that are computed dynamically, - * consider using `mayHaveAttributeName` instead. + * Gets the name of the attribute being read or written. For dynamic attribute accesses, this + * method is not guaranteed to return a result. For such cases, using `mayHaveAttributeName` may yield + * better results. */ abstract string getAttributeName(); } diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll index b18614572a7b..762ce7fb9218 100644 --- a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll +++ b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll @@ -18,7 +18,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } /** - * Gets an EssaNode that holds the module imported by `name`. + * Gets a `Node` that refers to the module referenced by `name`. * Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a * reference to the module `pkg`. * @@ -27,6 +27,9 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } * 2. `from import ` when ` = + "." + ` * 3. `from import ` when ` = + "." + ` * + * Finally, in `from import ` we consider the `ImportExpr` corresponding to + * `` to be a reference to that module. + * * Note: * While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values, * it's highly unlikely that this will be a problem in production level code. @@ -48,10 +51,25 @@ Node importModule(string name) { result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var ) or - // In `from module import attr`, we want to consider `module` to be an expression that refers to a - // module of that name, as this allows us to refer to attributes of this module, even if it's - // never imported directly. Note that there crucially isn't any _flow_ from `module` to references - // to that same identifier. + // Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to + // be a reference to a module (since that reference only makes sense locally within the `import` + // statement), it's important for our use of type trackers to consider this local reference to + // also refer to the `foo` module. That way, if one wants to track references to the `bar` + // attribute using a type tracker, one can simply write + // + // ```ql + // DataFlow::Node bar_attr_tracker(TypeTracker t) { + // t.startInAttr("bar") and + // result = foo_module_tracker() + // or + // exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t)) + // } + // ``` + // + // Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module. + // Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz` + // is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a + // reference to `foo.bar`, as desired. result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name) }