diff --git a/python/change-notes/2021-02-18-type-backtrackers.md b/python/change-notes/2021-02-18-type-backtrackers.md
new file mode 100644
index 000000000000..d6829a6da65d
--- /dev/null
+++ b/python/change-notes/2021-02-18-type-backtrackers.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added support for type backtracking. Dual to the `TypeTracker` class, the `TypeBackTracker` class allows the propagation of user-defined type information backwards from a set of data flow nodes.
diff --git a/python/ql/src/semmle/python/dataflow/new/TypeTracker.qll b/python/ql/src/semmle/python/dataflow/new/TypeTracker.qll
index b6de3cd7764b..42352cf924d8 100644
--- a/python/ql/src/semmle/python/dataflow/new/TypeTracker.qll
+++ b/python/ql/src/semmle/python/dataflow/new/TypeTracker.qll
@@ -324,3 +324,144 @@ module TypeTracker {
    */
   TypeTracker end() { result.end() }
 }
+
+private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalAttributeName attr)
+
+/**
+ * Summary of the steps needed to back-track a use of a value to a given dataflow node.
+ *
+ * This can for example be used to track callbacks that are passed to a certain API,
+ * so we can model specific parameters of that callback as having a certain type.
+ *
+ * Note that type back-tracking does not provide a source/sink relation, that is,
+ * it may determine that a node will be used in an API call somewhere, but it won't
+ * determine exactly where that use was, or the path that led to the use.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for back-tracking some callback type `myCallback`:
+ *
+ * ```ql
+ * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
+ *   t.start() and
+ *   result = (< some API call >).getArgument(< n >).getALocalSource()
+ *   or
+ *   exists (DataFlow::TypeBackTracker t2 |
+ *     result = myCallback(t2).backtrack(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
+ * ```
+ *
+ * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
+ * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
+ * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
+ */
+class TypeBackTracker extends TTypeBackTracker {
+  Boolean hasReturn;
+  string attr;
+
+  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, attr) }
+
+  /** Gets the summary resulting from prepending `step` to this type back-tracking summary. */
+  TypeBackTracker prepend(StepSummary step) {
+    step = LevelStep() and result = this
+    or
+    step = CallStep() and hasReturn = false and result = this
+    or
+    step = ReturnStep() and result = MkTypeBackTracker(true, attr)
+    or
+    exists(string p | step = LoadStep(p) and attr = "" and result = MkTypeBackTracker(hasReturn, p))
+    or
+    step = StoreStep(attr) and result = MkTypeBackTracker(hasReturn, "")
+  }
+
+  /** Gets a textual representation of this summary. */
+  string toString() {
+    exists(string withReturn, string withAttr |
+      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
+      (if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
+      result = "type back-tracker " + withReturn + " return steps" + withAttr
+    )
+  }
+
+  /**
+   * Holds if this is the starting point of type back-tracking.
+   */
+  predicate start() { hasReturn = false and attr = "" }
+
+  /**
+   * Holds if this is the end point of type back-tracking.
+   */
+  predicate end() { attr = "" }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets a boolean indicating whether this type has been back-tracked into a call through a return edge.
+   */
+  boolean hasReturn() { result = hasReturn }
+
+  /**
+   * Gets a type back-tracker that starts where this one has left off to allow continued
+   * back-tracking.
+   *
+   * This predicate is only defined if the type has not been back-tracked into an attribute.
+   */
+  TypeBackTracker continue() { attr = "" and result = this }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   */
+  pragma[inline]
+  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::step(nodeFrom, nodeTo, summary) and
+      this = result.prepend(summary)
+    )
+  }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   *
+   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
+   * in the flowgraph, and not just the edges between
+   * `LocalSourceNode`s. It may therefore be less performant.
+   *
+   * Type back-tracking predicates using small steps typically take the following form:
+   * ```ql
+   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
+   *   t.start() and
+   *   result = < some API call >.getArgument(< n >)
+   *   or
+   *   exists (DataFlow::TypeBackTracker t2 |
+   *     t2 = t.smallstep(result, myType(t2))
+   *   )
+   * }
+   *
+   * DataFlow::Node myType() {
+   *   result = myType(DataFlow::TypeBackTracker::end())
+   * }
+   * ```
+   */
+  pragma[inline]
+  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+      this = result.prepend(summary)
+    )
+    or
+    typePreservingStep(nodeFrom, nodeTo) and
+    this = result
+  }
+}
+
+/** Provides predicates for implementing custom `TypeBackTracker`s. */
+module TypeBackTracker {
+  /**
+   * Gets a valid end point of type back-tracking.
+   */
+  TypeBackTracker end() { result.end() }
+}
diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 3d844047327d..acbbfda1ca6e 100644
--- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -125,6 +125,19 @@ class Node extends TNode {
    */
   pragma[inline]
   Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
+
+  /**
+   * Gets a node that may flow into this one using one heap and/or interprocedural step.
+   *
+   * See `TypeBackTracker` for more details about how to use this.
+   */
+  pragma[inline]
+  LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
+
+  /**
+   * Gets a local source node from which data may flow to this node in zero or more local steps.
+   */
+  LocalSourceNode getALocalSource() { result.flowsTo(this) }
 }
 
 /** A data-flow node corresponding to an SSA variable. */