Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/change-notes/2021-02-18-type-backtrackers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lgtm,codescanning
* Added support for type backtracking. Dual to the `TypeTracker` class, the `TypeBackTracker` class allows the propagation of user-defined type information backwards from a set of data flow nodes.
141 changes: 141 additions & 0 deletions python/ql/src/semmle/python/dataflow/new/TypeTracker.qll
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,144 @@ module TypeTracker {
*/
TypeTracker end() { result.end() }
}

private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalAttributeName attr)

/**
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
*
* This can for example be used to track callbacks that are passed to a certain API,
* so we can model specific parameters of that callback as having a certain type.
*
* Note that type back-tracking does not provide a source/sink relation, that is,
* it may determine that a node will be used in an API call somewhere, but it won't
* determine exactly where that use was, or the path that led to the use.
*
* It is recommended that all uses of this type are written in the following form,
* for back-tracking some callback type `myCallback`:
*
* ```
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
* t.start() and
* result = (< some API call >).getArgument(< n >).getALocalSource()
* or
* exists (DataFlow::TypeBackTracker t2 |
* result = myCallback(t2).backtrack(t2, t)
* )
* }
*
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
* ```
*
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
*/
class TypeBackTracker extends TTypeBackTracker {
Boolean hasReturn;
string attr;

TypeBackTracker() { this = MkTypeBackTracker(hasReturn, attr) }

/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
TypeBackTracker prepend(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and hasReturn = false and result = this
or
step = ReturnStep() and result = MkTypeBackTracker(true, attr)
or
exists(string p | step = LoadStep(p) and attr = "" and result = MkTypeBackTracker(hasReturn, p))
or
step = StoreStep(attr) and result = MkTypeBackTracker(hasReturn, "")
}

/** Gets a textual representation of this summary. */
string toString() {
exists(string withReturn, string withAttr |
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
result = "type back-tracker " + withReturn + " return steps" + withAttr
)
}

/**
* Holds if this is the starting point of type tracking.
*/
predicate start() { hasReturn = false and attr = "" }

/**
* Holds if this is the end point of type tracking.
*/
predicate end() { attr = "" }

/**
* INTERNAL. DO NOT USE.
*
* Holds if this type has been back-tracked into a call through return edge.
*/
boolean hasReturn() { result = hasReturn }

/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type has not been tracked into an attribute.
*/
TypeBackTracker continue() { attr = "" and result = this }

/**
* Gets the summary that corresponds to having taken a backwards
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
*/
pragma[inline]
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
this = result.prepend(summary)
)
}

/**
* Gets the summary that corresponds to having taken a backwards
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
*
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
* in the flowgraph, and not just the edges between
* `LocalSourceNode`s. It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
* t.start() and
* result = < some API call >.getArgument(< n >)
* or
* exists (DataFlow::TypeBackTracker t2 |
* t = t2.smallstep(result, myType(t2))
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeBackTracker::end())
* }
* ```
*/
pragma[inline]
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
this = result.prepend(summary)
)
or
typePreservingStep(nodeFrom, nodeTo) and
this = result
}
}

/** Provides predicates for implementing custom `TypeBackTracker`s. */
module TypeBackTracker {
/**
* Gets a valid end point of type back-tracking.
*/
TypeBackTracker end() { result.end() }
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,19 @@ class Node extends TNode {
*/
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }

/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }

/**
* Gets a local source node from which data may flow to this node in zero or more local steps.
*/
LocalSourceNode getALocalSource() { result.flowsTo(this) }
}

/** A data-flow node corresponding to an SSA variable. */
Expand Down