Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
6db55cd
Python: add missing annotation
yoff Nov 27, 2023
c054ba6
python: instantiate module for variable capture
yoff Oct 11, 2023
b513871
Python: add consistency exclusions
yoff Nov 29, 2023
797deeb
Python: exclude `CaptureNode`s
yoff Nov 29, 2023
7565873
Python: test callbacks to library calls
yoff Dec 5, 2023
17a0029
Python: support callbacks to library calls
yoff Dec 5, 2023
453ab9c
Python: restrict `LibraryLambdaMethod`
yoff Dec 6, 2023
061fd01
Python: further restrict `LibraryLambdaMethod`
yoff Dec 6, 2023
5471c92
Python: exclusion for summary nodes
yoff Dec 6, 2023
efcdb3e
Python: filter local flow from a node to itself
yoff Dec 6, 2023
f32d5e4
Python: typo
yoff Dec 6, 2023
38e0321
Python: allow `CaptureArgumentNode`s as multiple arguemnts
yoff Dec 6, 2023
479d81f
Python: fix nonlocal captured variables
yoff Dec 12, 2023
2a5736e
Python: add consistency exception
yoff Dec 14, 2023
b6123de
Python: simplify assignments to captured variables
yoff Dec 14, 2023
abd544d
Python: consistency failure gone
yoff Dec 14, 2023
c395d2d
Apply suggestions from code review
yoff Dec 15, 2023
f96c52e
Python: make compile again
yoff Dec 15, 2023
2051ba3
Python: hide synthesized capture nodes
yoff Dec 15, 2023
262d43a
Python: Make compile and add comment
yoff Dec 15, 2023
bfdcae4
Python : `P` -> `PY`
yoff Dec 15, 2023
5b6ea15
Python: remove unneeded consistency exclusion
yoff Dec 15, 2023
d3b237b
Python: rename synthetic lambda nodes
yoff Dec 15, 2023
4b89a41
Update python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDisp…
yoff Dec 15, 2023
a311582
Python: Bring back (now simplified) exclusion
yoff Dec 15, 2023
b07316f
Update python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPriv…
yoff Dec 15, 2023
739b839
Python: use updated names
yoff Dec 15, 2023
f668453
Python: move things around
yoff Dec 15, 2023
e1bf282
Python: split variable capture instantiation out
yoff Dec 15, 2023
8601105
Python: Address TODO comment
yoff Dec 15, 2023
1ee11ae
Merge branch 'main' of https://github.com/github/codeql into python/c…
yoff Dec 15, 2023
416ba6a
Python: use updated API
yoff Dec 15, 2023
e36b079
Python: fix compilation error
yoff Dec 15, 2023
4a1fcde
Python: abandon synthetic node
yoff Dec 15, 2023
5de1725
Python: update class name
yoff Dec 15, 2023
661ba1c
Python: move restriction into branch predicate
yoff Dec 15, 2023
b505778
Python: remove non-local steps
yoff Dec 16, 2023
64655a0
Python: Use enw class name
yoff Dec 16, 2023
d6544cc
Python: remove consistency exclusion
yoff Dec 18, 2023
c88d686
Python: move `SynthCapturePostUpdateNode`
yoff Dec 18, 2023
bf1ad23
Python: add comments
yoff Dec 18, 2023
25c83dc
Python: adjust comment
yoff Dec 18, 2023
7324177
Python: address QL alerts
yoff Dec 18, 2023
86bb884
Python: better comment
yoff Dec 18, 2023
456209b
Python: Move predicate closer to its use
yoff Dec 18, 2023
c0b3d98
Python: Add a bit more detail to comment.
yoff Dec 18, 2023
6e4011d
Python: rename sythetic nodes
yoff Dec 18, 2023
78c484f
Python: remove support for capturing callbacks
yoff Dec 18, 2023
8b7b582
Python: add change-note
yoff Dec 18, 2023
a60c52b
Merge branch 'main' into python/captured-variables-basic
yoff Dec 18, 2023
1417c2c
Update python/ql/lib/change-notes/2023-12-18-support-variable-capture.md
yoff Dec 19, 2023
dfe71e3
Python: Add scope entry definition nodes
yoff Dec 19, 2023
c2dcd55
Python: remove unnecessary post-processing
yoff Dec 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion python/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ private module Input implements InputSig<PythonDataFlow> {
private import Private
private import Public

predicate postWithInFlowExclude(Node n) { n instanceof FlowSummaryNode }

predicate argHasPostUpdateExclude(ArgumentNode n) {
// TODO: Implement post-updates for *args, see tests added in https://github.com/github/codeql/pull/14936
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
Expand Down Expand Up @@ -44,6 +46,13 @@ private module Input implements InputSig<PythonDataFlow> {
)
}

predicate uniqueEnclosingCallableExclude(Node n) {
// We only have a selection of valid callables.
// For instance, we do not have classes as `DataFlowCallable`s.
not n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Function and
not n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Module
}

predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
Expand All @@ -53,7 +62,7 @@ private module Input implements InputSig<PythonDataFlow> {
}

predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
// since we can have multiple DataFlowCall for a CallNode (for example if can
// since we can have multiple DataFlowCall for a CallNode (for example if it can
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
// argument in the CallNode, we end up violating this consistency check in those
// cases. (see `getCallArg` in DataFlowDispatch.qll)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: majorAnalysis
---
* Added support for global data-flow through captured variables.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ private import semmle.python.dataflow.new.internal.TypeTrackingImpl::CallGraphCo
newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfParameterPosition() or
/**
* This is used for tracking flow through captured variables, and
* we use separate parameter/argument positions in order to distinguish
* "lambda self" from "normal self", as lambdas may also access outer `self`
* variables (through variable capture).
*/
TLambdaSelfParameterPosition() or
TPositionalParameterPosition(int index) {
index = any(Parameter p).getPosition()
or
Expand Down Expand Up @@ -78,6 +85,9 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a `self`/`cls` parameter. */
predicate isSelf() { this = TSelfParameterPosition() }

/** Holds if this position represents a reference to a lambda itself. Only used for tracking flow through captured variables. */
predicate isLambdaSelf() { this = TLambdaSelfParameterPosition() }

/** Holds if this position represents a positional parameter at (0-based) `index`. */
predicate isPositional(int index) { this = TPositionalParameterPosition(index) }

Expand Down Expand Up @@ -109,6 +119,8 @@ class ParameterPosition extends TParameterPosition {
string toString() {
this.isSelf() and result = "self"
or
this.isLambdaSelf() and result = "lambda self"
or
exists(int index | this.isPositional(index) and result = "position " + index)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
Expand All @@ -129,6 +141,13 @@ class ParameterPosition extends TParameterPosition {
newtype TArgumentPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfArgumentPosition() or
/**
* This is used for tracking flow through captured variables, and
* we use separate parameter/argument positions in order to distinguish
* "lambda self" from "normal self", as lambdas may also access outer `self`
* variables (through variable capture).
*/
TLambdaSelfArgumentPosition() or
TPositionalArgumentPosition(int index) {
exists(any(CallNode c).getArg(index))
or
Expand All @@ -153,6 +172,9 @@ class ArgumentPosition extends TArgumentPosition {
/** Holds if this position represents a `self`/`cls` argument. */
predicate isSelf() { this = TSelfArgumentPosition() }

/** Holds if this position represents a lambda `self` argument. Only used for tracking flow through captured variables. */
predicate isLambdaSelf() { this = TLambdaSelfArgumentPosition() }

/** Holds if this position represents a positional argument at (0-based) `index`. */
predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }

Expand All @@ -169,6 +191,8 @@ class ArgumentPosition extends TArgumentPosition {
string toString() {
this.isSelf() and result = "self"
or
this.isLambdaSelf() and result = "lambda self"
or
exists(int pos | this.isPositional(pos) and result = "position " + pos)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
Expand All @@ -183,6 +207,8 @@ class ArgumentPosition extends TArgumentPosition {
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
ppos.isSelf() and apos.isSelf()
or
ppos.isLambdaSelf() and apos.isLambdaSelf()
or
exists(int index | ppos.isPositional(index) and apos.isPositional(index))
or
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
Expand Down Expand Up @@ -1506,6 +1532,37 @@ abstract class ParameterNodeImpl extends Node {
}
}

/**
* A sythetic parameter representing the values of the variables captured
* by the callable being called. This parameter represents a single object
* where all the values are stored as attributes.
* This is also known as the environment part of a closure.
*
* This is used for tracking flow through captured variables.
*/
class SynthCapturedVariablesParameterNode extends ParameterNodeImpl,
TSynthCapturedVariablesParameterNode
{
private Function callable;

SynthCapturedVariablesParameterNode() { this = TSynthCapturedVariablesParameterNode(callable) }

final Function getCallable() { result = callable }

override Parameter getParameter() { none() }

override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
c = TFunction(callable) and
pos.isLambdaSelf()
}

override Scope getScope() { result = callable }

override Location getLocation() { result = callable.getLocation() }

override string toString() { result = "lambda self in " + callable }
}

/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
SummaryParameterNode() {
Expand Down Expand Up @@ -1580,6 +1637,42 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
override Node getPreUpdateNode() { result = pre }
}

/**
* A sythetic argument representing the values of the variables captured
* by the callable being called. This argument represents a single object
* where all the values are stored as attributes.
* This is also known as the environment part of a closure.
*
* This is used for tracking flow through captured variables.
*
* TODO:
* We might want a synthetic node here, but currently that incurs problems
* with non-monotonic recursion, because of the use of `resolveCall` in the
* char pred. This may be solvable by using
* `CallGraphConstruction::Make` in staed of
* `CallGraphConstruction::Simple::Make` appropriately.
*/
class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode {
CallNode callNode;

CapturedVariablesArgumentNode() {
node = callNode.getFunction() and
exists(Function target | resolveCall(callNode, target, _) |
target = any(VariableCapture::CapturedVariable v).getACapturingScope()
)
}

override string toString() { result = "Capturing closure argument" }

override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
callNode = call.getNode() and
pos.isLambdaSelf()
}

/** Gets the `CallNode` that is being passed as an argument to itself. */
CallNode getCallNode() { result = callNode }
}

/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) {
call instanceof ExtractedDataFlowCall and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ private import semmle.python.Frameworks
import MatchUnpacking
import IterableUnpacking
import DataFlowDispatch
import VariableCapture as VariableCapture

/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
Expand Down Expand Up @@ -299,6 +300,14 @@ module LocalFlow {
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
)
or
// Assignment to captured variables
// These are not covered by the `AssignmentDefinition`s in the case above,
// as they are not necessarily live.
nodeFrom.(CfgNode).getNode() = nodeTo.(CfgNode).getNode().(DefinitionNode).getValue() and
nodeTo.asExpr() = any(VariableCapture::CapturedVariable c).getAStore() and
// Exclude assignments to parameters. These are from default values and not local.
not nodeTo instanceof ParameterNode
or
// With definition
// `with f(42) as x:`
// nodeFrom is `f(42)`
Expand Down Expand Up @@ -352,7 +361,10 @@ module LocalFlow {
// nodeFrom is `y` on first line
// nodeTo is `y` on second line
exists(EssaDefinition def |
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode()
or
nodeFrom.(ScopeEntryDefinitionNode).getDefinition() = def
|
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
)
or
Expand Down Expand Up @@ -471,6 +483,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
or
variableCaptureLocalFlowStep(nodeFrom, nodeTo)
}

/**
Expand All @@ -481,8 +495,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
* or at runtime when callables in the module are called.
*/
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
IncludePostUpdateFlow<PhaseDependentFlow<LocalFlow::localFlowStep/2>::step/2>::step(nodeFrom,
nodeTo)
LocalFlow::localFlowStep(nodeFrom, nodeTo)
}

private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
Expand All @@ -494,6 +507,16 @@ predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
}

predicate variableCaptureLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Blindly applying use-use flow can result in a node that steps to itself, for
// example in while-loops. To uphold dataflow consistency checks, we don't want
// that. However, we do want to allow `[post] n` to `n` (to handle while loops), so
// we should only do the filtering after `IncludePostUpdateFlow` has ben applied.
IncludePostUpdateFlow<PhaseDependentFlow<VariableCapture::valueStep/2>::step/2>::step(nodeFrom,
nodeTo) and
nodeFrom != nodeTo
}

/** `ModuleVariable`s are accessed via jump steps at runtime. */
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
Expand Down Expand Up @@ -557,7 +580,7 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }

predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

predicate localMustFlowStep(Node node1, Node node2) { none() }
predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { none() }

/**
* Gets the type of `node`.
Expand Down Expand Up @@ -661,6 +684,8 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
}

/**
Expand Down Expand Up @@ -864,6 +889,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
}

/** Data flows from a sequence to a subscript of the sequence. */
Expand Down Expand Up @@ -993,6 +1020,8 @@ predicate nodeIsHidden(Node n) {
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
or
n instanceof SynthCaptureNode
}

class LambdaCallKind = Unit;
Expand Down Expand Up @@ -1032,6 +1061,11 @@ predicate allowParameterReturnInSelf(ParameterNode p) {
p.(ParameterNodeImpl).isParameterOf(c, pos) and
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos)
)
or
exists(Function f |
VariableCapture::Flow::heuristicAllowInstanceParameterReturnInSelf(f) and
p = TSynthCapturedVariablesParameterNode(f)
)
}

/** An approximated `Content`. */
Expand Down
Loading