diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll index 0efae6ae45cd..95ec8b412ace 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll @@ -19,7 +19,7 @@ class ParameterPosition extends int { /** An argument position represented by an integer. */ class ArgumentPosition extends int { - ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) } + ArgumentPosition() { this in [-3, -2, -1] or exists(any(Call c).getArg(this)) } /** Holds if this position represents a positional argument at position `pos`. */ predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional @@ -109,6 +109,8 @@ module ArgumentPassing { ) } + private import semmle.python.essa.SsaCompute + /** * Gets the `n`th parameter of `callable`. * If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`. @@ -133,6 +135,15 @@ module ArgumentPassing { n = -2 and result = f.getKwarg().getAFlowNode() ) + or + // captured variable + exists(string name, ScopeEntryDefinition def | + captures_variable(callable, name, def) and + // TODO: handle more than one captured variable + n = -3 and + result.getId() = name and + AdjacentUses::firstUse(def.(EssaVariable).getDefinition(), result) + ) } /** @@ -220,6 +231,16 @@ module ArgumentPassing { call_unpacks(call, mapping, callable, name, paramN) and result = TKwUnpackedNode(call, callable, name) ) + or + // captured variable + exists(string name, ScopeEntryDefinition def, NameNode var | + captures_variable(callable, name, def) and + // TODO: handle more than one captured variable + paramN = -3 and + var.getId() = name and + result = TCfgNode(var) and + var.getBasicBlock() = call.getBasicBlock() + ) ) } @@ -271,6 +292,13 @@ module ArgumentPassing { exists(call.getNode().getKwargs()) // dict argument available ) } + + predicate captures_variable(CallableValue callable, string name, ScopeEntryDefinition def) { + def.(EssaVariable).getSourceVariable().getName() = name and + def.getScope() = callable.getScope() and + not def instanceof GlobalSsaVariable and + not def.(EssaVariable).isMetaVariable() + } } import ArgumentPassing @@ -686,6 +714,24 @@ abstract class ParameterNodeImpl extends Node { abstract predicate isParameterOf(DataFlowCallable c, int i); } +class CapturedParameterNode extends ParameterNodeImpl, TCapturedParameterNode { + CallableValue callable; + string name; + ScopeEntryDefinition def; + + CapturedParameterNode() { this = TCapturedParameterNode(callable, name, def) } + + override Parameter getParameter() { none() } + + override predicate isParameterOf(DataFlowCallable c, int i) { + c = TCallableValue(callable) and i = -3 + } + + override DataFlowCallable getEnclosingCallable() { result = TCallableValue(callable) } + + override string toString() { result = "captured parameter " + name + " of " + callable } +} + /** A parameter for a library callable with a flow summary. */ class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode { private FlowSummaryImpl::Public::SummarizedCallable sc; diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 016f748a79a9..2b78dfe624dd 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -281,6 +281,12 @@ module EssaFlow { nodeTo = TKwOverflowNode(call, callable) and nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() ) + or + // captured variable + exists(CallableValue callable, string name, ScopeEntryDefinition def | + nodeFrom = TCapturedParameterNode(callable, name, def) and + nodeTo.asVar() = def + ) } predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 4a00d0aafc34..2aaf63a08c85 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -109,9 +109,12 @@ newtype TNode = } or TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) { FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos) + } or + TCapturedParameterNode(CallableValue callable, string name, ScopeEntryDefinition def) { + captures_variable(callable, name, def) } -class TParameterNode = TCfgNode or TSummaryParameterNode; +class TParameterNode = TCfgNode or TSummaryParameterNode or TCapturedParameterNode; /** Helper for `Node::getEnclosingCallable`. */ private DataFlowCallable getCallableScope(Scope s) { diff --git a/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.expected b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql new file mode 100644 index 000000000000..c78af9f85566 --- /dev/null +++ b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql @@ -0,0 +1,27 @@ +import python +import semmle.python.essa.Essa +import TestUtilities.InlineExpectationsTest +private import semmle.python.dataflow.new.internal.PrintNode + +class ScopeEntryTest extends InlineExpectationsTest { + ScopeEntryTest() { this = "ScopeEntryTest" } + + override string getARelevantTag() { result = "entry" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(ScopeEntryDefinition def | + exists(def.getLocation().getFile().getRelativePath()) and + not def instanceof GlobalSsaVariable and + not def.(EssaVariable).isMetaVariable() + | + location = def.getLocation() and + tag = "entry" and + value = prettyEssaDefinition(def) and + element = def.toString() + ) + } +} + +string prettyEssaDefinition(EssaDefinition def) { + result = def.(EssaVariable).getSourceVariable().getName() +} diff --git a/python/ql/test/experimental/dataflow/variable-capture/dict.py b/python/ql/test/experimental/dataflow/variable-capture/dict.py index 09c0ddfbdbd2..97453a9d117b 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/dict.py +++ b/python/ql/test/experimental/dataflow/variable-capture/dict.py @@ -34,26 +34,26 @@ def SINK_F(x): def out(): sinkO1 = { "x": "" } - def captureOut1(): + def captureOut1(): #$ entry=sinkO1 sinkO1["x"] = SOURCE captureOut1() SINK(sinkO1["x"]) #$ MISSING:captured sinkO2 = { "x": "" } def captureOut2(): - def m(): + def m(): #$ entry=sinkO2 sinkO2["x"] = SOURCE m() captureOut2() SINK(sinkO2["x"]) #$ MISSING:captured nonSink0 = { "x": "" } - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=nonSink0 nonSink0["x"] = SOURCE SINK_F(nonSink0["x"]) def captureOut2NotCalled(): - def m(): + def m(): #$ entry=nonSink0 nonSink0["x"] = SOURCE captureOut2NotCalled() SINK_F(nonSink0["x"]) @@ -64,26 +64,26 @@ def test_out(): def through(tainted): sinkO1 = { "x": "" } - def captureOut1(): + def captureOut1(): #$ entry=tainted entry=sinkO1 sinkO1["x"] = tainted captureOut1() SINK(sinkO1["x"]) #$ MISSING:captured sinkO2 = { "x": "" } - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=tainted entry=sinkO2 + def m(): #$ entry=tainted entry=sinkO2 sinkO2["x"] = tainted m() captureOut2() SINK(sinkO2["x"]) #$ MISSING:captured nonSink0 = { "x": "" } - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=tainted entry=nonSink0 nonSink0["x"] = tainted SINK_F(nonSink0["x"]) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=tainted entry=nonSink0 + def m(): #$ entry=tainted entry=nonSink0 nonSink0["x"] = tainted captureOut2NotCalled() SINK_F(nonSink0["x"]) diff --git a/python/ql/test/experimental/dataflow/variable-capture/in.py b/python/ql/test/experimental/dataflow/variable-capture/in.py index dfa666fae5d3..1ca52afb81de 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/in.py +++ b/python/ql/test/experimental/dataflow/variable-capture/in.py @@ -32,27 +32,33 @@ def SINK_F(x): # Capture the parameter of an outer function. def inParam(tainted): - def captureIn1(): + def captureIn1(): #$ entry=tainted sinkI1 = tainted - SINK(sinkI1) #$ MISSING:captured + SINK(sinkI1) #$ captured captureIn1() - def captureIn2(): - def m(): + def captureIn1a(): #$ entry=tainted + sinkI1 = tainted + SINK(sinkI1) #$ captured + a = captureIn1a + a() + + def captureIn2(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted sinkI2 = tainted SINK(sinkI2) #$ MISSING:captured m() captureIn2() - captureIn3 = lambda arg: SINK(tainted) + captureIn3 = lambda arg: SINK(tainted) #$ entry=tainted captureIn3("") - def captureIn1NotCalled(): + def captureIn1NotCalled(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) - def captureIn2NotCalled(): - def m(): + def captureIn2NotCalled(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) captureIn2NotCalled() @@ -65,27 +71,27 @@ def test_inParam(): def inLocal(): tainted = SOURCE - def captureIn1(): + def captureIn1(): #$ entry=tainted sinkI1 = tainted SINK(sinkI1) #$ MISSING:captured captureIn1() - def captureIn2(): - def m(): + def captureIn2(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted sinkI2 = tainted SINK(sinkI2) #$ MISSING:captured m() captureIn2() - captureIn3 = lambda arg: SINK(tainted) + captureIn3 = lambda arg: SINK(tainted) #$ entry=tainted captureIn3("") - def captureIn1NotCalled(): + def captureIn1NotCalled(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) - def captureIn2NotCalled(): - def m(): + def captureIn2NotCalled(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) captureIn2NotCalled() diff --git a/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py b/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py index bd8b7df86ee2..de7d95b3774d 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py +++ b/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py @@ -34,15 +34,15 @@ def SINK_F(x): def out(): sinkO1 = "" - def captureOut1(): + def captureOut1(): #$ MISSING:entry=sink01 nonlocal sinkO1 sinkO1 = SOURCE captureOut1() SINK(sinkO1) #$ MISSING:captured sinkO2 = "" - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=sink02 + def m(): #$ MISSING:entry=sink02 nonlocal sinkO2 sinkO2 = SOURCE m() @@ -50,13 +50,13 @@ def m(): SINK(sinkO2) #$ MISSING:captured nonSink0 = "" - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = SOURCE SINK_F(nonSink0) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=nonSink0 + def m(): #$ MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = SOURCE captureOut2NotCalled() @@ -68,15 +68,15 @@ def test_out(): def through(tainted): sinkO1 = "" - def captureOut1(): + def captureOut1(): #$ entry=tainted MISSING:entry=sink01 nonlocal sinkO1 sinkO1 = tainted captureOut1() SINK(sinkO1) #$ MISSING:captured sinkO2 = "" - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=tainted entry=sink02 + def m(): #$ entry=tainted MISSING:entry=sink02 nonlocal sinkO2 sinkO2 = tainted m() @@ -84,13 +84,13 @@ def m(): SINK(sinkO2) #$ MISSING:captured nonSink0 = "" - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=tainted MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = tainted SINK_F(nonSink0) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=tainted entry=nonSink0 + def m(): #$ entry=tainted MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = tainted captureOut2NotCalled()