From bbbfe759f0cda4a84b07059ef619ff488fcab423 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 13 Jan 2023 10:55:57 +0100 Subject: [PATCH 1/3] Python: test scope entry definitions --- .../variable-capture/ScopeEntryTest.expected | 0 .../variable-capture/ScopeEntryTest.ql | 27 ++++++++++++++++++ .../dataflow/variable-capture/dict.py | 20 ++++++------- .../dataflow/variable-capture/in.py | 28 +++++++++---------- .../dataflow/variable-capture/nonlocal.py | 24 ++++++++-------- 5 files changed, 63 insertions(+), 36 deletions(-) create mode 100644 python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.expected create mode 100644 python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql diff --git a/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.expected b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql new file mode 100644 index 000000000000..c78af9f85566 --- /dev/null +++ b/python/ql/test/experimental/dataflow/variable-capture/ScopeEntryTest.ql @@ -0,0 +1,27 @@ +import python +import semmle.python.essa.Essa +import TestUtilities.InlineExpectationsTest +private import semmle.python.dataflow.new.internal.PrintNode + +class ScopeEntryTest extends InlineExpectationsTest { + ScopeEntryTest() { this = "ScopeEntryTest" } + + override string getARelevantTag() { result = "entry" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(ScopeEntryDefinition def | + exists(def.getLocation().getFile().getRelativePath()) and + not def instanceof GlobalSsaVariable and + not def.(EssaVariable).isMetaVariable() + | + location = def.getLocation() and + tag = "entry" and + value = prettyEssaDefinition(def) and + element = def.toString() + ) + } +} + +string prettyEssaDefinition(EssaDefinition def) { + result = def.(EssaVariable).getSourceVariable().getName() +} diff --git a/python/ql/test/experimental/dataflow/variable-capture/dict.py b/python/ql/test/experimental/dataflow/variable-capture/dict.py index 09c0ddfbdbd2..97453a9d117b 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/dict.py +++ b/python/ql/test/experimental/dataflow/variable-capture/dict.py @@ -34,26 +34,26 @@ def SINK_F(x): def out(): sinkO1 = { "x": "" } - def captureOut1(): + def captureOut1(): #$ entry=sinkO1 sinkO1["x"] = SOURCE captureOut1() SINK(sinkO1["x"]) #$ MISSING:captured sinkO2 = { "x": "" } def captureOut2(): - def m(): + def m(): #$ entry=sinkO2 sinkO2["x"] = SOURCE m() captureOut2() SINK(sinkO2["x"]) #$ MISSING:captured nonSink0 = { "x": "" } - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=nonSink0 nonSink0["x"] = SOURCE SINK_F(nonSink0["x"]) def captureOut2NotCalled(): - def m(): + def m(): #$ entry=nonSink0 nonSink0["x"] = SOURCE captureOut2NotCalled() SINK_F(nonSink0["x"]) @@ -64,26 +64,26 @@ def test_out(): def through(tainted): sinkO1 = { "x": "" } - def captureOut1(): + def captureOut1(): #$ entry=tainted entry=sinkO1 sinkO1["x"] = tainted captureOut1() SINK(sinkO1["x"]) #$ MISSING:captured sinkO2 = { "x": "" } - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=tainted entry=sinkO2 + def m(): #$ entry=tainted entry=sinkO2 sinkO2["x"] = tainted m() captureOut2() SINK(sinkO2["x"]) #$ MISSING:captured nonSink0 = { "x": "" } - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=tainted entry=nonSink0 nonSink0["x"] = tainted SINK_F(nonSink0["x"]) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=tainted entry=nonSink0 + def m(): #$ entry=tainted entry=nonSink0 nonSink0["x"] = tainted captureOut2NotCalled() SINK_F(nonSink0["x"]) diff --git a/python/ql/test/experimental/dataflow/variable-capture/in.py b/python/ql/test/experimental/dataflow/variable-capture/in.py index dfa666fae5d3..badf2922213c 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/in.py +++ b/python/ql/test/experimental/dataflow/variable-capture/in.py @@ -32,27 +32,27 @@ def SINK_F(x): # Capture the parameter of an outer function. def inParam(tainted): - def captureIn1(): + def captureIn1(): #$ entry=tainted sinkI1 = tainted SINK(sinkI1) #$ MISSING:captured captureIn1() - def captureIn2(): - def m(): + def captureIn2(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted sinkI2 = tainted SINK(sinkI2) #$ MISSING:captured m() captureIn2() - captureIn3 = lambda arg: SINK(tainted) + captureIn3 = lambda arg: SINK(tainted) #$ entry=tainted captureIn3("") - def captureIn1NotCalled(): + def captureIn1NotCalled(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) - def captureIn2NotCalled(): - def m(): + def captureIn2NotCalled(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) captureIn2NotCalled() @@ -65,27 +65,27 @@ def test_inParam(): def inLocal(): tainted = SOURCE - def captureIn1(): + def captureIn1(): #$ entry=tainted sinkI1 = tainted SINK(sinkI1) #$ MISSING:captured captureIn1() - def captureIn2(): - def m(): + def captureIn2(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted sinkI2 = tainted SINK(sinkI2) #$ MISSING:captured m() captureIn2() - captureIn3 = lambda arg: SINK(tainted) + captureIn3 = lambda arg: SINK(tainted) #$ entry=tainted captureIn3("") - def captureIn1NotCalled(): + def captureIn1NotCalled(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) - def captureIn2NotCalled(): - def m(): + def captureIn2NotCalled(): #$ MISSING:entry=tainted + def m(): #$ entry=tainted nonSink0 = tainted SINK_F(nonSink0) captureIn2NotCalled() diff --git a/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py b/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py index bd8b7df86ee2..de7d95b3774d 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py +++ b/python/ql/test/experimental/dataflow/variable-capture/nonlocal.py @@ -34,15 +34,15 @@ def SINK_F(x): def out(): sinkO1 = "" - def captureOut1(): + def captureOut1(): #$ MISSING:entry=sink01 nonlocal sinkO1 sinkO1 = SOURCE captureOut1() SINK(sinkO1) #$ MISSING:captured sinkO2 = "" - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=sink02 + def m(): #$ MISSING:entry=sink02 nonlocal sinkO2 sinkO2 = SOURCE m() @@ -50,13 +50,13 @@ def m(): SINK(sinkO2) #$ MISSING:captured nonSink0 = "" - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = SOURCE SINK_F(nonSink0) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=nonSink0 + def m(): #$ MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = SOURCE captureOut2NotCalled() @@ -68,15 +68,15 @@ def test_out(): def through(tainted): sinkO1 = "" - def captureOut1(): + def captureOut1(): #$ entry=tainted MISSING:entry=sink01 nonlocal sinkO1 sinkO1 = tainted captureOut1() SINK(sinkO1) #$ MISSING:captured sinkO2 = "" - def captureOut2(): - def m(): + def captureOut2(): #$ MISSING:entry=tainted entry=sink02 + def m(): #$ entry=tainted MISSING:entry=sink02 nonlocal sinkO2 sinkO2 = tainted m() @@ -84,13 +84,13 @@ def m(): SINK(sinkO2) #$ MISSING:captured nonSink0 = "" - def captureOut1NotCalled(): + def captureOut1NotCalled(): #$ entry=tainted MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = tainted SINK_F(nonSink0) - def captureOut2NotCalled(): - def m(): + def captureOut2NotCalled(): #$ MISSING:entry=tainted entry=nonSink0 + def m(): #$ entry=tainted MISSING:entry=nonSink0 nonlocal nonSink0 nonSink0 = tainted captureOut2NotCalled() From 9019af27016f410db85825245d21fe93e817d279 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 17 Jan 2023 11:25:05 +0100 Subject: [PATCH 2/3] python: extra test --- python/ql/test/experimental/dataflow/variable-capture/in.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/ql/test/experimental/dataflow/variable-capture/in.py b/python/ql/test/experimental/dataflow/variable-capture/in.py index badf2922213c..0d3c2a665cb7 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/in.py +++ b/python/ql/test/experimental/dataflow/variable-capture/in.py @@ -37,6 +37,12 @@ def captureIn1(): #$ entry=tainted SINK(sinkI1) #$ MISSING:captured captureIn1() + def captureIn1a(): #$ entry=tainted + sinkI1 = tainted + SINK(sinkI1) #$ MISSING:captured + a = captureIn1a + a() + def captureIn2(): #$ MISSING:entry=tainted def m(): #$ entry=tainted sinkI2 = tainted From cb9ed5c8899804e8ce3e9919be41df995584e691 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 17 Jan 2023 12:06:10 +0100 Subject: [PATCH 3/3] python: hacky PoC for captured parameters - we added parameters to capturing function definitions. Capturing is detected via `ScopeEntryDefinitions`. These only exist for the inner-most nested function (see tests) We should make these exist at all levels to thread the values through. Right now, we cannot reach further nested functions (see `m` in tests). - parameters currently need to be `NameNode`s, so we select the first mention inside the function body. That also only exists in the inner-most nested function (if that is the only one to mention hte captured variable) so this should be replaced with a synthetic node. - we needed to extend `ParameterNodeImpl` and implement `isParameterOf` to make the data flow library connect them. - we added arguments to calls to capturing functions (referencing the call graph) - These should be current value of the variable at the call site This is currently approximated by the latest use before the call site which is further approximated by any variable of the same name in the same basic block as the call The right thing to do is to make the call a useand let SSA handle it. - every captured variable has been given position -3 this should be based on the variable name instead. That will be easy once we rebase on the new call graph PR, as it has genralised positions. --- .../new/internal/DataFlowDispatchPointsTo.qll | 48 ++++++++++++++++++- .../dataflow/new/internal/DataFlowPrivate.qll | 6 +++ .../dataflow/new/internal/DataFlowPublic.qll | 5 +- .../dataflow/variable-capture/in.py | 4 +- 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll index 0efae6ae45cd..95ec8b412ace 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll @@ -19,7 +19,7 @@ class ParameterPosition extends int { /** An argument position represented by an integer. */ class ArgumentPosition extends int { - ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) } + ArgumentPosition() { this in [-3, -2, -1] or exists(any(Call c).getArg(this)) } /** Holds if this position represents a positional argument at position `pos`. */ predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional @@ -109,6 +109,8 @@ module ArgumentPassing { ) } + private import semmle.python.essa.SsaCompute + /** * Gets the `n`th parameter of `callable`. * If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`. @@ -133,6 +135,15 @@ module ArgumentPassing { n = -2 and result = f.getKwarg().getAFlowNode() ) + or + // captured variable + exists(string name, ScopeEntryDefinition def | + captures_variable(callable, name, def) and + // TODO: handle more than one captured variable + n = -3 and + result.getId() = name and + AdjacentUses::firstUse(def.(EssaVariable).getDefinition(), result) + ) } /** @@ -220,6 +231,16 @@ module ArgumentPassing { call_unpacks(call, mapping, callable, name, paramN) and result = TKwUnpackedNode(call, callable, name) ) + or + // captured variable + exists(string name, ScopeEntryDefinition def, NameNode var | + captures_variable(callable, name, def) and + // TODO: handle more than one captured variable + paramN = -3 and + var.getId() = name and + result = TCfgNode(var) and + var.getBasicBlock() = call.getBasicBlock() + ) ) } @@ -271,6 +292,13 @@ module ArgumentPassing { exists(call.getNode().getKwargs()) // dict argument available ) } + + predicate captures_variable(CallableValue callable, string name, ScopeEntryDefinition def) { + def.(EssaVariable).getSourceVariable().getName() = name and + def.getScope() = callable.getScope() and + not def instanceof GlobalSsaVariable and + not def.(EssaVariable).isMetaVariable() + } } import ArgumentPassing @@ -686,6 +714,24 @@ abstract class ParameterNodeImpl extends Node { abstract predicate isParameterOf(DataFlowCallable c, int i); } +class CapturedParameterNode extends ParameterNodeImpl, TCapturedParameterNode { + CallableValue callable; + string name; + ScopeEntryDefinition def; + + CapturedParameterNode() { this = TCapturedParameterNode(callable, name, def) } + + override Parameter getParameter() { none() } + + override predicate isParameterOf(DataFlowCallable c, int i) { + c = TCallableValue(callable) and i = -3 + } + + override DataFlowCallable getEnclosingCallable() { result = TCallableValue(callable) } + + override string toString() { result = "captured parameter " + name + " of " + callable } +} + /** A parameter for a library callable with a flow summary. */ class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode { private FlowSummaryImpl::Public::SummarizedCallable sc; diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 016f748a79a9..2b78dfe624dd 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -281,6 +281,12 @@ module EssaFlow { nodeTo = TKwOverflowNode(call, callable) and nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() ) + or + // captured variable + exists(CallableValue callable, string name, ScopeEntryDefinition def | + nodeFrom = TCapturedParameterNode(callable, name, def) and + nodeTo.asVar() = def + ) } predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 4a00d0aafc34..2aaf63a08c85 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -109,9 +109,12 @@ newtype TNode = } or TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) { FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos) + } or + TCapturedParameterNode(CallableValue callable, string name, ScopeEntryDefinition def) { + captures_variable(callable, name, def) } -class TParameterNode = TCfgNode or TSummaryParameterNode; +class TParameterNode = TCfgNode or TSummaryParameterNode or TCapturedParameterNode; /** Helper for `Node::getEnclosingCallable`. */ private DataFlowCallable getCallableScope(Scope s) { diff --git a/python/ql/test/experimental/dataflow/variable-capture/in.py b/python/ql/test/experimental/dataflow/variable-capture/in.py index 0d3c2a665cb7..1ca52afb81de 100644 --- a/python/ql/test/experimental/dataflow/variable-capture/in.py +++ b/python/ql/test/experimental/dataflow/variable-capture/in.py @@ -34,12 +34,12 @@ def SINK_F(x): def inParam(tainted): def captureIn1(): #$ entry=tainted sinkI1 = tainted - SINK(sinkI1) #$ MISSING:captured + SINK(sinkI1) #$ captured captureIn1() def captureIn1a(): #$ entry=tainted sinkI1 = tainted - SINK(sinkI1) #$ MISSING:captured + SINK(sinkI1) #$ captured a = captureIn1a a()