Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs

Expand Down Expand Up @@ -55,6 +56,8 @@ private module Cached {
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
Comment on lines +59 to +60
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this is also done by other languages that utilize flow-summaries?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and in hindsight, it should have been added with the summary work; I think I just overlooked it back then..

}
}

Expand Down Expand Up @@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
* `<tainted object>.get(<arg>)` as tainted, whether it's true or not.
*/
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: I don't think this change was required, but it doesn't matter 🤷

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact it matters greatly, since some flow steps will now involve nodes generated by flow summaries. It may not be visible until more summaries are added and we look at load and store steps, but in the branch with all the summaries, it makes a huge difference :-)

// construction by literal
//
// TODO: once we have proper flow-summary modeling, we might not need this step any
Expand All @@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// don't provide that right now.
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
Expand Down
132 changes: 132 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3790,6 +3790,138 @@ private module StdlibPrivate {
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}

// ---------------------------------------------------------------------------
// Flow summaries for functions constructing containers
// ---------------------------------------------------------------------------
/**
 * A flow summary for the builtin `dict`.
 *
 * For every key known to `DictionaryElementContent`, value-preserving flow is
 * declared into the matching dictionary element of the return value, both from
 * the same dictionary element of argument 0 and from the `Argument[<key>:]`
 * access path. Additionally, a non-value-preserving step goes from argument 0
 * to the return value as a whole.
 */
class DictSummary extends SummarizedCallable {
  DictSummary() { this = "builtins.dict" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("dict").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Whole-object step: argument 0 to the result, not value-preserving.
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // Per-key steps: both input shapes write to the same element of the result.
    exists(string key | key = any(DataFlow::DictionaryElementContent dc).getKey() |
      input = ["Argument[0].DictionaryElement[" + key + "]", "Argument[" + key + ":]"] and
      output = "ReturnValue.DictionaryElement[" + key + "]" and
      preservesValue = true
    )
  }
}

/**
 * A flow summary for the builtin `list`.
 *
 * List, set and (indexed) tuple element content of argument 0 flows,
 * value-preserving, into the list element content of the return value.
 * Additionally, a non-value-preserving step goes from argument 0 to the
 * return value as a whole.
 */
class ListSummary extends SummarizedCallable {
  ListSummary() { this = "builtins.list" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("list").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Whole-object step: argument 0 to the result, not value-preserving.
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // Element content of the argument becomes list element content of the result.
    // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
    input =
      [
        "Argument[0].ListElement", "Argument[0].SetElement",
        "Argument[0].TupleElement[" +
          any(DataFlow::TupleElementContent tc).getIndex().toString() + "]"
      ] and
    output = "ReturnValue.ListElement" and
    preservesValue = true
  }
}

/**
 * A flow summary for the builtin `tuple`.
 *
 * Tuple element content at a given index of argument 0 flows, value-preserving,
 * into the tuple element content at the same index of the return value.
 * Additionally, a non-value-preserving step goes from argument 0 to the
 * return value as a whole.
 */
class TupleSummary extends SummarizedCallable {
  TupleSummary() { this = "builtins.tuple" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("tuple").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Whole-object step: argument 0 to the result, not value-preserving.
    //
    // TODO: We need to also translate iterable content such as list element
    // but we currently lack TupleElementAny
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // Index-wise step: element `idx` of the argument to element `idx` of the result.
    exists(string idx | idx = any(DataFlow::TupleElementContent tc).getIndex().toString() |
      input = "Argument[0].TupleElement[" + idx + "]" and
      output = "ReturnValue.TupleElement[" + idx + "]" and
      preservesValue = true
    )
  }
}

/**
 * A flow summary for the builtin `set`.
 *
 * List, set and (indexed) tuple element content of argument 0 flows,
 * value-preserving, into the set element content of the return value.
 * Additionally, a non-value-preserving step goes from argument 0 to the
 * return value as a whole.
 */
class SetSummary extends SummarizedCallable {
  SetSummary() { this = "builtins.set" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("set").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Whole-object step: argument 0 to the result, not value-preserving.
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
    or
    // Element content of the argument becomes set element content of the result.
    // TODO: Once we have DictKeyContent, we need to transform that into SetElementContent
    input =
      [
        "Argument[0].ListElement", "Argument[0].SetElement",
        "Argument[0].TupleElement[" +
          any(DataFlow::TupleElementContent tc).getIndex().toString() + "]"
      ] and
    output = "ReturnValue.SetElement" and
    preservesValue = true
  }
}

/**
 * A flow summary for the builtin `frozenset`.
 *
 * The propagated flow is exactly the flow declared by `SetSummary`.
 */
class FrozensetSummary extends SummarizedCallable {
  FrozensetSummary() { this = "builtins.frozenset" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("frozenset").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Reuse the input/output pairs of the `set` summary.
    exists(SetSummary setSummary |
      setSummary.propagatesFlowExt(input, output, preservesValue)
    )
  }
}

/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
ReversedSummary() { this = "builtins.reversed" }
Expand Down
5 changes: 4 additions & 1 deletion python/ql/test/experimental/dataflow/basic/callGraph.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import experimental.dataflow.callGraphConfig

from DataFlow::Node source, DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink))
where
exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig

from DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.isSink(sink))
where
exists(CallGraphConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig

from DataFlow::Node source
where exists(CallGraphConfig cfg | cfg.isSource(source))
where
exists(CallGraphConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source
2 changes: 0 additions & 2 deletions python/ql/test/experimental/dataflow/basic/global.expected
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
Expand Down
4 changes: 3 additions & 1 deletion python/ql/test/experimental/dataflow/basic/global.ql
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@ import allFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
Expand Down
5 changes: 4 additions & 1 deletion python/ql/test/experimental/dataflow/basic/globalStep.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import allFlowsConfig

from DataFlow::PathNode fromNode, DataFlow::PathNode toNode
where toNode = fromNode.getASuccessor()
where
toNode = fromNode.getASuccessor() and
exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and
exists(toNode.getNode().getLocation().getFile().getRelativePath())
select fromNode, toNode
8 changes: 0 additions & 8 deletions python/ql/test/experimental/dataflow/basic/local.expected
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
Expand Down
5 changes: 4 additions & 1 deletion python/ql/test/experimental/dataflow/basic/local.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow

from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlow(fromNode, toNode)
where
DataFlow::localFlow(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
Expand Down
5 changes: 4 additions & 1 deletion python/ql/test/experimental/dataflow/basic/localStep.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow

from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlowStep(fromNode, toNode)
where
DataFlow::localFlowStep(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@ import maximalFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink
6 changes: 0 additions & 6 deletions python/ql/test/experimental/dataflow/basic/sinks.expected
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
Expand Down
4 changes: 3 additions & 1 deletion python/ql/test/experimental/dataflow/basic/sinks.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import allFlowsConfig

from DataFlow::Node sink
where exists(AllFlowsConfig cfg | cfg.isSink(sink))
where
exists(AllFlowsConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink
6 changes: 0 additions & 6 deletions python/ql/test/experimental/dataflow/basic/sources.expected
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
Expand Down
4 changes: 3 additions & 1 deletion python/ql/test/experimental/dataflow/basic/sources.ql
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import allFlowsConfig

from DataFlow::Node source
where exists(AllFlowsConfig cfg | cfg.isSource(source))
where
exists(AllFlowsConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source
Loading