-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Python: Container summaries, part 1 #13146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
private import python | ||
private import semmle.python.dataflow.new.DataFlow | ||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate | ||
private import FlowSummaryImpl as FlowSummaryImpl | ||
private import semmle.python.dataflow.new.internal.TaintTrackingPublic | ||
private import semmle.python.ApiGraphs | ||
|
||
|
@@ -55,6 +56,8 @@ private module Cached { | |
awaitStep(nodeFrom, nodeTo) | ||
or | ||
asyncWithStep(nodeFrom, nodeTo) | ||
or | ||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false) | ||
} | ||
} | ||
|
||
|
@@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT | |
* is currently very imprecise. As an example, since we model `dict.get`, we treat any | ||
* `<tainted object>.get(<arg>)` as tainted, whether it's true or not. | ||
*/ | ||
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { | ||
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NIT: I don't think this change was required, but it doesn't matter 🤷 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In fact it matters greatly, since some flow steps will now involve nodes generated by flow summaries. It may not be visible until more summaries are added and we look at load and store steps, but in the branch with all the summaries, it makes a huge difference :-) |
||
// construction by literal | ||
// | ||
// TODO: once we have proper flow-summary modeling, we might not need this step any | ||
|
@@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { | |
// don't provide that right now. | ||
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo) | ||
or | ||
// constructor call | ||
exists(DataFlow::CallCfgNode call | call = nodeTo | | ||
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and | ||
call.getArg(0) = nodeFrom | ||
// TODO: Properly handle defaultdict/namedtuple | ||
) | ||
or | ||
// functions operating on collections | ||
exists(DataFlow::CallCfgNode call | call = nodeTo | | ||
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
import experimental.dataflow.callGraphConfig | ||
|
||
from DataFlow::Node source, DataFlow::Node sink | ||
where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) | ||
where | ||
exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and | ||
exists(source.getLocation().getFile().getRelativePath()) and | ||
exists(sink.getLocation().getFile().getRelativePath()) | ||
select source, sink |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,3 @@ | ||
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | | ||
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | | ||
| test.py:1:1:1:21 | SynthDictSplatParameterNode | | ||
| test.py:1:19:1:19 | ControlFlowNode for x | | ||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import experimental.dataflow.callGraphConfig | ||
|
||
from DataFlow::Node sink | ||
where exists(CallGraphConfig cfg | cfg.isSink(sink)) | ||
where | ||
exists(CallGraphConfig cfg | cfg.isSink(sink)) and | ||
exists(sink.getLocation().getFile().getRelativePath()) | ||
select sink |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,2 @@ | ||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | | ||
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | ||
| test.py:4:10:4:10 | ControlFlowNode for z | | ||
| test.py:7:19:7:19 | ControlFlowNode for a | |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import experimental.dataflow.callGraphConfig | ||
|
||
from DataFlow::Node source | ||
where exists(CallGraphConfig cfg | cfg.isSource(source)) | ||
where | ||
exists(CallGraphConfig cfg | cfg.isSource(source)) and | ||
exists(source.getLocation().getFile().getRelativePath()) | ||
select source |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
import allFlowsConfig | ||
|
||
from DataFlow::PathNode fromNode, DataFlow::PathNode toNode | ||
where toNode = fromNode.getASuccessor() | ||
where | ||
toNode = fromNode.getASuccessor() and | ||
exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and | ||
exists(toNode.getNode().getLocation().getFile().getRelativePath()) | ||
select fromNode, toNode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
import semmle.python.dataflow.new.DataFlow | ||
|
||
from DataFlow::Node fromNode, DataFlow::Node toNode | ||
where DataFlow::localFlow(fromNode, toNode) | ||
where | ||
DataFlow::localFlow(fromNode, toNode) and | ||
exists(fromNode.getLocation().getFile().getRelativePath()) and | ||
exists(toNode.getLocation().getFile().getRelativePath()) | ||
select fromNode, toNode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
import semmle.python.dataflow.new.DataFlow | ||
|
||
from DataFlow::Node fromNode, DataFlow::Node toNode | ||
where DataFlow::localFlowStep(fromNode, toNode) | ||
where | ||
DataFlow::localFlowStep(fromNode, toNode) and | ||
exists(fromNode.getLocation().getFile().getRelativePath()) and | ||
exists(toNode.getLocation().getFile().getRelativePath()) | ||
select fromNode, toNode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import allFlowsConfig | ||
|
||
from DataFlow::Node sink | ||
where exists(AllFlowsConfig cfg | cfg.isSink(sink)) | ||
where | ||
exists(AllFlowsConfig cfg | cfg.isSink(sink)) and | ||
exists(sink.getLocation().getFile().getRelativePath()) | ||
select sink |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
import allFlowsConfig | ||
|
||
from DataFlow::Node source | ||
where exists(AllFlowsConfig cfg | cfg.isSource(source)) | ||
where | ||
exists(AllFlowsConfig cfg | cfg.isSource(source)) and | ||
exists(source.getLocation().getFile().getRelativePath()) | ||
select source |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume this is also done by other languages that utilize flow-summaries?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, and in hindsight, it should have been added with the summary work; I think I just overlooked it back then.