diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll index f7b5fc45f09e..3b32d95ae0be 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll @@ -1,6 +1,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate +private import FlowSummaryImpl as FlowSummaryImpl private import semmle.python.dataflow.new.internal.TaintTrackingPublic private import semmle.python.ApiGraphs @@ -55,6 +56,8 @@ private module Cached { awaitStep(nodeFrom, nodeTo) or asyncWithStep(nodeFrom, nodeTo) + or + FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false) } } @@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT * is currently very imprecise, as an example, since we model `dict.get`, we treat any * `.get()` will be tainted, whether it's true or not. */ -predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { +predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { // construction by literal // // TODO: once we have proper flow-summary modeling, we might not need this step any @@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { // don't provide that right now. 
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo) or - // constructor call - exists(DataFlow::CallCfgNode call | call = nodeTo | - call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and - call.getArg(0) = nodeFrom - // TODO: Properly handle defaultdict/namedtuple - ) - or // functions operating on collections exists(DataFlow::CallCfgNode call | call = nodeTo | call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 7e62a5b033e1..cd5ec31945ee 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3790,6 +3790,138 @@ private module StdlibPrivate { override DataFlow::Node getAPathArgument() { result = this.getAnInput() } } + // --------------------------------------------------------------------------- + // Flow summaries for functions constructing containers + // --------------------------------------------------------------------------- + /** A flow summary for `dict`. 
*/ + class DictSummary extends SummarizedCallable { + DictSummary() { this = "builtins.dict" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("dict").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | + input = "Argument[0].DictionaryElement[" + key + "]" and + output = "ReturnValue.DictionaryElement[" + key + "]" and + preservesValue = true + ) + or + exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | + input = "Argument[" + key + ":]" and + output = "ReturnValue.DictionaryElement[" + key + "]" and + preservesValue = true + ) + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `list`. */ + class ListSummary extends SummarizedCallable { + ListSummary() { this = "builtins.list" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("list").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + ( + input = "Argument[0].ListElement" + or + input = "Argument[0].SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" + ) + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + ) and + output = "ReturnValue.ListElement" and + preservesValue = true + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `tuple`. */ + class TupleSummary extends SummarizedCallable { + TupleSummary() { this = "builtins.tuple" } + + override 
DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("tuple").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" and + output = "ReturnValue.TupleElement[" + i.toString() + "]" and + preservesValue = true + ) + or + // TODO: We need to also translate iterable content such as list element + // but we currently lack TupleElementAny + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `set`. */ + class SetSummary extends SummarizedCallable { + SetSummary() { this = "builtins.set" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("set").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + ( + input = "Argument[0].ListElement" + or + input = "Argument[0].SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" + ) + // TODO: Once we have DictKeyContent, we need to transform that into SetElementContent + ) and + output = "ReturnValue.SetElement" and + preservesValue = true + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `frozenset`. */ + class FrozensetSummary extends SummarizedCallable { + FrozensetSummary() { this = "builtins.frozenset" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = 
API::builtin("frozenset").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + any(SetSummary s).propagatesFlowExt(input, output, preservesValue) + } + } + /** A flow summary for `reversed`. */ class ReversedSummary extends SummarizedCallable { ReversedSummary() { this = "builtins.reversed" } diff --git a/python/ql/test/experimental/dataflow/basic/callGraph.ql b/python/ql/test/experimental/dataflow/basic/callGraph.ql index 2e8d6956c705..d83da4c5c110 100644 --- a/python/ql/test/experimental/dataflow/basic/callGraph.ql +++ b/python/ql/test/experimental/dataflow/basic/callGraph.ql @@ -1,5 +1,8 @@ import experimental.dataflow.callGraphConfig from DataFlow::Node source, DataFlow::Node sink -where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) +where + exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and + exists(source.getLocation().getFile().getRelativePath()) and + exists(sink.getLocation().getFile().getRelativePath()) select source, sink diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected index ef35d8f50397..e4b8f905530b 100644 --- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected +++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected @@ -1,5 +1,3 @@ -| file://:0:0:0:0 | parameter position 0 of builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql b/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql index 7d15b353274d..8b9f57a04627 100644 --- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql +++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql @@ -1,5 
+1,7 @@ import experimental.dataflow.callGraphConfig from DataFlow::Node sink -where exists(CallGraphConfig cfg | cfg.isSink(sink)) +where + exists(CallGraphConfig cfg | cfg.isSink(sink)) and + exists(sink.getLocation().getFile().getRelativePath()) select sink diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected index 74d546c5f2ba..4023ba8f3ea1 100644 --- a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected +++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected @@ -1,4 +1,2 @@ -| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:7:19:7:19 | ControlFlowNode for a | diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.ql b/python/ql/test/experimental/dataflow/basic/callGraphSources.ql index 21c3a5a9acea..e482a07cf396 100644 --- a/python/ql/test/experimental/dataflow/basic/callGraphSources.ql +++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.ql @@ -1,5 +1,7 @@ import experimental.dataflow.callGraphConfig from DataFlow::Node source -where exists(CallGraphConfig cfg | cfg.isSource(source)) +where + exists(CallGraphConfig cfg | cfg.isSource(source)) and + exists(source.getLocation().getFile().getRelativePath()) select source diff --git a/python/ql/test/experimental/dataflow/basic/global.expected b/python/ql/test/experimental/dataflow/basic/global.expected index 11696c173356..8894bcc190a0 100644 --- a/python/ql/test/experimental/dataflow/basic/global.expected +++ b/python/ql/test/experimental/dataflow/basic/global.expected @@ -1,5 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | 
parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/experimental/dataflow/basic/global.ql b/python/ql/test/experimental/dataflow/basic/global.ql index ba9a302b05bd..ecab29899e3e 100644 --- a/python/ql/test/experimental/dataflow/basic/global.ql +++ b/python/ql/test/experimental/dataflow/basic/global.ql @@ -3,5 +3,7 @@ import allFlowsConfig from DataFlow::Node source, DataFlow::Node sink where source != sink and - exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) + exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) and + exists(source.getLocation().getFile().getRelativePath()) and + exists(sink.getLocation().getFile().getRelativePath()) select source, sink diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.expected b/python/ql/test/experimental/dataflow/basic/globalStep.expected index b11ee6fe2494..9f228998b9c4 100644 --- a/python/ql/test/experimental/dataflow/basic/globalStep.expected +++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected @@ -1,5 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for 
FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.ql b/python/ql/test/experimental/dataflow/basic/globalStep.ql index 18014b2cc5f2..9b19749b2d8d 100644 --- a/python/ql/test/experimental/dataflow/basic/globalStep.ql +++ b/python/ql/test/experimental/dataflow/basic/globalStep.ql @@ -1,5 +1,8 @@ import allFlowsConfig from DataFlow::PathNode fromNode, DataFlow::PathNode toNode -where toNode = fromNode.getASuccessor() +where + toNode = fromNode.getASuccessor() and + exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and + exists(toNode.getNode().getLocation().getFile().getRelativePath()) select fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected index 18497a00a606..cdf40018ed0b 100644 --- a/python/ql/test/experimental/dataflow/basic/local.expected +++ b/python/ql/test/experimental/dataflow/basic/local.expected @@ -1,11 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | -| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 
of builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault | | test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ | | test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ | | test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b | diff --git a/python/ql/test/experimental/dataflow/basic/local.ql b/python/ql/test/experimental/dataflow/basic/local.ql index 5541cea33dc0..e13013d22108 100644 --- a/python/ql/test/experimental/dataflow/basic/local.ql +++ b/python/ql/test/experimental/dataflow/basic/local.ql @@ -1,5 +1,8 @@ import semmle.python.dataflow.new.DataFlow from DataFlow::Node fromNode, DataFlow::Node toNode -where DataFlow::localFlow(fromNode, toNode) +where + DataFlow::localFlow(fromNode, toNode) and + exists(fromNode.getLocation().getFile().getRelativePath()) and + exists(toNode.getLocation().getFile().getRelativePath()) select fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/localStep.expected b/python/ql/test/experimental/dataflow/basic/localStep.expected index d05e8aa3a42b..e147bb9f4fc2 100644 --- a/python/ql/test/experimental/dataflow/basic/localStep.expected +++ b/python/ql/test/experimental/dataflow/basic/localStep.expected @@ -1,5 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | 
ControlFlowNode for obfuscated_id | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x | diff --git a/python/ql/test/experimental/dataflow/basic/localStep.ql b/python/ql/test/experimental/dataflow/basic/localStep.ql index 4e32450f4960..37b6bd1a7af8 100644 --- a/python/ql/test/experimental/dataflow/basic/localStep.ql +++ b/python/ql/test/experimental/dataflow/basic/localStep.ql @@ -1,5 +1,8 @@ import semmle.python.dataflow.new.DataFlow from DataFlow::Node fromNode, DataFlow::Node toNode -where DataFlow::localFlowStep(fromNode, toNode) +where + DataFlow::localFlowStep(fromNode, toNode) and + exists(fromNode.getLocation().getFile().getRelativePath()) and + exists(toNode.getLocation().getFile().getRelativePath()) select fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/maximalFlows.expected b/python/ql/test/experimental/dataflow/basic/maximalFlows.expected index b65b4b4d30a8..b6f8a1730f11 100644 --- a/python/ql/test/experimental/dataflow/basic/maximalFlows.expected +++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.expected @@ -1,4 +1,3 @@ -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b | | test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | diff --git a/python/ql/test/experimental/dataflow/basic/maximalFlows.ql b/python/ql/test/experimental/dataflow/basic/maximalFlows.ql index ddd673954b99..a314cfca6123 100644 --- a/python/ql/test/experimental/dataflow/basic/maximalFlows.ql +++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.ql @@ -3,5 +3,7 @@ import maximalFlowsConfig from DataFlow::Node source, DataFlow::Node sink where source != sink and - exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) + 
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) and + exists(source.getLocation().getFile().getRelativePath()) and + exists(sink.getLocation().getFile().getRelativePath()) select source, sink diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected index 1e516e323366..944f8190aa51 100644 --- a/python/ql/test/experimental/dataflow/basic/sinks.expected +++ b/python/ql/test/experimental/dataflow/basic/sinks.expected @@ -1,9 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | -| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 0 of builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | | test.py:0:0:0:0 | GSSA Variable __name__ | | test.py:0:0:0:0 | GSSA Variable __package__ | | test.py:0:0:0:0 | GSSA Variable b | diff --git a/python/ql/test/experimental/dataflow/basic/sinks.ql b/python/ql/test/experimental/dataflow/basic/sinks.ql index 8560bb99d3da..f17ea9b9b1bd 100644 --- a/python/ql/test/experimental/dataflow/basic/sinks.ql +++ b/python/ql/test/experimental/dataflow/basic/sinks.ql @@ -1,5 +1,7 @@ import allFlowsConfig from DataFlow::Node sink -where exists(AllFlowsConfig cfg | cfg.isSink(sink)) +where + exists(AllFlowsConfig cfg | cfg.isSink(sink)) and + exists(sink.getLocation().getFile().getRelativePath()) select sink diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected index 1e516e323366..944f8190aa51 100644 --- a/python/ql/test/experimental/dataflow/basic/sources.expected +++ b/python/ql/test/experimental/dataflow/basic/sources.expected @@ -1,9 +1,3 @@ -| file://:0:0:0:0 | [summary] read: 
argument position 0.List element in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | -| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | -| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 0 of builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | | test.py:0:0:0:0 | GSSA Variable __name__ | | test.py:0:0:0:0 | GSSA Variable __package__ | | test.py:0:0:0:0 | GSSA Variable b | diff --git a/python/ql/test/experimental/dataflow/basic/sources.ql b/python/ql/test/experimental/dataflow/basic/sources.ql index d079d4db5962..198882ceee60 100644 --- a/python/ql/test/experimental/dataflow/basic/sources.ql +++ b/python/ql/test/experimental/dataflow/basic/sources.ql @@ -1,5 +1,7 @@ import allFlowsConfig from DataFlow::Node source -where exists(AllFlowsConfig cfg | cfg.isSource(source)) +where + exists(AllFlowsConfig cfg | cfg.isSource(source)) and + exists(source.getLocation().getFile().getRelativePath()) select source diff --git a/python/ql/test/experimental/dataflow/coverage/test_builtins.py b/python/ql/test/experimental/dataflow/coverage/test_builtins.py index 5d9d92ffcb8f..4a3ee95fedf7 100644 --- a/python/ql/test/experimental/dataflow/coverage/test_builtins.py +++ b/python/ql/test/experimental/dataflow/coverage/test_builtins.py @@ -41,8 +41,8 @@ def SINK_F(x): def test_list_from_list(): l1 = [SOURCE, NONSOURCE] l2 = list(l1) - SINK(l2[0]) #$ MISSING: flow="SOURCE, l:-2 -> l2[0]" - SINK_F(l2[1]) # expecting FP due to imprecise flow + SINK(l2[0]) #$ flow="SOURCE, l:-2 -> l2[0]" + SINK_F(l2[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l2[1]" # -- skip list_from_string @@ -50,13 +50,13 @@ def test_list_from_list(): def test_list_from_tuple(): t = (SOURCE, NONSOURCE) l = list(t) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]" - SINK_F(l[1]) # expecting FP due to imprecise flow + SINK(l[0]) #$ 
flow="SOURCE, l:-2 -> l[0]" + SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]" def test_list_from_set(): s = {SOURCE} l = list(s) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]" @expects(2) def test_list_from_dict(): @@ -78,7 +78,7 @@ def test_tuple_from_list(): def test_tuple_from_tuple(): t0 = (SOURCE, NONSOURCE) t = tuple(t0) - SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]" + SINK(t[0]) #$ flow="SOURCE, l:-2 -> t[0]" SINK_F(t[1]) def test_tuple_from_set(): @@ -100,19 +100,19 @@ def test_set_from_list(): l = [SOURCE] s = set(l) v = s.pop() - SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v" + SINK(v) #$ flow="SOURCE, l:-3 -> v" def test_set_from_tuple(): t = (SOURCE,) s = set(t) v = s.pop() - SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v" + SINK(v) #$ flow="SOURCE, l:-3 -> v" def test_set_from_set(): s0 = {SOURCE} s = set(s0) v = s.pop() - SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v" + SINK(v) #$ flow="SOURCE, l:-3 -> v" def test_set_from_dict(): d = {SOURCE: "val"} @@ -126,7 +126,7 @@ def test_set_from_dict(): @expects(2) def test_dict_from_keyword(): d = dict(k = SOURCE, k1 = NONSOURCE) - SINK(d["k"]) #$ MISSING: flow="SOURCE, l:-1 -> d[k]" + SINK(d["k"]) #$ flow="SOURCE, l:-1 -> d['k']" SINK_F(d["k1"]) @expects(2) @@ -139,7 +139,7 @@ def test_dict_from_list(): def test_dict_from_dict(): d1 = {'k': SOURCE, 'k1': NONSOURCE} d2 = dict(d1) - SINK(d2["k"]) #$ MISSING: flow="SOURCE, l:-2 -> d[k]" + SINK(d2["k"]) #$ flow="SOURCE, l:-2 -> d2['k']" SINK_F(d2["k1"]) ## Container methods @@ -278,8 +278,8 @@ def test_reversed_list(): l0 = [SOURCE, NONSOURCE] r = reversed(l0) l = list(r) - SINK_F(l[0]) - SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]" + SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]" + SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]" @expects(2) def test_reversed_tuple(): diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected 
b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected index ef6f6a2929b9..3b3f18c5b9e8 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected @@ -1,5 +1,3 @@ -| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | -| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | | test.py:3:1:3:7 | GSSA Variable tainted | test.py:4:6:4:12 | ControlFlowNode for tainted | | test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:3:1:3:7 | GSSA Variable tainted | | test.py:6:1:6:11 | ControlFlowNode for FunctionExpr | test.py:6:5:6:8 | GSSA Variable func | diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql index efafa7fec9f3..acbb2bea137d 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql +++ b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.ql @@ -3,5 +3,8 @@ import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow from DataFlow::Node nodeFrom, DataFlow::Node nodeTo -where TaintTracking::localTaintStep(nodeFrom, nodeTo) +where + TaintTracking::localTaintStep(nodeFrom, nodeTo) and + exists(nodeFrom.getLocation().getFile().getRelativePath()) and + exists(nodeTo.getLocation().getFile().getRelativePath()) select nodeFrom, nodeTo diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py index df30a75c3e31..50f9a613f9bc 100644 --- 
a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py @@ -38,8 +38,8 @@ def test_construction(): set(tainted_list), # $ tainted frozenset(tainted_list), # $ tainted dict(tainted_dict), # $ tainted - dict(k = tainted_string)["k"], # $ MISSING: tainted - dict(dict(k = tainted_string))["k"], # $ MISSING: tainted + dict(k = tainted_string)["k"], # $ tainted + dict(dict(k = tainted_string))["k"], # $ tainted dict(["k", tainted_string]), # $ tainted )