Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 41 additions & 11 deletions python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute

//--------
// Data flow graph
Expand Down Expand Up @@ -31,7 +32,7 @@ class StorePreUpdateNode extends PreUpdateNode, CfgNode {
}
}

/** A node marking the state change of an object after a read */
/** A node marking the state change of an object after a read. */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
ReadPreUpdateNode() {
exists(Attribute a |
Expand Down Expand Up @@ -97,12 +98,19 @@ module EssaFlow {
contextManager.strictlyDominates(var)
)
or
// Use
// First use after definition
// `y = 42`
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
nodeFrom.(EssaNode).getVar().getASourceUse() = nodeTo.(CfgNode).getNode()
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
or
// Next use after use
// `x = f(y)`
// `z = y + 1`
// nodeFrom is 'y' on first line, cfg node
// nodeTo is `y` on second line, cfg node
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
or
// Refinements
exists(EssaEdgeRefinement r |
Expand All @@ -120,6 +128,14 @@ module EssaFlow {
nodeFrom.(EssaNode).getVar() = p.getAnInput()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In these three cases, C# takes any of the last reads of the input variable as nodeFrom, and only if there are no reads do we take the SSA node. I believe this will currently not work

if (..)
  x = taint;
  clean(x);
else
  x = taint;
  clean(x)
sink(x)

because you will jump directly from both definitions of x to the call to sink.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even worse, something like this will (I believe) also not work:

x = ...
if (...)
   x.Foo = taint;
else
   x = ...
sink(x.Foo)

because there is not step from the x in x.Foo = taint to the phi node for x after the if-then-else.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yoff have you added the testcases from above somewhere, and checked how we handle them after use-use flow? 😊

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think the latter case will work, as long as the store step in x.Foo = taint targets the refined SSA node for x. But if it instead targets the post-update node for x (as for Java and C#), the change is needed (and it will be needed for the first case anyway).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet (but very soon), the need is tracked here. I think we probably need to remove some essa-flow and let use-use do the work.

)
}

predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
AdjacentUses::adjacentUseUseSameVar(nodeFrom, nodeTo)
}

predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
}
}

//--------
Expand All @@ -131,18 +147,25 @@ module EssaFlow {
* excludes SSA flow through instance fields.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
// If there is ESSA-flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
exists(Node node |
not node.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(node, nodeTo) and
nodeFrom = update(node)
)
}

/**
* Holds if `result` is either `node`, or the post-update node for `node`.
*/
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
result = node
}

Expand Down Expand Up @@ -365,15 +388,22 @@ string ppReprType(DataFlowType t) { none() }
* another. Additional steps specified by the configuration are *not*
* taken into account.
*/
predicate jumpStep(Node pred, Node succ) {
predicate jumpStep(Node nodeFrom, Node nodeTo) {
// As we have ESSA variables for global variables,
// we include ESSA flow steps involving global variables.
(
pred.(EssaNode).getVar() instanceof GlobalSsaVariable
nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
or
succ.(EssaNode).getVar() instanceof GlobalSsaVariable
nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable
) and
EssaFlow::essaFlowStep(pred, succ)
(
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
or
// As jump steps do not respect chronology,
// we add jump steps for each def-use pair.
nodeFrom.asVar() instanceof GlobalSsaVariable and
nodeTo.asCfgNode() = nodeFrom.asVar().getASourceUse()
)
}

//--------
Expand Down
138 changes: 137 additions & 1 deletion python/ql/src/semmle/python/essa/SsaCompute.qll
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ private module SsaComputeImpl {
defUseRank(v, b, rankix, i)
}

/** A `VarAccess` `use` of `v` in `b` at index `i`. */
/** A variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
(v.getAUse() = use or v.hasRefinement(use, _)) and
Expand Down Expand Up @@ -348,11 +348,147 @@ private module SsaComputeImpl {
)
}
}

cached
module AdjacentUsesImpl {
/**
* Holds if `rankix` is the rank the index `i` at which there is an SSA definition or explicit use of
* `v` in the basic block `b`.
*/
cached
predicate defSourceUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
i = rank[rankix](int j | variableDefine(v, _, b, j) or variableSourceUse(v, _, b, j))
}

/** A variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
v.getASourceUse() = use and
exists(int j |
b.getNode(j) = use and
i = 2 * j
)
}

/** Gets the maximum rank index for the given variable and basic block. */
private int lastSourceUseRank(SsaSourceVariable v, BasicBlock b) {
result = max(int rankix | defSourceUseRank(v, b, rankix, _))
}

/** Holds if `v` is defined or used in `b`. */
private predicate varOccursInBlock(SsaSourceVariable v, BasicBlock b) {
defSourceUseRank(v, b, _, _)
}

/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
private predicate blockPrecedesVar(SsaSourceVariable v, BasicBlock b) {
varOccursInBlock(v, b.getASuccessor*())
}

/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* in `b2` or one of its transitive successors but not in any block on the path
* between `b1` and `b2`.
*/
private predicate varBlockReaches(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
varOccursInBlock(v, b1) and
b2 = b1.getASuccessor() and
blockPrecedesVar(v, b2)
or
exists(BasicBlock mid |
varBlockReaches(v, b1, mid) and
b2 = mid.getASuccessor() and
not varOccursInBlock(v, mid) and
blockPrecedesVar(v, b2)
)
}

/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* `b2` but not in any block on the path between `b1` and `b2`.
*/
private predicate varBlockStep(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
varBlockReaches(v, b1, b2) and
varOccursInBlock(v, b2)
}

/**
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
* there is a path between them without any occurrence of `v`.
*/
cached
predicate adjacentVarRefs(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2) {
exists(int rankix |
b1 = b2 and
defSourceUseRank(v, b1, rankix, i1) and
defSourceUseRank(v, b2, rankix + 1, i2)
)
or
defSourceUseRank(v, b1, lastSourceUseRank(v, b1), i1) and
varBlockStep(v, b1, b2) and
defSourceUseRank(v, b2, 1, i2)
}

/**
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
* variable, that is, the value read in `use1` can reach `use2` without passing
* through any other use or any SSA definition of the variable.
*/
cached
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableSourceUse(v, use1, b1, i1) and
variableSourceUse(v, use2, b2, i2)
)
}

/**
* Holds if the value defined at `def` can reach `use` without passing through
* any other uses, but possibly through phi nodes and uncertain implicit updates.
*/
cached
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
definesAt(def, v, b1, i1) and
variableSourceUse(v, use, b2, i2)
)
or
exists(
SsaSourceVariable v, EssaDefinition redef, BasicBlock b1, int i1, BasicBlock b2, int i2
|
redef instanceof PhiFunction
|
adjacentVarRefs(v, b1, i1, b2, i2) and
definesAt(def, v, b1, i1) and
definesAt(redef, v, b2, i2) and
firstUse(redef, use)
)
}

/**
* Holds if `def` defines `v` at the specified position.
* Phi nodes are placed at index -1.
*/
cached
predicate definesAt(EssaDefinition def, SsaSourceVariable v, BasicBlock b, int i) {
exists(ControlFlowNode defNode |
def.(EssaNodeDefinition).definedBy(v, defNode) and
variableDefine(v, defNode, b, i)
)
or
v = def.(PhiFunction).getSourceVariable() and
b = def.(PhiFunction).getBasicBlock() and
i = -1
}
}
}

import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions
import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions
import SsaComputeImpl::LivenessImpl as Liveness
import SsaComputeImpl::AdjacentUsesImpl as AdjacentUses

/* This is exported primarily for testing */
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ uniqueEnclosingCallable
| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. |
| module.py:9:9:9:14 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. |
| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. |
| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test_taint
| test.py:22 | fail | test_custom_sanitizer | s |
| test.py:22 | ok | test_custom_sanitizer | s |
| test.py:36 | fail | test_custom_sanitizer_guard | s |
| test.py:38 | ok | test_custom_sanitizer_guard | s |
| test.py:49 | ok | test_escape | s2 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
| test_collections.py:159 | ok | list_append | my_list |
| test_collections.py:162 | fail | list_append | my_list |
| test_collections.py:162 | ok | list_append | my_list |
| test_collections.py:169 | ok | list_extend | my_list |
| test_collections.py:172 | fail | list_extend | my_list |
| test_collections.py:179 | ok | dict_update_dict | my_dict |
Expand All @@ -63,7 +63,7 @@
| test_collections.py:212 | fail | dict_manual_update | my_dict |
| test_collections.py:220 | fail | dict_merge | merged |
| test_collections.py:227 | ok | set_add | my_set |
| test_collections.py:230 | fail | set_add | my_set |
| test_collections.py:230 | ok | set_add | my_set |
| test_json.py:26 | ok | test | json.dumps(..) |
| test_json.py:27 | ok | test | json.loads(..) |
| test_json.py:34 | fail | test | tainted_filelike |
Expand Down