Skip to content
This repository has been archived by the owner on Jan 5, 2023. It is now read-only.

Switch to use-use dataflow #460

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 11 additions & 1 deletion ql/src/InconsistentCode/MissingErrorCheck.ql
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ predicate checksValue(IR::Instruction instruction, DataFlow::SsaNode value) {
)
}

// Now that we have use-use flow, phi nodes aren't directly involved in the flow graph. TODO: change this?
DataFlow::SsaNode phiDefinedFrom(DataFlow::SsaNode node) {
result.getDefinition().(SsaPseudoDefinition).getAnInput() = node.getDefinition().getVariable()
}

DataFlow::SsaNode definedFrom(DataFlow::SsaNode node) {
DataFlow::localFlow(node, result) or
result = phiDefinedFrom*(node)
}

/**
* Matches if `call` is a function returning (`ptr`, `err`) where `ptr` may be nil, and neither
* `ptr` not `err` has been checked for validity as of `node`.
Expand All @@ -98,7 +108,7 @@ predicate returnUncheckedAtNode(
// localFlow is used to permit checks via either an SSA phi node or ordinary assignment.
returnUncheckedAtNode(call, node.getAPredecessor(), ptr, err) and
not exists(DataFlow::SsaNode checked |
DataFlow::localFlow(ptr, checked) or DataFlow::localFlow(err, checked)
checked = definedFrom(ptr) or checked = definedFrom(err)
|
checksValue(node, checked)
)
Expand Down
4 changes: 4 additions & 0 deletions ql/src/semmle/go/dataflow/SSA.qll
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class SsaExplicitDefinition extends SsaDefinition, TExplicitDef {
) {
getInstruction().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}

IR::Instruction getAFirstUse() { firstUse(this, result) }
}

/** Provides a helper predicate for working with explicit SSA definitions. */
Expand Down Expand Up @@ -405,3 +407,5 @@ DataFlow::Node getASimilarReadNode(DataFlow::Node node) {
result = readFields.similar().getAUse()
)
}

IR::Instruction getAnAdjacentUse(IR::Instruction pred) { adjacentUseUse(pred, result) }
170 changes: 170 additions & 0 deletions ql/src/semmle/go/dataflow/SsaImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ private module Internal {
/**
* Holds if the `i`th node of `bb` is a use or an SSA definition of variable `v`, with
* `k` indicating whether it is the former or the latter.
*
* Note this includes phi nodes, whereas `ref` above only includes explicit writes and captures.
*/
private predicate ssaRef(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) {
useAt(bb, i, v) and k = ReadRef()
Expand Down Expand Up @@ -290,6 +292,174 @@ private module Internal {
or
rewindReads(bb, i, v) = 1 and result = getDefReachingEndOf(bb.getImmediateDominator(), v)
}

private module AdjacentUsesImpl {
/** Holds if `v` is defined or used in `b`. */
private predicate varOccursInBlock(SsaSourceVariable v, ReachableBasicBlock b) {
ssaRef(b, _, v, _)
}

/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
private predicate blockPrecedesVar(SsaSourceVariable v, ReachableBasicBlock b) {
varOccursInBlock(v, b)
or
exists(getDefReachingEndOf(b, v))
}

/**
* Holds if `v` occurs in `b1` and `b2` is one of `b1`'s successors.
*
* Factored out of `varBlockReaches` to force join order compared to the larger
* set `blockPrecedesVar(v, b2)`.
*/
pragma[noinline]
private predicate varBlockReachesBaseCand(
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
) {
varOccursInBlock(v, b1) and
b2 = b1.getASuccessor()
}

/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* in `b2` or one of its transitive successors but not in any block on the path
* between `b1` and `b2`. Unlike `varBlockReaches` this may include blocks `b2`
* where `v` is dead.
*
* Factored out of `varBlockReaches` to force join order compared to the larger
* set `blockPrecedesVar(v, b2)`.
*/
pragma[noinline]
private predicate varBlockReachesRecCand(
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock mid, ReachableBasicBlock b2
) {
varBlockReaches(v, b1, mid) and
not varOccursInBlock(v, mid) and
b2 = mid.getASuccessor()
}

/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* in `b2` or one of its transitive successors but not in any block on the path
* between `b1` and `b2`.
*/
private predicate varBlockReaches(
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
) {
varBlockReachesBaseCand(v, b1, b2) and
blockPrecedesVar(v, b2)
or
exists(ReachableBasicBlock mid |
varBlockReachesRecCand(v, b1, mid, b2) and
blockPrecedesVar(v, b2)
)
}

/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* `b2` but not in any block on the path between `b1` and `b2`.
*/
private predicate varBlockStep(
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
) {
varBlockReaches(v, b1, b2) and
varOccursInBlock(v, b2)
}

/**
* Gets the maximum rank among all SSA references to `v` in basic block `bb`.
*/
private int maxSsaRefRank(ReachableBasicBlock bb, SsaSourceVariable v) {
result = max(ssaRefRank(bb, _, v, _))
}

/**
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
* there is a path between them without any occurrence of `v`.
*/
pragma[nomagic]
predicate adjacentVarRefs(
SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2
) {
exists(int rankix |
b1 = b2 and
ssaRefRank(b1, i1, v, _) = rankix and
ssaRefRank(b2, i2, v, _) = rankix + 1
)
or
maxSsaRefRank(b1, v) = ssaRefRank(b1, i1, v, _) and
varBlockStep(v, b1, b2) and
ssaRefRank(b2, i2, v, _) = 1
}

predicate variableUse(SsaSourceVariable v, IR::Instruction use, ReachableBasicBlock bb, int i) {
bb.getNode(i) = use and
exists(SsaVariable sv |
sv.getSourceVariable() = v and
use = sv.getAUse()
)
}
}

private import AdjacentUsesImpl

/**
* Holds if the value defined at `def` can reach `use` without passing through
* any other uses, but possibly through phi nodes.
*/
cached
predicate firstUse(SsaDefinition def, IR::Instruction use) {
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
def.definesAt(b1, i1, v) and
variableUse(v, use, b2, i2)
)
or
exists(
SsaSourceVariable v, SsaPhiNode redef, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
int i2
|
adjacentVarRefs(v, b1, i1, b2, i2) and
def.definesAt(b1, i1, v) and
redef.definesAt(b2, i2, v) and
firstUse(redef, use)
)
}

/**
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
* variable, that is, the value read in `use1` can reach `use2` without passing
* through any other use or any SSA definition of the variable.
*/
cached
predicate adjacentUseUseSameVar(IR::Instruction use1, IR::Instruction use2) {
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableUse(v, use1, b1, i1) and
variableUse(v, use2, b2, i2)
)
}

/**
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same
* `SsaSourceVariable`, that is, the value read in `use1` can reach `use2`
* without passing through any other use or any SSA definition of the variable
* except for phi nodes and uncertain implicit updates.
*/
cached
predicate adjacentUseUse(IR::Instruction use1, IR::Instruction use2) {
adjacentUseUseSameVar(use1, use2)
or
exists(
SsaSourceVariable v, SsaPhiNode def, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
int i2
|
adjacentVarRefs(v, b1, i1, b2, i2) and
variableUse(v, use1, b1, i1) and
def.definesAt(b2, i2, v) and
firstUse(def, use2)
)
}
}

import Internal
23 changes: 15 additions & 8 deletions ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1061,29 +1061,36 @@ private predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) {
else nodeTo.asInstruction() = evalAssert
)
or
// Instruction -> SSA
// Instruction -> SSA defn
exists(IR::Instruction pred, SsaExplicitDefinition succ |
succ.getRhs() = pred and
nodeFrom = MkInstructionNode(pred) and
nodeTo = MkSsaNode(succ)
)
or
// SSA -> SSA
exists(SsaDefinition pred, SsaDefinition succ |
succ.(SsaVariableCapture).getSourceVariable() = pred.(SsaExplicitDefinition).getSourceVariable() or
succ.(SsaPseudoDefinition).getAnInput() = pred
// SSA defn -> SSA capture
exists(SsaExplicitDefinition pred, SsaVariableCapture succ |
// Check: should these flow from PHIs as well? Perhaps they should be included
// in the use-use graph?
succ.(SsaVariableCapture).getSourceVariable() = pred.(SsaExplicitDefinition).getSourceVariable()
|
nodeFrom = MkSsaNode(pred) and
nodeTo = MkSsaNode(succ)
)
or
// SSA -> Instruction
exists(SsaDefinition pred, IR::Instruction succ |
succ = pred.getVariable().getAUse() and
// SSA defn -> first SSA use
exists(SsaExplicitDefinition pred, IR::Instruction succ | succ = pred.getAFirstUse() |
nodeFrom = MkSsaNode(pred) and
nodeTo = MkInstructionNode(succ)
)
or
// SSA use -> successive SSA use
// Note this case includes Phi node traversal
exists(IR::Instruction pred, IR::Instruction succ | succ = getAnAdjacentUse(pred) |
nodeFrom = MkInstructionNode(pred) and
nodeTo = MkInstructionNode(succ)
)
or
// GlobalFunctionNode -> use
nodeFrom = MkGlobalFunctionNode(nodeTo.asExpr().(FunctionName).getTarget())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ module CleartextLogging {
|
this.asExpr().(Ident).getName() = name
or
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
or
this.(DataFlow::FieldReadNode).getFieldName() = name
or
this.(DataFlow::CallNode).getCalleeName() = name
Expand Down Expand Up @@ -143,7 +145,7 @@ module CleartextLogging {
not this instanceof NonCleartextPassword and
name.regexpMatch(maybePassword()) and
(
this.asExpr().(Ident).getName() = name
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
or
exists(DataFlow::FieldReadNode fn |
fn = this and
Expand Down
22 changes: 22 additions & 0 deletions ql/src/semmle/go/security/CommandInjection.qll
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,27 @@ module CommandInjection {
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}

// Hack: with use-use flow, we might have x (use at line 1) -> x (use at line 2),
// x (use at line 1) -> array at line 1 and x (use at line 2) -> array at line 2,
// in the context
//
// array1 := {"--", x}
// array2 := {x, "--"}
//
// We want to taint array2 but not array1, which suggests excluding the edge x (use 1) -> array1
// However isSanitizer only allows us to remove nodes (isSanitizerIn/Out permit removing all outgoing
// or incoming edges); we can't remove an individual edge, so instead we supply extra edges connecting
// the definition with the next use.
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
exists(
ArgumentArrayWithDoubleDash array, DataFlow::InstructionNode sanitized,
DataFlow::SsaNode defn
|
sanitized = array.getASanitizedElement() and sanitized = defn.getAUse()
|
pred = defn and succ = sanitized.getASuccessor()
)
}
}
}
5 changes: 5 additions & 0 deletions ql/src/semmle/go/security/OpenUrlRedirect.qll
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ module OpenUrlRedirect {
w.writesField(node.getASuccessor(), f, _)
)
or
// Note this blocks other outgoing edges from this node too, so it will
// cause false negatives in combination with use-use flow as subsequent
// uses are incorrectly sanitized. Noting the other end of the
// sanitizing edge as a BarrierIn has the opposite problem, incorrectly
// rejecting taint from the other side of a concatenation for example.
hostnameSanitizingPrefixEdge(node, _)
}

Expand Down