From 2999243e348519613cd12a6afe7f5d1ef4f8710a Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Dec 2022 10:40:25 +0000 Subject: [PATCH 1/4] C++: Add failing IR dataflow testcase. --- .../dataflow/dataflow-tests/dataflow-consistency.expected | 4 ++++ .../test/library-tests/dataflow/dataflow-tests/test.cpp | 8 ++++++++ .../dataflow/dataflow-tests/uninitialized.expected | 3 +++ 3 files changed, 15 insertions(+) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected index 50415a41f7d1..a5421153a9b3 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected @@ -92,6 +92,10 @@ postWithInFlow | test.cpp:506:3:506:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:506:4:506:4 | p [inner post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:512:35:512:35 | x [inner post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:519:3:519:12 | stackArray [inner post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:519:3:519:15 | access to array [post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:520:3:520:12 | stackArray [inner post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:520:3:520:15 | access to array [post update] | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition uniqueParameterNodePosition diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index afce7d3d6370..069c3845f752 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -512,3 +512,11 @@ void viaOutparamMissingReturn() { intOutparamSourceMissingReturn(&x); sink(x); // $ ast MISSING: ir } + +void uncertain_definition() { + int stackArray[2]; + int clean = 0; + stackArray[0] = source(); + stackArray[1] = clean; + sink(stackArray[0]); // $ ast=519:19 SPURIOUS: ast=517:7 MISSING: ir +} \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/uninitialized.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/uninitialized.expected index 00e4a9a0eb32..459d4f6ea004 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/uninitialized.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/uninitialized.expected @@ -34,3 +34,6 @@ | test.cpp:448:7:448:11 | local | test.cpp:449:18:449:22 | local | | test.cpp:448:7:448:11 | local | test.cpp:450:8:450:12 | local | | test.cpp:448:7:448:11 | local | test.cpp:451:9:451:13 | local | +| test.cpp:517:7:517:16 | stackArray | test.cpp:519:3:519:12 | stackArray | +| test.cpp:517:7:517:16 | stackArray | test.cpp:520:3:520:12 | stackArray | +| test.cpp:517:7:517:16 | stackArray | test.cpp:521:8:521:17 | stackArray | From ad522651ec80f3007e9f6429604bec4d0a704468 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Dec 2022 10:54:26 +0000 Subject: [PATCH 2/4] C++: Flow through uncertain writes. --- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 2 +- .../cpp/ir/dataflow/internal/SsaInternals.qll | 74 +++++++++++++++---- .../dataflow/internal/SsaInternalsCommon.qll | 50 ++++++++----- .../dataflow/internal/ssa0/SsaInternals.qll | 2 +- 4 files changed, 95 insertions(+), 33 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 39f2c41eab8c..5087dbd5930e 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -718,7 +718,7 @@ class UninitializedNode extends Node { UninitializedNode() { exists(Ssa::Def def | def.getValue().asInstruction() instanceof UninitializedInstruction and - Ssa::nodeToDefOrUse(this, def) and + Ssa::nodeToDefOrUse(this, def, _) and v = def.getSourceVariable().getBaseVariable().(Ssa::BaseIRVariable).getIRVariable().getAst() ) } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index 10ec5730c71d..d9c4fd5eb26f 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -291,10 +291,13 @@ predicate outNodeHasAddressAndIndex( out.getIndirectionIndex() = indirectionIndex } -private predicate defToNode(Node nodeFrom, Def def) { - nodeHasOperand(nodeFrom, def.getValue().asOperand(), def.getIndirectionIndex()) - or - nodeHasInstruction(nodeFrom, def.getValue().asInstruction(), def.getIndirectionIndex()) +private predicate defToNode(Node nodeFrom, Def def, boolean uncertain) { + ( + nodeHasOperand(nodeFrom, def.getValue().asOperand(), def.getIndirectionIndex()) + or + nodeHasInstruction(nodeFrom, def.getValue().asInstruction(), def.getIndirectionIndex()) + ) and + if def.isCertain() then uncertain = false else uncertain = true } /** @@ -302,12 +305,13 @@ private predicate defToNode(Node nodeFrom, Def def) { * * Holds if `nodeFrom` is the node that correspond to the definition or use `defOrUse`. */ -predicate nodeToDefOrUse(Node nodeFrom, SsaDefOrUse defOrUse) { +predicate nodeToDefOrUse(Node nodeFrom, SsaDefOrUse defOrUse, boolean uncertain) { // Node -> Def - defToNode(nodeFrom, defOrUse) + defToNode(nodeFrom, defOrUse, uncertain) or // Node -> Use - useToNode(defOrUse, nodeFrom) + useToNode(defOrUse, nodeFrom) and + uncertain = false } /** @@ -316,7 +320,7 @@ predicate nodeToDefOrUse(Node nodeFrom, SsaDefOrUse defOrUse) { */ private predicate indirectConversionFlowStep(Node nFrom, Node nTo) { not exists(UseOrPhi defOrUse | - nodeToDefOrUse(nTo, defOrUse) and + nodeToDefOrUse(nTo, defOrUse, _) and adjacentDefRead(defOrUse, _) ) and exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr | @@ -342,31 +346,60 @@ private predicate indirectConversionFlowStep(Node nFrom, Node nTo) { * So this predicate recurses back along conversions and `PointerArithmeticInstruction`s to find the * first use that has provides use-use flow, and uses that target as the target of the `nodeFrom`. */ -private predicate adjustForPointerArith(Node nodeFrom, UseOrPhi use) { +private predicate adjustForPointerArith(Node nodeFrom, UseOrPhi use, boolean uncertain) { nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and exists(DefOrUse defOrUse, Node adjusted | indirectConversionFlowStep*(adjusted, nodeFrom) and - nodeToDefOrUse(adjusted, defOrUse) and + nodeToDefOrUse(adjusted, defOrUse, uncertain) and adjacentDefRead(defOrUse, use) ) } -/** Holds if there is def-use or use-use flow from `nodeFrom` to `nodeTo`. */ -predicate ssaFlow(Node nodeFrom, Node nodeTo) { +private predicate ssaFlowImpl(Node nodeFrom, Node nodeTo, boolean uncertain) { // `nodeFrom = any(PostUpdateNode pun).getPreUpdateNode()` is implied by adjustedForPointerArith. exists(UseOrPhi use | - adjustForPointerArith(nodeFrom, use) and + adjustForPointerArith(nodeFrom, use, uncertain) and useToNode(use, nodeTo) ) or not nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and exists(DefOrUse defOrUse1, UseOrPhi use | - nodeToDefOrUse(nodeFrom, defOrUse1) and + nodeToDefOrUse(nodeFrom, defOrUse1, uncertain) and adjacentDefRead(defOrUse1, use) and useToNode(use, nodeTo) ) } +/** + * Holds if `def` is the corresponding definition of + * the SSA library's `definition`. + */ +private predicate ssaDefinition(Def def, Definition definition) { + exists(IRBlock block, int i, SourceVariable sv | + def.hasIndexInBlock(block, i, sv) and + definition.definesAt(sv, block, i) + ) +} + +/** Gets a node that represents the prior definition of `node`. */ +private Node getAPriorDefinition(Node node) { + exists(Def def, Definition definition, Definition inp, Def input | + defToNode(node, def, true) and + ssaDefinition(def, definition) and + uncertainWriteDefinitionInput(pragma[only_bind_into](definition), pragma[only_bind_into](inp)) and + ssaDefinition(input, inp) and + defToNode(result, input, _) + ) +} + +/** Holds if there is def-use or use-use flow from `nodeFrom` to `nodeTo`. */ +predicate ssaFlow(Node nodeFrom, Node nodeTo) { + exists(Node nFrom, boolean uncertain | + ssaFlowImpl(nFrom, nodeTo, uncertain) and + if uncertain = true then nodeFrom = [nFrom, getAPriorDefinition(nFrom)] else nodeFrom = nFrom + ) +} + /** * Holds if `use` is a use of `sv` and is a next adjacent use of `phi` in * index `i1` in basic block `bb1`. @@ -460,6 +493,11 @@ module SsaCached { predicate lastRefRedef(Definition def, IRBlock bb, int i, Definition next) { SsaImpl::lastRefRedef(def, bb, i, next) } + + cached + predicate uncertainWriteDefinitionInput(SsaImpl::UncertainWriteDefinition def, Definition inp) { + SsaImpl::uncertainWriteDefinitionInput(def, inp) + } } cached @@ -498,6 +536,10 @@ class DefOrUse extends TDefOrUse, SsaDefOrUse { final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() } override string toString() { result = defOrUse.toString() } + + predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) { + defOrUse.hasIndexInBlock(block, index, sv) + } } class Phi extends TPhi, SsaDefOrUse { @@ -541,6 +583,8 @@ class Def extends DefOrUse { } Node0Impl getValue() { result = defOrUse.getValue() } + + predicate isCertain() { defOrUse.isCertain() } } private module SsaImpl = SsaImplCommon::Make; @@ -549,4 +593,6 @@ class PhiNode = SsaImpl::PhiNode; class Definition = SsaImpl::Definition; +class UncertainWriteDefinition = SsaImpl::UncertainWriteDefinition; + import SsaCached diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll index 1fcd02c51f6c..2cb262afec12 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -428,9 +428,12 @@ private module Cached { boolean certain, Node0Impl value, Operand address, BaseSourceVariableInstruction base, int ind, int indirectionIndex ) { - exists(int ind0, CppType type, int lower, int upper | - isWrite(value, address, certain) and - isDefImpl(address, base, ind0) and + exists( + boolean writeIsCertain, boolean addressIsCertain, int ind0, CppType type, int lower, int upper + | + isWrite(value, address, writeIsCertain) and + isDefImpl(address, base, ind0, addressIsCertain) and + certain = writeIsCertain.booleanAnd(addressIsCertain) and type = getLanguageType(address) and upper = countIndirectionsForCppType(type) and ind = ind0 + [lower .. upper] and @@ -439,6 +442,16 @@ private module Cached { ) } + /** + * Holds if the address computed by `operand` is guarenteed to write + * to a specific address. + */ + private predicate isCertainAddress(Operand operand) { + operand.getDef() instanceof VariableAddressInstruction + or + operand.getType() instanceof Cpp::ReferenceType + } + /** * Holds if `address` is a use of an SSA variable rooted at `base`, and the * path from `base` to `address` passes through `ind` load-like instructions. @@ -446,27 +459,30 @@ private module Cached { * Note: Unlike `isUseImpl`, this predicate recurses through pointer-arithmetic * instructions. */ - private predicate isDefImpl(Operand address, BaseSourceVariableInstruction base, int ind) { + private predicate isDefImpl( + Operand operand, BaseSourceVariableInstruction base, int ind, boolean certain + ) { DataFlowImplCommon::forceCachingInSameStage() and ind = 0 and - address = base.getAUse() + operand = base.getAUse() and + (if isCertainAddress(operand) then certain = true else certain = false) or - exists(Operand mid, Instruction instr | - isDefImpl(mid, base, ind) and - instr = address.getDef() and - conversionFlow(mid, instr, _) + exists(Operand mid, Instruction instr, boolean certain0, boolean isPointerArith | + isDefImpl(mid, base, ind, certain0) and + instr = operand.getDef() and + conversionFlow(mid, instr, isPointerArith) and + if isPointerArith = true then certain = false else certain = certain0 ) or - exists(int ind0 | - exists(Operand operand | - isDereference(address.getDef(), operand) and - isDefImpl(operand, base, ind - 1) - ) - or - isDefImpl(address.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0) + exists(Operand address, boolean certain0 | + isDereference(operand.getDef(), address) and + isDefImpl(address, base, ind - 1, certain0) | - ind0 = ind - 1 + if isCertainAddress(operand) then certain = certain0 else certain = false ) + or + isDefImpl(operand.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind - 1, _) and + certain = true } } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll index 7f044b8d8e70..d30938358430 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll @@ -32,7 +32,7 @@ private newtype TDefOrUseImpl = TDefImpl(Operand address) { isDef(_, _, address, _, _, _) } or TUseImpl(Operand operand) { isUse(_, operand, _, _, _) and - not isDef(_, _, operand, _, _, _) + not isDef(true, _, operand, _, _, _) } abstract private class DefOrUseImpl extends TDefOrUseImpl { From 8722fb2cf50759b916bb10d745723531e1292168 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Dec 2022 10:54:40 +0000 Subject: [PATCH 3/4] C++: Accept test changes. --- cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp | 2 +- cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp index d17be545e4f8..4ffbd8c2ff7d 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp @@ -48,5 +48,5 @@ void following_pointers( int stackArray[2] = { source(), source() }; stackArray[0] = source(); - sink(stackArray); // $ ast,ir + sink(stackArray); // $ ast ir ir=49:35 ir=50:19 } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index 069c3845f752..a14d61ae7807 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -518,5 +518,5 @@ void uncertain_definition() { int clean = 0; stackArray[0] = source(); stackArray[1] = clean; - sink(stackArray[0]); // $ ast=519:19 SPURIOUS: ast=517:7 MISSING: ir + sink(stackArray[0]); // $ ast=519:19 ir SPURIOUS: ast=517:7 } \ No newline at end of file From a161dddbbf8e76bbc5d0005ae559a9df30e4053f Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Dec 2022 13:39:09 +0000 Subject: [PATCH 4/4] C++: Accept test changes. These happen because these remote flow sources specify that the remote source is both 'isReturnValue' and 'isReturnValueDeref'. --- .../dataflow/source-sink-tests/sources-and-sinks.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/source-sink-tests/sources-and-sinks.cpp b/cpp/ql/test/library-tests/dataflow/source-sink-tests/sources-and-sinks.cpp index 423061b80c96..b23049d171ca 100644 --- a/cpp/ql/test/library-tests/dataflow/source-sink-tests/sources-and-sinks.cpp +++ b/cpp/ql/test/library-tests/dataflow/source-sink-tests/sources-and-sinks.cpp @@ -3,9 +3,9 @@ char *secure_getenv(const char *name); wchar_t *_wgetenv(const wchar_t *name); void test_getenv() { - void *var1 = getenv("VAR"); // $ local_source - void *var2 = secure_getenv("VAR"); // $ local_source - void *var3 = _wgetenv(L"VAR"); // $ local_source + void *var1 = getenv("VAR"); // $ local_source=6:18 local_source=6:18 + void *var2 = secure_getenv("VAR"); // $ local_source=7:18 local_source=7:18 + void *var3 = _wgetenv(L"VAR"); // $ local_source=8:18 local_source=8:18 } int send(int, const void*, int, int);