github · MathiasVP · May 18, 2023 · May 16, 2023 · May 16, 2023 · May 16, 2023
@@ -657,24 +657,16 @@ private predicate indirectConversionFlowStep(Node nFrom, Node nTo) {
  * So this predicate recurses back along conversions and `PointerArithmeticInstruction`s to find the
  * first use that has provides use-use flow, and uses that target as the target of the `nodeFrom`.
  */
-private predicate adjustForPointerArith(
-  DefOrUse defOrUse, Node nodeFrom, UseOrPhi use, boolean uncertain
-) {
-  nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and
-  exists(Node adjusted |
-    indirectConversionFlowStep*(adjusted, nodeFrom) and
-    nodeToDefOrUse(adjusted, defOrUse, uncertain) and
+private predicate adjustForPointerArith(PostUpdateNode pun, UseOrPhi use) {
+  exists(DefOrUse defOrUse, Node adjusted |
+    indirectConversionFlowStep*(adjusted, pun.getPreUpdateNode()) and
+    nodeToDefOrUse(adjusted, defOrUse, _) and
     adjacentDefRead(defOrUse, use)
   )
 }
 
 private predicate ssaFlowImpl(SsaDefOrUse defOrUse, Node nodeFrom, Node nodeTo, boolean uncertain) {
-  // `nodeFrom = any(PostUpdateNode pun).getPreUpdateNode()` is implied by adjustedForPointerArith.
   exists(UseOrPhi use |
-    adjustForPointerArith(defOrUse, nodeFrom, use, uncertain) and
-    useToNode(use, nodeTo)
-    or
-    not nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and
     nodeToDefOrUse(nodeFrom, defOrUse, uncertain) and
     adjacentDefRead(defOrUse, use) and
     useToNode(use, nodeTo) and
@@ -719,14 +711,19 @@ predicate ssaFlow(Node nodeFrom, Node nodeTo) {
   )
 }
 
+private predicate isArgumentOfCallable(DataFlowCall call, ArgumentNode arg) {
+  arg.argumentOf(call, _)
+}
+
+/** Holds if there is def-use or use-use flow from `pun` to `nodeTo`. */
 predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) {
-  exists(Node preUpdate, Node nFrom, boolean uncertain, SsaDefOrUse defOrUse |
+  exists(UseOrPhi use, Node preUpdate |
+    adjustForPointerArith(pun, use) and
+    useToNode(use, nodeTo) and
     preUpdate = pun.getPreUpdateNode() and
-    ssaFlowImpl(defOrUse, nFrom, nodeTo, uncertain)
-  |
-    if uncertain = true
-    then preUpdate = [nFrom, getAPriorDefinition(defOrUse)]
-    else preUpdate = nFrom
+    not exists(DataFlowCall call |
+      isArgumentOfCallable(call, preUpdate) and isArgumentOfCallable(call, nodeTo)
+    )
   )
 }
 

@@ -1,7 +1,7 @@
 // semmle-extractor-options: --edg --clang
 
 int source();
-void sink(int); void sink(const int *); void sink(int **);
+void sink(int); void sink(const int *); void sink(int **); void indirect_sink(...);
 
 struct twoIntFields {
   int m1, m2;
@@ -19,7 +19,8 @@ void following_pointers( // $ ast-def=sourceStruct1_ptr
 
   sink(sourceArray1[0]); // no flow
   sink(*sourceArray1); // no flow
-  sink(&sourceArray1); // $ ast,ir // [should probably be taint only]
+  sink(&sourceArray1); // $ ast // [should probably be taint only]
+  indirect_sink(&sourceArray1); // $ ast,ir
 
   sink(sourceStruct1.m1); // no flow
   sink(sourceStruct1_ptr->m1); // no flow
@@ -48,5 +49,6 @@ void following_pointers( // $ ast-def=sourceStruct1_ptr
 
   int stackArray[2] = { source(), source() };
   stackArray[0] = source();
-  sink(stackArray); // $ ast ir ir=49:25 ir=49:35 ir=50:19
+  sink(stackArray); // $ ast,ir
+  indirect_sink(stackArray); // $ ast ir=50:25 ir=50:35 ir=51:19
 }
@@ -28,9 +28,10 @@ postWithInFlow
 | BarrierGuard.cpp:49:6:49:6 | x [post update] | PostUpdateNode should not be the target of local flow. |
 | BarrierGuard.cpp:60:7:60:7 | x [post update] | PostUpdateNode should not be the target of local flow. |
 | clang.cpp:22:9:22:20 | sourceArray1 [inner post update] | PostUpdateNode should not be the target of local flow. |
-| clang.cpp:28:22:28:23 | m1 [post update] | PostUpdateNode should not be the target of local flow. |
-| clang.cpp:50:3:50:12 | stackArray [inner post update] | PostUpdateNode should not be the target of local flow. |
-| clang.cpp:50:3:50:15 | access to array [post update] | PostUpdateNode should not be the target of local flow. |
+| clang.cpp:23:18:23:29 | sourceArray1 [inner post update] | PostUpdateNode should not be the target of local flow. |
+| clang.cpp:29:22:29:23 | m1 [post update] | PostUpdateNode should not be the target of local flow. |
+| clang.cpp:51:3:51:12 | stackArray [inner post update] | PostUpdateNode should not be the target of local flow. |
+| clang.cpp:51:3:51:15 | access to array [post update] | PostUpdateNode should not be the target of local flow. |
 | dispatch.cpp:60:3:60:14 | globalBottom [post update] | PostUpdateNode should not be the target of local flow. |
 | dispatch.cpp:61:3:61:14 | globalMiddle [post update] | PostUpdateNode should not be the target of local flow. |
 | dispatch.cpp:78:24:78:37 | call to allocateBottom [inner post update] | PostUpdateNode should not be the target of local flow. |

@@ -1,5 +1,5 @@
 int source();
-void sink(int); void sink(const int *); void sink(int **);
+void sink(int); void sink(const int *); void sink(int **); void indirect_sink(...);
 
 void intraprocedural_with_local_flow() {
   int t2;
@@ -626,7 +626,7 @@ void test_def_via_phi_read(bool b)
     use(buffer);
   }
   intPointerSource(buffer);
-  sink(buffer); // $ ast,ir
+  indirect_sink(buffer); // $ ast,ir
 }
 
 void test_static_local_1() {
@@ -692,7 +692,7 @@ void test_static_local_9() {
 
 void increment_buf(int** buf) { // $ ast-def=buf ir-def=*buf ir-def=**buf
   *buf += 10;
-  sink(buf); // $ SPURIOUS: ast,ir // should only be flow to the indirect argument, but there's also flow to the non-indirect argument
+  sink(buf); // $ SPURIOUS: ast
 }
 
 void call_increment_buf(int** buf) { // $ ast-def=buf

@@ -34,7 +34,7 @@ module AstTest {
 
     override predicate isSink(DataFlow::Node sink) {
       exists(FunctionCall call |
-        call.getTarget().getName() = "sink" and
+        call.getTarget().getName() = ["sink", "indirect_sink"] and
         sink.asExpr() = call.getAnArgument()
       )
     }
@@ -83,9 +83,12 @@ module IRTest {
     }
 
     override predicate isSink(DataFlow::Node sink) {
-      exists(FunctionCall call |
+      exists(FunctionCall call, Expr e | e = call.getAnArgument() |
         call.getTarget().getName() = "sink" and
-        call.getAnArgument() in [sink.asExpr(), sink.asIndirectExpr()]
+        sink.asExpr() = e
+        or
+        call.getTarget().getName() = "indirect_sink" and
+        sink.asIndirectExpr() = e
       )
     }
 

@@ -1,29 +1,16 @@
 edges
 | tests.cpp:26:15:26:23 | badSource indirection | tests.cpp:51:12:51:20 | call to badSource indirection |
-| tests.cpp:26:32:26:35 | data indirection | tests.cpp:26:15:26:23 | badSource indirection |
-| tests.cpp:26:32:26:35 | data indirection | tests.cpp:38:25:38:36 | strncat output argument |
 | tests.cpp:33:34:33:39 | call to getenv indirection | tests.cpp:38:39:38:49 | environment indirection |
 | tests.cpp:38:25:38:36 | strncat output argument | tests.cpp:26:15:26:23 | badSource indirection |
-| tests.cpp:38:25:38:36 | strncat output argument | tests.cpp:26:15:26:23 | badSource indirection |
-| tests.cpp:38:25:38:36 | strncat output argument | tests.cpp:51:22:51:25 | badSource output argument |
 | tests.cpp:38:39:38:49 | environment indirection | tests.cpp:38:25:38:36 | strncat output argument |
 | tests.cpp:51:12:51:20 | call to badSource indirection | tests.cpp:53:16:53:19 | data indirection |
-| tests.cpp:51:22:51:25 | badSource output argument | tests.cpp:51:22:51:25 | data indirection |
-| tests.cpp:51:22:51:25 | data indirection | tests.cpp:26:32:26:35 | data indirection |
-| tests.cpp:51:22:51:25 | data indirection | tests.cpp:51:12:51:20 | call to badSource indirection |
 nodes
 | tests.cpp:26:15:26:23 | badSource indirection | semmle.label | badSource indirection |
-| tests.cpp:26:15:26:23 | badSource indirection | semmle.label | badSource indirection |
-| tests.cpp:26:32:26:35 | data indirection | semmle.label | data indirection |
 | tests.cpp:33:34:33:39 | call to getenv indirection | semmle.label | call to getenv indirection |
 | tests.cpp:38:25:38:36 | strncat output argument | semmle.label | strncat output argument |
-| tests.cpp:38:25:38:36 | strncat output argument | semmle.label | strncat output argument |
 | tests.cpp:38:39:38:49 | environment indirection | semmle.label | environment indirection |
 | tests.cpp:51:12:51:20 | call to badSource indirection | semmle.label | call to badSource indirection |
-| tests.cpp:51:22:51:25 | badSource output argument | semmle.label | badSource output argument |
-| tests.cpp:51:22:51:25 | data indirection | semmle.label | data indirection |
 | tests.cpp:53:16:53:19 | data indirection | semmle.label | data indirection |
 subpaths
-| tests.cpp:51:22:51:25 | data indirection | tests.cpp:26:32:26:35 | data indirection | tests.cpp:26:15:26:23 | badSource indirection | tests.cpp:51:12:51:20 | call to badSource indirection |
 #select
 | tests.cpp:53:16:53:19 | data | tests.cpp:33:34:33:39 | call to getenv indirection | tests.cpp:53:16:53:19 | data indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string). | tests.cpp:33:34:33:39 | call to getenv indirection | user input (an environment variable) | tests.cpp:38:25:38:36 | strncat output argument | strncat output argument |
@@ -45,8 +45,6 @@ edges
 | test.cpp:186:47:186:54 | filename indirection | test.cpp:188:20:188:24 | flags indirection |
 | test.cpp:187:11:187:15 | strncat output argument | test.cpp:188:11:188:17 | strncat output argument |
 | test.cpp:187:18:187:25 | filename indirection | test.cpp:187:11:187:15 | strncat output argument |
-| test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | strncat output argument |
-| test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | strncat output argument |
 | test.cpp:188:20:188:24 | flags indirection | test.cpp:188:11:188:17 | strncat output argument |
 | test.cpp:194:9:194:16 | fread output argument | test.cpp:196:26:196:33 | filename indirection |
 | test.cpp:196:10:196:16 | concat output argument | test.cpp:198:32:198:38 | command indirection |