From c62220e0dc71e26a76ecba2dcd066e2598f44383 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 8 Jun 2020 11:48:40 +0200 Subject: [PATCH] C++: Fix data-flow dispatch perf with globals There wasn't a good join order for the "store to global var" case in the virtual dispatch library. When a global variable had millions of accesses but few stores to it, the `flowsFrom` predicate would join to see all those millions of accesses before filtering down to stores only. The solution is to pull out a `storeIntoGlobal` helper predicate that pre-computes which accesses are stores. To make the code clearer, I've also pulled out a repeated chunk of code into a new `addressOfGlobal` helper predicate. For the kamailio/kamailio project, these are the tuple counts before: Starting to evaluate predicate DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#cur_delta/3[3]@21a1df (iteration 3) Tuple counts for DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#cur_delta: ... 59002 ~0% {3} r17 = SCAN DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#prev_delta AS I OUTPUT I.<1>, true, I.<0> 58260 ~1% {3} r31 = JOIN r17 WITH DataFlowUtil::Node::asVariable_dispred#fb AS R ON FIRST 1 OUTPUT R.<1>, true, r17.<2> 2536187389 ~6% {3} r32 = JOIN r31 WITH Instruction::VariableInstruction::getASTVariable_dispred#fb_10#join_rhs AS R ON FIRST 1 OUTPUT R.<1>, true, r31.<2> 2536187389 ~6% {3} r33 = JOIN r32 WITH project#Instruction::VariableAddressInstruction#class#3#ff AS R ON FIRST 1 OUTPUT r32.<0>, true, r32.<2> 58208 ~0% {3} r34 = JOIN r33 WITH Instruction::StoreInstruction::getDestinationAddress_dispred#ff_10#join_rhs AS R ON FIRST 1 OUTPUT R.<1>, true, r33.<2> Tuple counts after: Starting to evaluate predicate DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#cur_delta/3[3]@6073c5 (iteration 3) Tuple counts for DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#cur_delta: ... 59002 ~0% {3} r17 = SCAN DataFlowDispatch::VirtualDispatch::DataSensitiveCall::flowsFrom#fff#prev_delta AS I OUTPUT I.<1>, true, I.<0> 58260 ~1% {3} r23 = JOIN r17 WITH DataFlowUtil::Node::asVariable_dispred#ff AS R ON FIRST 1 OUTPUT R.<1>, true, r17.<2> 58208 ~0% {3} r24 = JOIN r23 WITH DataFlowDispatch::VirtualDispatch::storeIntoGlobal#ff_10#join_rhs AS R ON FIRST 1 OUTPUT R.<1>, true, r23.<2> 58208 ~0% {3} r25 = JOIN r24 WITH DataFlowUtil::InstructionNode#ff_10#join_rhs AS R ON FIRST 1 OUTPUT true, r24.<2>, R.<1> Notice that the final tuple count, 58208, is the same before and after. The kamailio/kamailio project seems to have been affected by this issue because it has global variables to do with logging policy, and these variables are loaded from in every place where their logging macro is used. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index 9ee685630dd7..d8904625e0ea 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -82,47 +82,45 @@ private module VirtualDispatch { exists(LoadInstruction load, GlobalOrNamespaceVariable var | var = src.asVariable() and other.asInstruction() = load and + addressOfGlobal(load.getSourceAddress(), var) and // The `allowFromArg` concept doesn't play a role when `src` is a // global variable, so we just set it to a single arbitrary value for // performance. allowFromArg = true - | - // Load directly from the global variable - load.getSourceAddress().(VariableAddressInstruction).getASTVariable() = var - or - // Load from a field on a global union - exists(FieldAddressInstruction fa | - fa = load.getSourceAddress() and - fa.getObjectAddress().(VariableAddressInstruction).getASTVariable() = var and - fa.getField().getDeclaringType() instanceof Union - ) ) or - // Flow from store to global variable. These cases are similar to the - // above but have `StoreInstruction` instead of `LoadInstruction` and - // have the roles swapped between `other` and `src`. + // Flow from store to global variable. exists(StoreInstruction store, GlobalOrNamespaceVariable var | var = other.asVariable() and store = src.asInstruction() and + storeIntoGlobal(store, var) and // Setting `allowFromArg` to `true` like in the base case means we // treat a store to a global variable like the dispatch itself: flow // may come from anywhere. allowFromArg = true - | - // Store directly to the global variable - store.getDestinationAddress().(VariableAddressInstruction).getASTVariable() = var - or - // Store to a field on a global union - exists(FieldAddressInstruction fa | - fa = store.getDestinationAddress() and - fa.getObjectAddress().(VariableAddressInstruction).getASTVariable() = var and - fa.getField().getDeclaringType() instanceof Union - ) ) ) } } + pragma[noinline] + private predicate storeIntoGlobal(StoreInstruction store, GlobalOrNamespaceVariable var) { + addressOfGlobal(store.getDestinationAddress(), var) + } + + /** Holds if `addressInstr` is an instruction that produces the address of `var`. */ + private predicate addressOfGlobal(Instruction addressInstr, GlobalOrNamespaceVariable var) { + // Access directly to the global variable + addressInstr.(VariableAddressInstruction).getASTVariable() = var + or + // Access to a field on a global union + exists(FieldAddressInstruction fa | + fa = addressInstr and + fa.getObjectAddress().(VariableAddressInstruction).getASTVariable() = var and + fa.getField().getDeclaringType() instanceof Union + ) + } + /** * A ReturnNode with its ReturnKind and its enclosing callable. *