diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index 0753dfd266e3..e0135e0ad2f0 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -2,6 +2,7 @@ import cpp import semmle.code.cpp.security.Security private import semmle.code.cpp.ir.dataflow.DataFlow private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch as Dispatch /** * A predictable instruction is one where an external user can predict @@ -145,7 +146,8 @@ GlobalOrNamespaceVariable globalVarFromId(string id) { } Function resolveCall(Call call) { - // TODO: improve virtual dispatch. This will help in the test for - // `UncontrolledProcessOperation.ql`. - result = call.getTarget() + exists(CallInstruction callInstruction | + callInstruction.getAST() = call and + result = Dispatch::viableCallable(callInstruction) + ) } diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index d0325a28d3e9..9572639de595 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -1,5 +1,6 @@ private import cpp private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.DataFlow Function viableImpl(CallInstruction call) { result = viableCallable(call) } @@ -20,6 +21,58 @@ Function viableCallable(CallInstruction call) { functionSignatureWithBody(qualifiedName, nparams, result) and strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 ) + or + // Rudimentary virtual dispatch support. It's essentially local data flow + // where the source is a derived-to-base conversion and the target is the + // qualifier of a call. + exists(Class derived, DataFlow::Node thisArgument | + nodeMayHaveClass(derived, thisArgument) and + overrideMayAffectCall(derived, thisArgument, _, result, call) + ) +} + +/** + * Holds if `call` is a virtual function call with qualifier `thisArgument` in + * `enclosingFunction`, whose static target is overridden by + * `overridingFunction` in `overridingClass`. + */ +pragma[noinline] +private predicate overrideMayAffectCall( + Class overridingClass, DataFlow::Node thisArgument, Function enclosingFunction, + MemberFunction overridingFunction, CallInstruction call +) { + call.getEnclosingFunction() = enclosingFunction and + overridingFunction.getAnOverriddenFunction+() = call.getStaticCallTarget() and + overridingFunction.getDeclaringType() = overridingClass and + thisArgument = DataFlow::instructionNode(call.getThisArgument()) +} + +/** + * Holds if `node` may have dynamic class `derived`, where `derived` is a class + * that may affect virtual dispatch within the enclosing function. + * + * For the sake of performance, this recursion is written out manually to make + * it a relation on `Class x Node` rather than `Node x Node` or `MemberFunction + * x Node`, both of which would be larger. It's a forward search since there + * should usually be fewer classes than calls. + * + * If a value is cast several classes up in the hierarchy, that will be modeled + * as a chain of `ConvertToBaseInstruction`s and will cause the search to start + * from each of them and pass through subsequent ones. There might be + * performance to gain by stopping before a second upcast and reconstructing + * the full chain in a "big-step" recursion after this one. + */ +private predicate nodeMayHaveClass(Class derived, DataFlow::Node node) { + exists(ConvertToBaseInstruction toBase | + derived = toBase.getDerivedClass() and + overrideMayAffectCall(derived, _, toBase.getEnclosingFunction(), _, _) and + node.asInstruction() = toBase + ) + or + exists(DataFlow::Node prev | + nodeMayHaveClass(derived, prev) and + DataFlow::localFlowStep(prev, node) + ) } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index cd989c947103..4e84424fcb7a 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -205,7 +205,8 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo.(CopyInstruction).getSourceValue() = iFrom or iTo.(PhiInstruction).getAnOperand().getDef() = iFrom or // Treat all conversions as flow, even conversions between different numeric types. - iTo.(ConvertInstruction).getUnary() = iFrom + iTo.(ConvertInstruction).getUnary() = iFrom or + iTo.(InheritanceConversionInstruction).getUnary() = iFrom } /** diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp new file mode 100644 index 000000000000..5e4f2f97f465 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -0,0 +1,46 @@ +int source(); +void sink(int); + +// This class has the opposite behavior of what the member function names suggest. +struct Top { + virtual int isSource1() { return 0; } + virtual int isSource2() { return 0; } + virtual void isSink(int x) { } + virtual int notSource1() { return source(); } + virtual int notSource2() { return source(); } + virtual void notSink(int x) { sink(x); } +}; + +// This class has the correct behavior for just the functions ending in 2. +struct Middle : Top { + int isSource2() override { return source(); } + int notSource2() override { return 0; } +}; + +// This class has all the behavior suggested by the function names. +struct Bottom : Middle { + int isSource1() override { return source(); } + void isSink(int x) override { sink(x); } + int notSource1() override { return 0; } + void notSink(int x) override { } +}; + +void VirtualDispatch(Bottom *bottomPtr, Bottom &bottomRef) { + Top *topPtr = bottomPtr, &topRef = bottomRef; + + sink(topPtr->isSource1()); // flow [NOT DETECTED by AST] + sink(topPtr->isSource2()); // flow [NOT DETECTED by AST] + topPtr->isSink(source()); // flow [NOT DETECTED by AST] + + sink(topPtr->notSource1()); // no flow [FALSE POSITIVE] + sink(topPtr->notSource2()); // no flow [FALSE POSITIVE] + topPtr->notSink(source()); // no flow [FALSE POSITIVE] + + sink(topRef.isSource1()); // flow [NOT DETECTED by AST] + sink(topRef.isSource2()); // flow [NOT DETECTED by AST] + topRef.isSink(source()); // flow [NOT DETECTED by AST] + + sink(topRef.notSource1()); // no flow [FALSE POSITIVE] + sink(topRef.notSource2()); // no flow [FALSE POSITIVE] + topRef.notSink(source()); // no flow [FALSE POSITIVE] +} diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected index 6527db4b77e8..24fa6fdb5bd9 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected @@ -15,6 +15,12 @@ | clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source | | clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source | | clang.cpp:45:17:45:18 | m2 | clang.cpp:43:35:43:40 | call to source | +| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:37:19:37:24 | call to source | +| dispatch.cpp:11:38:11:38 | x | dispatch.cpp:45:18:45:23 | call to source | +| dispatch.cpp:35:16:35:25 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source | +| dispatch.cpp:36:16:36:25 | call to notSource2 | dispatch.cpp:10:37:10:42 | call to source | +| dispatch.cpp:43:15:43:24 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source | +| dispatch.cpp:44:15:44:24 | call to notSource2 | dispatch.cpp:10:37:10:42 | call to source | | lambdas.cpp:14:3:14:6 | t | lambdas.cpp:8:10:8:15 | call to source | | lambdas.cpp:18:8:18:8 | call to operator() | lambdas.cpp:8:10:8:15 | call to source | | lambdas.cpp:21:3:21:6 | t | lambdas.cpp:8:10:8:15 | call to source | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected index 7d0d4e7d72e0..9b8be3abd1e9 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -5,6 +5,12 @@ | clang.cpp:28:27:28:32 | clang.cpp:29:27:29:28 | AST only | | clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only | | clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only | +| dispatch.cpp:16:37:16:42 | dispatch.cpp:32:16:32:24 | IR only | +| dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only | +| dispatch.cpp:22:37:22:42 | dispatch.cpp:31:16:31:24 | IR only | +| dispatch.cpp:22:37:22:42 | dispatch.cpp:39:15:39:23 | IR only | +| dispatch.cpp:33:18:33:23 | dispatch.cpp:23:38:23:38 | IR only | +| dispatch.cpp:41:17:41:22 | dispatch.cpp:23:38:23:38 | IR only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:14:3:14:6 | AST only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:18:8:18:8 | AST only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:21:3:21:6 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected index 9b67a013a58a..651e580a105f 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -12,6 +12,18 @@ | clang.cpp:37:10:37:11 | Load: m2 | clang.cpp:34:32:34:37 | Call: call to source | | clang.cpp:41:18:41:19 | Load: m2 | clang.cpp:39:42:39:47 | Call: call to source | | clang.cpp:45:17:45:18 | Load: m2 | clang.cpp:43:35:43:40 | Call: call to source | +| dispatch.cpp:11:38:11:38 | Load: x | dispatch.cpp:37:19:37:24 | Call: call to source | +| dispatch.cpp:11:38:11:38 | Load: x | dispatch.cpp:45:18:45:23 | Call: call to source | +| dispatch.cpp:23:38:23:38 | Load: x | dispatch.cpp:33:18:33:23 | Call: call to source | +| dispatch.cpp:23:38:23:38 | Load: x | dispatch.cpp:41:17:41:22 | Call: call to source | +| dispatch.cpp:31:16:31:24 | Call: call to isSource1 | dispatch.cpp:22:37:22:42 | Call: call to source | +| dispatch.cpp:32:16:32:24 | Call: call to isSource2 | dispatch.cpp:16:37:16:42 | Call: call to source | +| dispatch.cpp:35:16:35:25 | Call: call to notSource1 | dispatch.cpp:9:37:9:42 | Call: call to source | +| dispatch.cpp:36:16:36:25 | Call: call to notSource2 | dispatch.cpp:10:37:10:42 | Call: call to source | +| dispatch.cpp:39:15:39:23 | Call: call to isSource1 | dispatch.cpp:22:37:22:42 | Call: call to source | +| dispatch.cpp:40:15:40:23 | Call: call to isSource2 | dispatch.cpp:16:37:16:42 | Call: call to source | +| dispatch.cpp:43:15:43:24 | Call: call to notSource1 | dispatch.cpp:9:37:9:42 | Call: call to source | +| dispatch.cpp:44:15:44:24 | Call: call to notSource2 | dispatch.cpp:10:37:10:42 | Call: call to source | | test.cpp:7:8:7:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | | test.cpp:9:8:9:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | | test.cpp:10:8:10:9 | Load: t2 | test.cpp:6:12:6:17 | Call: call to source |