-
Notifications
You must be signed in to change notification settings - Fork 2k
C++: Rudimentary support for IR data flow virtual dispatch #2262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| private import cpp | ||
| private import semmle.code.cpp.ir.IR | ||
| private import semmle.code.cpp.ir.dataflow.DataFlow | ||
|
|
||
| Function viableImpl(CallInstruction call) { result = viableCallable(call) } | ||
|
|
||
|
|
@@ -20,6 +21,58 @@ Function viableCallable(CallInstruction call) { | |
| functionSignatureWithBody(qualifiedName, nparams, result) and | ||
| strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 | ||
| ) | ||
| or | ||
| // Rudimentary virtual dispatch support. It's essentially local data flow | ||
| // where the source is a derived-to-base conversion and the target is the | ||
| // qualifier of a call. | ||
| exists(Class derived, DataFlow::Node thisArgument | | ||
| nodeMayHaveClass(derived, thisArgument) and | ||
| overrideMayAffectCall(derived, thisArgument, _, result, call) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if `call` is a virtual function call with qualifier `thisArgument` in | ||
| * `enclosingFunction`, whose static target is overridden by | ||
| * `overridingFunction` in `overridingClass`. | ||
| * | ||
| * The override relation is transitive: `overridingFunction` may override the | ||
| * static target directly or through a chain of intermediate overrides. | ||
| */ | ||
| pragma[noinline] | ||
| private predicate overrideMayAffectCall( | ||
| Class overridingClass, DataFlow::Node thisArgument, Function enclosingFunction, | ||
| MemberFunction overridingFunction, CallInstruction call | ||
| ) { | ||
| call.getEnclosingFunction() = enclosingFunction and | ||
| // `+` takes the transitive closure, so overrides several levels below the | ||
| // static target are included. | ||
| overridingFunction.getAnOverriddenFunction+() = call.getStaticCallTarget() and | ||
| overridingFunction.getDeclaringType() = overridingClass and | ||
| // Expose the qualifier (`this` argument) of the call as a data-flow node. | ||
| thisArgument = DataFlow::instructionNode(call.getThisArgument()) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if `node` may have dynamic class `derived`, where `derived` is a class | ||
| * that may affect virtual dispatch within the enclosing function. | ||
| * | ||
| * For the sake of performance, this recursion is written out manually to make | ||
| * it a relation on `Class x Node` rather than `Node x Node` or `MemberFunction | ||
| * x Node`, both of which would be larger. It's a forward search since there | ||
| * should usually be fewer classes than calls. | ||
| * | ||
| * If a value is cast several classes up in the hierarchy, that will be modeled | ||
| * as a chain of `ConvertToBaseInstruction`s and will cause the search to start | ||
| * from each of them and pass through subsequent ones. There might be | ||
| * performance to gain by stopping before a second upcast and reconstructing | ||
| * the full chain in a "big-step" recursion after this one. | ||
| */ | ||
| private predicate nodeMayHaveClass(Class derived, DataFlow::Node node) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given a function declared
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The latter. Making this interprocedural will be follow-up work. I don't think we should make it overapproximating to begin with since the goal for now is just to replace |
||
| exists(ConvertToBaseInstruction toBase | | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should probably also consider |
||
| derived = toBase.getDerivedClass() and | ||
| overrideMayAffectCall(derived, _, toBase.getEnclosingFunction(), _, _) and | ||
| node.asInstruction() = toBase | ||
| ) | ||
| or | ||
| exists(DataFlow::Node prev | | ||
| nodeMayHaveClass(derived, prev) and | ||
| DataFlow::localFlowStep(prev, node) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| int source(); | ||
| void sink(int); | ||
|
|
||
| // This class has the opposite behavior of what the member function names suggest. | ||
| // It is the root of the test hierarchy: every member is virtual so that the | ||
| // derived classes `Middle` and `Bottom` can override it. | ||
| struct Top { | ||
| virtual int isSource1() { return 0; } | ||
| virtual int isSource2() { return 0; } | ||
| virtual void isSink(int x) { } | ||
| virtual int notSource1() { return source(); } | ||
| virtual int notSource2() { return source(); } | ||
| virtual void notSink(int x) { sink(x); } | ||
| }; | ||
|
|
||
| // This class has the correct behavior for just the functions ending in 2. | ||
| // The remaining four virtual functions are inherited unchanged from `Top`. | ||
| struct Middle : Top { | ||
| int isSource2() override { return source(); } | ||
| int notSource2() override { return 0; } | ||
| }; | ||
|
|
||
| // This class has all the behavior suggested by the function names. | ||
| // It overrides the four functions that `Middle` leaves untouched and inherits | ||
| // the two `...2` overrides from `Middle`. | ||
| struct Bottom : Middle { | ||
| int isSource1() override { return source(); } | ||
| void isSink(int x) override { sink(x); } | ||
| int notSource1() override { return 0; } | ||
| void notSink(int x) override { } | ||
| }; | ||
|
|
||
| // Calls each virtual function of the hierarchy through a `Top` pointer and a | ||
| // `Top` reference that are initialized from the `Bottom` parameters. | ||
| // The trailing comment on each call records the expected result (`flow` or | ||
| // `no flow`); the bracketed tag presumably marks a known deviation from it. | ||
| void VirtualDispatch(Bottom *bottomPtr, Bottom &bottomRef) { | ||
| // Derived-to-base conversions: both variables have static type `Top`. | ||
| Top *topPtr = bottomPtr, &topRef = bottomRef; | ||
|
|
||
| sink(topPtr->isSource1()); // flow [NOT DETECTED by AST] | ||
| sink(topPtr->isSource2()); // flow [NOT DETECTED by AST] | ||
| topPtr->isSink(source()); // flow [NOT DETECTED by AST] | ||
|
|
||
| sink(topPtr->notSource1()); // no flow [FALSE POSITIVE] | ||
| sink(topPtr->notSource2()); // no flow [FALSE POSITIVE] | ||
| topPtr->notSink(source()); // no flow [FALSE POSITIVE] | ||
|
|
||
| sink(topRef.isSource1()); // flow [NOT DETECTED by AST] | ||
| sink(topRef.isSource2()); // flow [NOT DETECTED by AST] | ||
| topRef.isSink(source()); // flow [NOT DETECTED by AST] | ||
|
|
||
| sink(topRef.notSource1()); // no flow [FALSE POSITIVE] | ||
| sink(topRef.notSource2()); // no flow [FALSE POSITIVE] | ||
| topRef.notSink(source()); // no flow [FALSE POSITIVE] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't there also a precision improvement to be made there, if there's a cast through more than one class that overrides the same virtual function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.