Skip to content

Commit

Permalink
C++: Count return dispatch based on 2nd level scopes.
Browse files Browse the repository at this point in the history
  • Loading branch information
aschackmull committed Apr 15, 2024
1 parent 9e39be5 commit db6d27b
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ module CppDataFlow implements InputSig<Location> {

// Each alias below binds a member of this module to the predicate of the same
// name in `Private`; the number after `/` is the arity of the aliased predicate.
predicate getAdditionalFlowIntoCallNodeTerm = Private::getAdditionalFlowIntoCallNodeTerm/2;

// Binds the second-level scope lookup to `Private::getSecondLevelScope`.
predicate getSecondLevelScope = Private::getSecondLevelScope/1;

predicate validParameterAliasStep = Private::validParameterAliasStep/2;

predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1263,3 +1263,74 @@ predicate validParameterAliasStep(Node node1, Node node2) {
)
)
}

/**
 * Holds if `s` is one of the statements of the block of some function, that
 * is, `s` is reachable via `getBlock().getAStmt()` from a `Function`.
 */
private predicate isTopLevel(Cpp::Stmt s) {
  exists(Function f | s = f.getBlock().getAStmt())
}

/**
 * Gets a branch of the `if` / `else if` / `else` chain that starts at `s`.
 *
 * This is the `then` branch of `s`, any branch of a chained `if` statement in
 * the `else` position of `s`, or the `else` branch of `s` itself when that
 * branch is not an `if` statement.
 */
private Cpp::Stmt getAChainedBranch(Cpp::IfStmt s) {
  // The `then` branch of this link of the chain.
  result = s.getThen()
  or
  // The `else` branch is itself an `if`: continue down the chain.
  result = getAChainedBranch(s.getElse())
  or
  // A final, plain `else` branch terminates the chain.
  result = s.getElse() and
  not result instanceof Cpp::IfStmt
}

/**
 * Gets an instruction associated with the data-flow node `n`.
 *
 * Depending on the kind of node this is the node's own instruction, the use
 * instruction of its operand, the first instruction of the basic block of its
 * phi node, the instruction of an indirect instruction node (for any
 * indirection index), or, for a post-update node, an instruction of its
 * pre-update node.
 */
private Instruction getInstruction(Node n) {
  result = n.asInstruction()
  or
  result = n.asOperand().getUse()
  or
  exists(SsaPhiNode phi | phi = n |
    result = phi.getPhiNode().getBasicBlock().getFirstInstruction()
  )
  or
  exists(IndirectInstruction indirect | indirect = n |
    indirect.hasInstructionAndIndirectionIndex(result, _)
  )
  or
  exists(PostUpdateNode post | post = n | result = getInstruction(post.getPreUpdateNode()))
}

/**
 * The underlying representation of second-level scopes: a branch of a
 * top-level chained `if` statement, or a case of a top-level `switch`
 * statement.
 */
private newtype TDataFlowSecondLevelScope =
  TTopLevelIfBranch(Cpp::Stmt s) {
    exists(Cpp::IfStmt chainHead | isTopLevel(chainHead) | s = getAChainedBranch(chainHead))
  } or
  TTopLevelSwitchCase(Cpp::SwitchCase s) {
    exists(Cpp::SwitchStmt sw | isTopLevel(sw) | s = sw.getASwitchCase())
  }

/**
 * A second-level control-flow scope in a `switch` or a chained `if` statement.
 *
 * Such a scope is either a case of a `switch` statement or a branch of a
 * chained `if` statement, where the `switch` or `if` statement itself is top
 * level, that is, it is not nested inside other CFG constructs.
 */
class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope {
  /** Gets the `if` branch or `switch` case that this scope wraps. */
  private Cpp::Stmt getWrappedStmt() {
    this = TTopLevelIfBranch(result)
    or
    exists(Cpp::SwitchCase switchCase | this = TTopLevelSwitchCase(switchCase) |
      result = switchCase
    )
  }

  /** Gets a textual representation of this element. */
  string toString() { result = this.getWrappedStmt().toString() }

  /** Gets the primary location of this element. */
  Cpp::Location getLocation() { result = this.getWrappedStmt().getLocation() }

  /**
   * Gets a statement directly contained in this scope. For an `if` branch, this
   * is the branch itself, and for a `switch` case, this is one of the
   * statements of that case.
   */
  private Cpp::Stmt getAStmt() {
    this = TTopLevelIfBranch(result)
    or
    exists(Cpp::SwitchCase switchCase | this = TTopLevelSwitchCase(switchCase) |
      result = switchCase.getAStmt()
    )
  }

  /** Gets a data-flow node nested within this scope. */
  Node getANode() {
    exists(Cpp::ControlFlowNode cfn |
      // The AST element of an instruction associated with the node ...
      cfn = getInstruction(result).getAst() and
      // ... whose enclosing statement is, or is transitively nested in, a
      // statement of this scope.
      cfn.getEnclosingStmt().getParentStmt*() = this.getAStmt()
    )
  }
}

/**
 * Gets the second-level scope that has `n` as one of its nested nodes, if any.
 */
DataFlowSecondLevelScope getSecondLevelScope(Node n) {
  n = result.getANode()
}
18 changes: 18 additions & 0 deletions shared/dataflow/codeql/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,24 @@ signature module InputSig<LocationSig Location> {
*/
default int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }

/**
* A second-level control-flow scope in a callable.
*
* This is used to provide a more fine-grained separation of a callable
* context for the purpose of identifying uncertain control flow. For most
* languages, this is not needed, as this separation is handled through
* virtual dispatch, but for some cases (for example, C++) this can be used to
* identify, for example, large top-level switch statements acting like
* virtual dispatch.
*
* Implementations only need to provide `toString`; nodes are associated with
* scopes through the `getSecondLevelScope` predicate.
*/
class DataFlowSecondLevelScope {
/** Gets a textual representation of this element. */
string toString();
}

/**
* Gets the second-level scope containing the node `n`, if any.
*
* The default implementation has no result for any node, that is, no node is
* placed in a second-level scope unless a language overrides this predicate.
*/
default DataFlowSecondLevelScope getSecondLevelScope(Node n) { none() }

bindingset[call, p, arg]
default predicate golangSpecificParamArgFilter(
DataFlowCall call, ParameterNode p, ArgumentNode arg
Expand Down
30 changes: 24 additions & 6 deletions shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1113,28 +1113,46 @@ module MakeImpl<LocationSig Location, InputSig<Location> Lang> {
result = getAdditionalFlowIntoCallNodeTerm(arg.projectToNode(), p.projectToNode())
}

// Instantiation of the `Option` module for `DataFlowSecondLevelScope`; its
// `some(...)` and `None` members are used by `getScope` to represent a return
// node that has, respectively has not, a second-level scope.
private module SndLevelScopeOption = Option<DataFlowSecondLevelScope>;

// The option type itself, aliased under the same name as the module above.
private class SndLevelScopeOption = SndLevelScopeOption::Option;

/**
 * Gets the second-level scope of the return node `ret` wrapped as an option:
 * `some(scope)` for each scope reported by `getSecondLevelScope`, and the
 * `None` value when `ret` has no second-level scope at all.
 */
pragma[nomagic]
private SndLevelScopeOption getScope(RetNodeEx ret) {
  exists(DataFlowSecondLevelScope scope | scope = getSecondLevelScope(ret.asNode()) |
    result = SndLevelScopeOption::some(scope)
  )
  or
  not exists(getSecondLevelScope(ret.asNode())) and
  result instanceof SndLevelScopeOption::None
}

// NOTE(review): the lines below appear to interleave the pre- and post-change
// text of this diff hunk (two signatures, two bodies) - confirm against the
// repository before relying on this excerpt.
/**
* Holds if `flowOutOfCallNodeCand1` relates `call` and `out` through some
* return node whose enclosing callable is `c` and whose second-level scope
* option, as given by `getScope`, is `scope`.
*/
pragma[nomagic]
private predicate returnCallEdge1(DataFlowCallable c, DataFlowCall call, NodeEx out) {
private predicate returnCallEdge1(
DataFlowCallable c, SndLevelScopeOption scope, DataFlowCall call, NodeEx out
) {
exists(RetNodeEx ret |
flowOutOfCallNodeCand1(call, ret, _, out) and c = ret.getEnclosingCallable()
flowOutOfCallNodeCand1(call, ret, _, out) and
c = ret.getEnclosingCallable() and
scope = getScope(ret)
)
}

// NOTE(review): the body below appears to contain both the pre-change count
// (callables only) and the post-change count (callable/scope pairs) of this
// diff hunk - confirm against the repository.
/**
* Gets the number of distinct pairs of a callable and a second-level scope
* option for which `returnCallEdge1` holds with `call` and `out`. There is no
* result when no such pair exists, since `strictcount` is used.
*/
private int simpleDispatchFanoutOnReturn(DataFlowCall call, NodeEx out) {
result = strictcount(DataFlowCallable c | returnCallEdge1(c, call, out))
result =
strictcount(DataFlowCallable c, SndLevelScopeOption scope |
returnCallEdge1(c, scope, call, out)
)
}

// NOTE(review): the body below appears to interleave pre- and post-change
// lines of this diff hunk (for example, two `returnCallEdge1(c, ...)` calls and
// two `count(...)` headers) - confirm against the repository.
/**
* Gets the return fan-out at `out` when restricted by the call context `ctx`.
*
* This applies to a call `call` whose simple fan-out on return to `out`
* exceeds 1, where `Stage1::revFlow(out, false)` does not hold, where
* `mayBenefitFromCallContextExt` holds for `call`, and where `ctx` has a
* return edge from the enclosing callable of `call`. The result counts the
* pairs of a target `tgt = viableImplInCallContextExt(call, ctx)` and a
* second-level scope option with a return edge to `out` through `call`.
*/
private int ctxDispatchFanoutOnReturn(NodeEx out, DataFlowCall ctx) {
exists(DataFlowCall call, DataFlowCallable c |
simpleDispatchFanoutOnReturn(call, out) > 1 and
not Stage1::revFlow(out, false) and
call.getEnclosingCallable() = c and
returnCallEdge1(c, ctx, _) and
returnCallEdge1(c, _, ctx, _) and
mayBenefitFromCallContextExt(call, _) and
result =
count(DataFlowCallable tgt |
count(DataFlowCallable tgt, SndLevelScopeOption scope |
tgt = viableImplInCallContextExt(call, ctx) and
returnCallEdge1(tgt, call, out)
returnCallEdge1(tgt, scope, call, out)
)
)
}
Expand Down

0 comments on commit db6d27b

Please sign in to comment.