Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
21eb00a
[CPP-370] Minor fix to QHELP file.
zlaski-semmle Apr 15, 2019
0c86d4c
[CPP-370] Tentative implementation of NonConstantFormat.ql using the …
zlaski-semmle Apr 23, 2019
ffddc5b
[CPP-370] Update the NonConstantFormat.expected result template.
zlaski-semmle Apr 24, 2019
de10598
[CPP-370] NonConstantFormat.expected changed for some reason.
zlaski-semmle Apr 24, 2019
775861c
[CPP-370] Minor textual tweaks.
zlaski-semmle Apr 24, 2019
012140f
[CPP-370] Reformat query.
zlaski-semmle Apr 26, 2019
fae55d5
[CPP-370] First attempt at isAdditionalFlowStep().
zlaski-semmle Apr 28, 2019
a962cff
[CPP-370] Intermediate commit, file not in usable state.
zlaski-semmle Apr 29, 2019
ed67c9f
[CPP-370] Rewrite of NonConstantFormat.ql using the taint tracking l…
zlaski-semmle May 8, 2019
b205951
[CPP-370] Reformat test cases so that the .expect files line up with …
zlaski-semmle May 14, 2019
99047e5
[CPP-370] Exclude UserDefinedFormattingFunction nodes.
zlaski-semmle May 15, 2019
1fce5a5
[CPP-370] Revert accidental changes to Printf.qll (which belong on zl…
zlaski-semmle May 15, 2019
8faf95e
[CPP-370] Tentatively modify CWE consts.cpp file to play nice with th…
zlaski-semmle May 16, 2019
f6903c7
[CPP-370] Remove prohibition against UserDefinedFormattingFunction
zlaski-semmle May 16, 2019
6025c03
[CPP-370] Add `nested.cpp` test case, for nested calls to `...printf`…
zlaski-semmle May 16, 2019
dbec17f
[CPP-370] Tentative implementation of NonConstantFormat.ql using the …
zlaski-semmle Apr 23, 2019
d8b8dda
[CPP-370] First attempt at isAdditionalFlowStep().
zlaski-semmle Apr 28, 2019
91902e5
[CPP-370] Intermediate commit, file not in usable state.
zlaski-semmle Apr 29, 2019
098b654
[CPP-370] Rewrite of NonConstantFormat.ql using the taint tracking l…
zlaski-semmle May 8, 2019
92054e2
[CPP-370] Reformat test cases so that the .expect files line up with …
zlaski-semmle May 14, 2019
a49d82d
[CPP-370] Exclude UserDefinedFormattingFunction nodes.
zlaski-semmle May 15, 2019
f19f48d
[CPP-370] Revert accidental changes to Printf.qll (which belong on zl…
zlaski-semmle May 15, 2019
ae55b7b
[CPP-370] Add new test file for testing procedurally nested format
zlaski-semmle May 20, 2019
81bfbc2
[CPP-370] Forgot to update an .expected file.
zlaski-semmle May 21, 2019
154b9aa
[CPP-370] Reformat both Print.qll files.
zlaski-semmle May 22, 2019
46b6eac
[CPP-370] An .expected file is mismatched again. Not sure why
zlaski-semmle May 22, 2019
51e543a
Merge branch 'master' into zlaski/cpp370
zlaski-semmle Jun 4, 2019
8f79cdb
[CPP-370] Add an additional test case.
zlaski-semmle Jun 4, 2019
0f5a4a7
[CPP-370] Improve handling of `_` macros by using taint sanitizers.
zlaski-semmle Jun 10, 2019
88a39d9
[CPP-370] Fix up `// GOOD` and `// BAD` test annotations so that they…
zlaski-semmle Jun 12, 2019
e99c688
C++: Demonstrate ArrayExpr FP
jbj Jun 20, 2019
cace411
C++: NonConstantFormat taint only for string types
jbj Jun 20, 2019
bc98a80
Merge pull request #1 from jbj/NonConstantFormat-ArrayExpr
zlaski-semmle Jun 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/ql/src/Likely Bugs/Format/NonConstantFormat.qhelp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<qhelp>
<overview>
<p>The <code>printf</code> function, related functions like <code>sprintf</code> and <code>fprintf</code>,
and other functions built atop <code>vprintf</code> all accept a format string as their first argument.
and other functions built atop <code>vprintf</code> all accept a format string as one of their arguments.
When such format strings are literal constants, it is easy for the programmer (and static analysis tools)
to verify that the format specifiers (such as <code>%s</code> and <code>%02x</code>) in the format string
are compatible with the trailing arguments of the function call. When such format strings are not literal
Expand Down
256 changes: 90 additions & 166 deletions cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql
Original file line number Diff line number Diff line change
Expand Up @@ -13,48 +13,37 @@
* security
* external/cwe/cwe-134
*/
import cpp

/**
* Holds if `t` is a character array or pointer, where `charType` is the type of
* characters in `t`.
*/
predicate stringType(Type t, Type charType) {
(
( charType = t.(PointerType).getBaseType() or
charType = t.(ArrayType).getBaseType()
) and (
charType.getUnspecifiedType() instanceof CharType or
charType.getUnspecifiedType() instanceof Wchar_t
)
)
or
stringType(t.getUnderlyingType(), charType)
or
stringType(t.(SpecifiedType).getBaseType(), charType)
}
import semmle.code.cpp.dataflow.TaintTracking
import semmle.code.cpp.commons.Printf

predicate gettextFunction(Function f, int arg) {
// For the following `...gettext` functions, we assume that
// all translations preserve the type and order of `%` specifiers
// (and hence are safe to use as format strings). This
// assumption is hard-coded into the query.
predicate whitelistFunction(Function f, int arg) {
// basic variations of gettext
(f.getName() = "_" and arg = 0) or
(f.getName() = "gettext" and arg = 0) or
(f.getName() = "dgettext" and arg = 1) or
(f.getName() = "dcgettext" and arg = 1) or
f.getName() = "_" and arg = 0
or
f.getName() = "gettext" and arg = 0
or
f.getName() = "dgettext" and arg = 1
or
f.getName() = "dcgettext" and arg = 1
or
// plural variations of gettext that take one format string for singular and another for plural form
(f.getName() = "ngettext" and (arg = 0 or arg = 1)) or
(f.getName() = "dngettext" and (arg = 1 or arg = 2)) or
(f.getName() = "dcngettext" and (arg = 1 or arg = 2))
}

predicate stringArray(Variable arr, AggregateLiteral init) {
arr.getInitializer().getExpr() = init and
stringType(arr.getUnspecifiedType().(ArrayType).getBaseType(), _)
// Ideally, this predicate should also check that no item of `arr` is ever
// reassigned, but such an analysis could get fairly complicated. Instead, we
// just hope that nobody would initialize an array of constants and then
// overwrite some of them with untrusted data.
f.getName() = "ngettext" and
(arg = 0 or arg = 1)
or
f.getName() = "dngettext" and
(arg = 1 or arg = 2)
or
f.getName() = "dcngettext" and
(arg = 1 or arg = 2)
}

// we assume that ALL uses of the `_` macro
// return constant string literals
predicate underscoreMacro(Expr e) {
exists(MacroInvocation mi |
mi.getMacroName() = "_" and
Expand All @@ -63,156 +52,91 @@ predicate underscoreMacro(Expr e) {
}

/**
* Holds if a value of character pointer type may flow _directly_ from `src` to
* `dst`.
* Holds if `t` cannot hold a character array, directly or indirectly.
*/
predicate stringFlowStep(Expr src, Expr dst) {
not underscoreMacro(dst)
and
stringType(dst.getType(), _)
and
(
src = dst.(VariableAccess).getTarget().getAnAssignedValue()
predicate cannotContainString(Type t) {
t.getUnspecifiedType() instanceof BuiltInType
or
t.getUnspecifiedType() instanceof IntegralOrEnumType
}

predicate isNonConst(DataFlow::Node node) {
exists(Expr e | e = node.asExpr() |
exists(FunctionCall fc | fc = e.(FunctionCall) |
not (
whitelistFunction(fc.getTarget(), _) or
fc.getTarget().hasDefinition()
)
)
or
src = dst.(ConditionalExpr).getThen()
exists(Parameter p | p = e.(VariableAccess).getTarget().(Parameter) |
p.getFunction().getName() = "main" and p.getType() instanceof PointerType
)
or
src = dst.(ConditionalExpr).getElse()
e instanceof CrementOperation
or
src = dst.(AssignExpr).getRValue()
e instanceof AddressOfExpr
or
src = dst.(CommaExpr).getRightOperand()
e instanceof ReferenceToExpr
or
src = dst.(UnaryPlusExpr).getOperand()
e instanceof AssignPointerAddExpr
or
stringArray(dst.(ArrayExpr).getArrayBase().(VariableAccess).getTarget(),
src.getParent())
e instanceof AssignPointerSubExpr
or
// Follow a parameter to its arguments in all callers.
exists(Parameter p | p = dst.(VariableAccess).getTarget() |
src = p.getFunction().getACallToThisFunction().getArgument(p.getIndex())
)
e instanceof PointerArithmeticOperation
or
// Follow a call to a gettext function without looking at its body even if
// the body is known. This ensures that we report the error in the relevant
// location rather than inside the body of some `_` function.
exists(Function gettext, FunctionCall call, int idx |
gettextFunction(gettext, idx) and
dst = call and
gettext = call.getTarget()
| src = call.getArgument(idx)
)
e instanceof FieldAccess
or
// Follow a call to a non-gettext function through its return statements.
exists(Function f, ReturnStmt retStmt |
f = dst.(FunctionCall).getTarget() and
f = retStmt.getEnclosingFunction() and
not gettextFunction(f, _)
| src = retStmt.getExpr()
e instanceof PointerDereferenceExpr
or
e instanceof AddressOfExpr
or
e instanceof ExprCall
or
e instanceof NewArrayExpr
or
e instanceof AssignExpr
or
exists(Variable v | v = e.(VariableAccess).getTarget() |
v.getType().(ArrayType).getBaseType() instanceof CharType and
exists(AssignExpr ae |
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
)
)
)
}

/** Holds if `v` may be written to, other than through `AssignExpr`. */
predicate nonConstVariable(Variable v) {
exists(Type charType |
stringType(v.getType(), charType) and not charType.isConst()
)
or
exists(AssignPointerAddExpr e |
e.getLValue().(VariableAccess).getTarget() = v
)
or
exists(AssignPointerSubExpr e |
e.getLValue().(VariableAccess).getTarget() = v
)
or
exists(CrementOperation e | e.getOperand().(VariableAccess).getTarget() = v)
or
exists(AddressOfExpr e | e.getOperand().(VariableAccess).getTarget() = v)
or
exists(ReferenceToExpr e | e.getExpr().(VariableAccess).getTarget() = v)
node instanceof DataFlow::DefinitionByReferenceNode
}

/**
* Holds if this expression is _directly_ considered non-constant for the
* purpose of this query.
*
* Each case of `Expr` that may have string type should be listed either in
* `nonConst` or `stringFlowStep`. Omitting a case leads to false negatives.
* Having a case in both places leads to unnecessary computation.
*/
predicate nonConst(Expr e) {
nonConstVariable(e.(VariableAccess).getTarget())
or
e instanceof CrementOperation
or
e instanceof AssignPointerAddExpr
or
e instanceof AssignPointerSubExpr
or
e instanceof PointerArithmeticOperation
or
e instanceof FieldAccess
or
e instanceof PointerDereferenceExpr
pragma[noinline]
predicate isSanitizerNode(DataFlow::Node node) {
underscoreMacro(node.asExpr())
or
e instanceof AddressOfExpr
or
e instanceof ExprCall
or
exists(ArrayExpr ae | ae = e |
not stringArray(ae.getArrayBase().(VariableAccess).getTarget(), _)
)
or
e instanceof NewArrayExpr
or
// e is a call to a function whose definition we cannot see. We assume it is
// not constant.
exists(Function f | f = e.(FunctionCall).getTarget() |
not f.hasDefinition()
)
or
// e is a parameter of a function that's never called. If it were called, we
// would instead have followed the call graph in `stringFlowStep`.
exists(Function f
| f = e.(VariableAccess).getTarget().(Parameter).getFunction()
| not exists(f.getACallToThisFunction())
)
cannotContainString(node.getType())
}

predicate formattingFunctionArgument(
FormattingFunction ff, FormattingFunctionCall fc, Expr arg)
{
fc.getTarget() = ff and
fc.getArgument(ff.getFormatParameterIndex()) = arg and
// Don't look for errors inside functions that are themselves formatting
// functions. We expect that the interesting errors will be in their callers.
not fc.getEnclosingFunction() instanceof FormattingFunction
}
class NonConstFlow extends TaintTracking::Configuration {
NonConstFlow() { this = "NonConstFlow" }

// Reflexive-transitive closure of `stringFlow`, restricting the base case to
// only consider destination expressions that are arguments to formatting
// functions.
predicate stringFlow(Expr src, Expr dst) {
formattingFunctionArgument(_, _, dst) and src = dst
or
exists(Expr mid | stringFlowStep(src, mid) and stringFlow(mid, dst))
}
override predicate isSource(DataFlow::Node source) {
isNonConst(source) and
not cannotContainString(source.getType())
}

predicate whitelisted(Expr e) {
gettextFunction(e.(FunctionCall).getTarget(), _)
or
underscoreMacro(e)
}
override predicate isSink(DataFlow::Node sink) {
exists(FormattingFunctionCall fc | sink.asExpr() = fc.getArgument(fc.getFormatParameterIndex()))
}

predicate flowFromNonConst(Expr src, Expr dst) {
stringFlow(src, dst) and
nonConst(src) and
not whitelisted(src)
override predicate isSanitizer(DataFlow::Node node) { isSanitizerNode(node) }
}

from FormattingFunctionCall fc, FormattingFunction ff, Expr format
where formattingFunctionArgument(ff, fc, format) and
flowFromNonConst(_, format) and
not fc.isInMacroExpansion()
select format, "The format string argument to " + ff.getName() + " should be constant to prevent security issues and other potential errors."
from FormattingFunctionCall call, Expr formatString
where
call.getArgument(call.getFormatParameterIndex()) = formatString and
exists(NonConstFlow cf, DataFlow::Node source, DataFlow::Node sink |
cf.hasFlow(source, sink) and
sink.asExpr() = formatString
)
select formatString,
"The format string argument to " + call.getTarget().getName() +
" should be constant to prevent security issues and other potential errors."
Loading