Skip to content

Commit ae352a4

Browse files
committed
[CPP-370] Tentative implementation of NonConstantFormat.ql using the global
DataFlow library. This is intended solely for further discussion.
1 parent 5b9482d commit ae352a4

File tree

4 files changed

+77
-213
lines changed

4 files changed

+77
-213
lines changed

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql

Lines changed: 40 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -13,206 +13,62 @@
1313
* security
1414
* external/cwe/cwe-134
1515
*/
16-
import cpp
1716

18-
/**
19-
* Holds if `t` is a character array or pointer, where `charType` is the type of
20-
* characters in `t`.
21-
*/
22-
predicate stringType(Type t, Type charType) {
23-
(
24-
( charType = t.(PointerType).getBaseType() or
25-
charType = t.(ArrayType).getBaseType()
26-
) and (
27-
charType.getUnspecifiedType() instanceof CharType or
28-
charType.getUnspecifiedType() instanceof Wchar_t
29-
)
30-
)
31-
or
32-
stringType(t.getUnderlyingType(), charType)
33-
or
34-
stringType(t.(SpecifiedType).getBaseType(), charType)
35-
}
17+
import semmle.code.cpp.dataflow.DataFlow
18+
import semmle.code.cpp.commons.Printf
3619

37-
predicate gettextFunction(Function f, int arg) {
20+
predicate whitelistFunction(Function f, int arg) {
3821
// basic variations of gettext
39-
(f.getName() = "_" and arg = 0) or
40-
(f.getName() = "gettext" and arg = 0) or
41-
(f.getName() = "dgettext" and arg = 1) or
42-
(f.getName() = "dcgettext" and arg = 1) or
43-
// plural variations of gettext that take one format string for singular and another for plural form
44-
(f.getName() = "ngettext" and (arg = 0 or arg = 1)) or
45-
(f.getName() = "dngettext" and (arg = 1 or arg = 2)) or
46-
(f.getName() = "dcngettext" and (arg = 1 or arg = 2))
47-
}
48-
49-
predicate stringArray(Variable arr, AggregateLiteral init) {
50-
arr.getInitializer().getExpr() = init and
51-
stringType(arr.getType().getUnspecifiedType().(ArrayType).getBaseType(), _)
52-
// Ideally, this predicate should also check that no item of `arr` is ever
53-
// reassigned, but such an analysis could get fairly complicated. Instead, we
54-
// just hope that nobody would initialize an array of constants and then
55-
// overwrite some of them with untrusted data.
56-
}
57-
58-
predicate underscoreMacro(Expr e) {
59-
exists(MacroInvocation mi |
60-
mi.getMacroName() = "_" and
61-
mi.getExpr() = e
62-
)
63-
}
64-
65-
/**
66-
* Holds if a value of character pointer type may flow _directly_ from `src` to
67-
* `dst`.
68-
*/
69-
predicate stringFlowStep(Expr src, Expr dst) {
70-
not underscoreMacro(dst)
71-
and
72-
stringType(dst.getType(), _)
73-
and
74-
(
75-
src = dst.(VariableAccess).getTarget().getAnAssignedValue()
76-
or
77-
src = dst.(ConditionalExpr).getThen()
78-
or
79-
src = dst.(ConditionalExpr).getElse()
80-
or
81-
src = dst.(AssignExpr).getRValue()
82-
or
83-
src = dst.(CommaExpr).getRightOperand()
84-
or
85-
src = dst.(UnaryPlusExpr).getOperand()
86-
or
87-
stringArray(dst.(ArrayExpr).getArrayBase().(VariableAccess).getTarget(),
88-
src.getParent())
89-
or
90-
// Follow a parameter to its arguments in all callers.
91-
exists(Parameter p | p = dst.(VariableAccess).getTarget() |
92-
src = p.getFunction().getACallToThisFunction().getArgument(p.getIndex())
93-
)
94-
or
95-
// Follow a call to a gettext function without looking at its body even if
96-
// the body is known. This ensures that we report the error in the relevant
97-
// location rather than inside the body of some `_` function.
98-
exists(Function gettext, FunctionCall call, int idx |
99-
gettextFunction(gettext, idx) and
100-
dst = call and
101-
gettext = call.getTarget()
102-
| src = call.getArgument(idx)
103-
)
104-
or
105-
// Follow a call to a non-gettext function through its return statements.
106-
exists(Function f, ReturnStmt retStmt |
107-
f = dst.(FunctionCall).getTarget() and
108-
f = retStmt.getEnclosingFunction() and
109-
not gettextFunction(f, _)
110-
| src = retStmt.getExpr()
111-
)
112-
)
113-
}
114-
115-
/** Holds if `v` may be written to, other than through `AssignExpr`. */
116-
predicate nonConstVariable(Variable v) {
117-
exists(Type charType |
118-
stringType(v.getType(), charType) and not charType.isConst()
119-
)
22+
f.getName() = "_" and arg = 0
12023
or
121-
exists(AssignPointerAddExpr e |
122-
e.getLValue().(VariableAccess).getTarget() = v
123-
)
124-
or
125-
exists(AssignPointerSubExpr e |
126-
e.getLValue().(VariableAccess).getTarget() = v
127-
)
24+
f.getName() = "gettext" and arg = 0
12825
or
129-
exists(CrementOperation e | e.getOperand().(VariableAccess).getTarget() = v)
26+
f.getName() = "dgettext" and arg = 1
13027
or
131-
exists(AddressOfExpr e | e.getOperand().(VariableAccess).getTarget() = v)
28+
f.getName() = "dcgettext" and arg = 1
13229
or
133-
exists(ReferenceToExpr e | e.getExpr().(VariableAccess).getTarget() = v)
134-
}
135-
136-
/**
137-
* Holds if this expression is _directly_ considered non-constant for the
138-
* purpose of this query.
139-
*
140-
* Each case of `Expr` that may have string type should be listed either in
141-
* `nonConst` or `stringFlowStep`. Omitting a case leads to false negatives.
142-
* Having a case in both places leads to unnecessary computation.
143-
*/
144-
predicate nonConst(Expr e) {
145-
nonConstVariable(e.(VariableAccess).getTarget())
146-
or
147-
e instanceof CrementOperation
148-
or
149-
e instanceof AssignPointerAddExpr
150-
or
151-
e instanceof AssignPointerSubExpr
152-
or
153-
e instanceof PointerArithmeticOperation
154-
or
155-
e instanceof FieldAccess
156-
or
157-
e instanceof PointerDereferenceExpr
158-
or
159-
e instanceof AddressOfExpr
160-
or
161-
e instanceof ExprCall
162-
or
163-
exists(ArrayExpr ae | ae = e |
164-
not stringArray(ae.getArrayBase().(VariableAccess).getTarget(), _)
165-
)
166-
or
167-
e instanceof NewArrayExpr
30+
// plural variations of gettext that take one format string for singular and another for plural form
31+
f.getName() = "ngettext" and
32+
(arg = 0 or arg = 1)
16833
or
169-
// e is a call to a function whose definition we cannot see. We assume it is
170-
// not constant.
171-
exists(Function f | f = e.(FunctionCall).getTarget() |
172-
not f.hasDefinition()
173-
)
34+
f.getName() = "dngettext" and
35+
(arg = 1 or arg = 2)
17436
or
175-
// e is a parameter of a function that's never called. If it were called, we
176-
// would instead have followed the call graph in `stringFlowStep`.
177-
exists(Function f
178-
| f = e.(VariableAccess).getTarget().(Parameter).getFunction()
179-
| not exists(f.getACallToThisFunction())
180-
)
37+
f.getName() = "dcngettext" and
38+
(arg = 1 or arg = 2)
18139
}
18240

183-
predicate formattingFunctionArgument(
184-
FormattingFunction ff, FormattingFunctionCall fc, Expr arg)
185-
{
186-
fc.getTarget() = ff and
187-
fc.getArgument(ff.getFormatParameterIndex()) = arg and
188-
// Don't look for errors inside functions that are themselves formatting
189-
// functions. We expect that the interesting errors will be in their callers.
190-
not fc.getEnclosingFunction() instanceof FormattingFunction
41+
predicate whitelisted(Expr e) {
42+
exists(FunctionCall fc, int arg | fc = e.(FunctionCall) |
43+
whitelistFunction(fc.getTarget(), arg) and
44+
isConst(fc.getArgument(arg))
45+
)
19146
}
19247

193-
// Reflexive-transitive closure of `stringFlow`, restricting the base case to
194-
// only consider destination expressions that are arguments to formatting
195-
// functions.
196-
predicate stringFlow(Expr src, Expr dst) {
197-
formattingFunctionArgument(_, _, dst) and src = dst
48+
predicate isConst(Expr e) {
49+
e instanceof StringLiteral
19850
or
199-
exists(Expr mid | stringFlowStep(src, mid) and stringFlow(mid, dst))
51+
whitelisted(e)
20052
}
20153

202-
predicate whitelisted(Expr e) {
203-
gettextFunction(e.(FunctionCall).getTarget(), _)
204-
or
205-
underscoreMacro(e)
206-
}
54+
class ConstFlow extends DataFlow::Configuration {
55+
ConstFlow() { this = "ConstFlow" }
20756

208-
predicate flowFromNonConst(Expr src, Expr dst) {
209-
stringFlow(src, dst) and
210-
nonConst(src) and
211-
not whitelisted(src)
57+
override predicate isSource(DataFlow::Node source) {
58+
isConst(source.asExpr())
59+
}
60+
61+
override predicate isSink(DataFlow::Node sink) {
62+
exists(FormattingFunctionCall fc |
63+
sink.asExpr() = fc.getArgument(fc.getFormatParameterIndex())
64+
)
65+
}
21266
}
21367

214-
from FormattingFunctionCall fc, FormattingFunction ff, Expr format
215-
where formattingFunctionArgument(ff, fc, format) and
216-
flowFromNonConst(_, format) and
217-
not fc.isInMacroExpansion()
218-
select format, "The format string argument to " + ff.getName() + " should be constant to prevent security issues and other potential errors."
68+
from FormattingFunctionCall call, Expr formatString
69+
where call.getArgument(call.getFormatParameterIndex()) = formatString
70+
and not exists(ConstFlow cf, DataFlow::Node source, DataFlow::Node sink |
71+
cf.hasFlow(source, sink) and
72+
sink.asExpr() = formatString
73+
)
74+
select call, "The format string argument to " + call.getTarget().getQualifiedName() + " should be constant to prevent security issues and other potential errors."

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/NonConstantFormat.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
extern int printf(const char *fmt, ...);
22

3+
// For the following `...gettext` functions, we assume that
4+
// all translations preserve the type and order of `%` specifiers
5+
// (and hence are safe to use as format strings). This is
6+
// assumption is hard-coded into the query.
37

48
extern char *gettext (const char *__msgid);
59

@@ -8,7 +12,6 @@ extern char *dgettext (const char *__domainname, const char *__msgid);
812
extern char *dcgettext (const char *__domainname,
913
const char *__msgid, int __category);
1014

11-
1215
extern char *ngettext (const char *__msgid1, const char *__msgid2,
1316
unsigned long int __n);
1417

@@ -23,7 +26,9 @@ extern char *dcngettext (const char *__domainname, const char *__msgid1,
2326
extern char *any_random_function(const char *);
2427

2528
#define NULL ((void*)0)
26-
#define _(X) any_random_function((X))
29+
30+
// The following is the recommended use for the `_` macro.
31+
#define _(X) gettext(X)
2732

2833
int main(int argc, char **argv) {
2934
if(argc > 1)
@@ -40,10 +45,10 @@ int main(int argc, char **argv) {
4045
printf(gettext("%d arguments\n"), argc-1); // ok
4146
printf(any_random_function("%d arguments\n"), argc-1); // not ok
4247

43-
// Our query can't look inside the argument to a macro, so it fails to
44-
// flag this call.
48+
// Our query also supports looking for `_` as a function.
49+
#undef _
4550
printf(_(any_random_function("%d arguments\n")),
46-
argc-1); // not ok [NOT REPORTED]
51+
argc-1); // not ok
4752

4853
return 0;
4954
}
Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,22 @@
1-
| NonConstantFormat.c:30:10:30:16 | access to array | The format string argument to printf should be constant to prevent security issues and other potential errors. |
2-
| NonConstantFormat.c:41:9:41:27 | call to any_random_function | The format string argument to printf should be constant to prevent security issues and other potential errors. |
3-
| test.cpp:45:10:45:21 | call to make_message | The format string argument to printf should be constant to prevent security issues and other potential errors. |
4-
| test.cpp:50:12:50:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
5-
| test.cpp:51:12:51:12 | call to _ | The format string argument to printf should be constant to prevent security issues and other potential errors. |
6-
| test.cpp:52:12:52:18 | call to gettext | The format string argument to printf should be constant to prevent security issues and other potential errors. |
7-
| test.cpp:53:12:53:21 | call to const_wash | The format string argument to printf should be constant to prevent security issues and other potential errors. |
8-
| test.cpp:54:12:54:26 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
9-
| test.cpp:55:12:55:17 | + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
10-
| test.cpp:56:12:56:18 | * ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
11-
| test.cpp:57:12:57:18 | & ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
12-
| test.cpp:58:12:58:39 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
13-
| test.cpp:60:10:60:35 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
14-
| test.cpp:63:12:63:20 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
15-
| test.cpp:69:12:69:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
16-
| test.cpp:75:12:75:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
17-
| test.cpp:81:12:81:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
18-
| test.cpp:86:12:86:18 | ++ ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
19-
| test.cpp:93:12:93:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
20-
| test.cpp:100:12:100:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
21-
| test.cpp:103:12:103:24 | new[] | The format string argument to printf should be constant to prevent security issues and other potential errors. |
22-
| test.cpp:108:12:108:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
23-
| test.cpp:117:10:117:19 | call to const_wash | The format string argument to printf should be constant to prevent security issues and other potential errors. |
1+
| NonConstantFormat.c:35:3:35:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
2+
| NonConstantFormat.c:46:2:46:7 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
3+
| NonConstantFormat.c:50:2:50:7 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
4+
| test.cpp:45:3:45:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
5+
| test.cpp:46:3:46:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
6+
| test.cpp:47:3:47:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
7+
| test.cpp:48:3:48:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
8+
| test.cpp:54:5:54:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
9+
| test.cpp:56:5:56:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
10+
| test.cpp:57:5:57:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
11+
| test.cpp:58:5:58:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
12+
| test.cpp:59:5:59:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
13+
| test.cpp:60:5:60:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
14+
| test.cpp:61:5:61:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
15+
| test.cpp:62:5:62:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
16+
| test.cpp:64:3:64:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
17+
| test.cpp:67:5:67:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
18+
| test.cpp:73:5:73:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
19+
| test.cpp:79:5:79:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
20+
| test.cpp:85:5:85:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
21+
| test.cpp:90:5:90:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
22+
| test.cpp:107:5:107:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ const char *choose_message(unsigned int n) {
2323

2424
const char *make_message(unsigned int n) {
2525
static char buf[64];
26-
sprintf(buf, "%d tasks left\n", n);
26+
sprintf(buf, "%d tasks left\n", n); // ok
2727
return buf;
2828
}
2929

@@ -41,8 +41,12 @@ const char *const_wash(char *str) {
4141
}
4242

4343
int main(int argc, char **argv) {
44+
const char *message = messages[2];
4445
printf(choose_message(argc - 1), argc - 1); // OK
46+
printf(messages[1]); // OK
47+
printf(message); // OK
4548
printf(make_message(argc - 1)); // NOT OK
49+
printf("Hello, World\n"); // OK
4650
printf(_("Hello, World\n")); // OK
4751
{
4852
char hello[] = "hello, World\n";

0 commit comments

Comments
 (0)