Skip to content

C++: Add a new query for calling c_str on temporary objects #14928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,25 @@ private class StdSequenceContainerData extends TaintFunction {
/**
* The standard container functions `push_back` and `push_front`.
*/
private class StdSequenceContainerPush extends TaintFunction {
class StdSequenceContainerPush extends MemberFunction {
StdSequenceContainerPush() {
this.getClassAndName("push_back") instanceof Vector or
this.getClassAndName(["push_back", "push_front"]) instanceof Deque or
this.getClassAndName("push_front") instanceof ForwardList or
this.getClassAndName(["push_back", "push_front"]) instanceof List
}

/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}

private class StdSequenceContainerPushModel extends StdSequenceContainerPush, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from parameter to qualifier
input.isParameterDeref(0) and
Expand Down Expand Up @@ -160,7 +171,7 @@ private class StdSequenceContainerFrontBack extends TaintFunction {
/**
* The standard container functions `insert` and `insert_after`.
*/
private class StdSequenceContainerInsert extends TaintFunction {
class StdSequenceContainerInsert extends MemberFunction {
StdSequenceContainerInsert() {
this.getClassAndName("insert") instanceof Deque or
this.getClassAndName("insert") instanceof List or
Expand All @@ -181,7 +192,9 @@ private class StdSequenceContainerInsert extends TaintFunction {
* Gets the index of a parameter to this function that is an iterator.
*/
int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
}

private class StdSequenceContainerInsertModel extends StdSequenceContainerInsert, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from parameter to container itself (qualifier) and return value
(
Expand Down Expand Up @@ -253,11 +266,28 @@ private class StdSequenceContainerAt extends TaintFunction {
}

/**
* The standard vector `emplace` function.
* The standard `emplace` function.
*/
class StdVectorEmplace extends TaintFunction {
StdVectorEmplace() { this.getClassAndName("emplace") instanceof Vector }
class StdSequenceEmplace extends MemberFunction {
StdSequenceEmplace() {
this.getClassAndName("emplace") instanceof Vector
or
this.getClassAndName("emplace") instanceof List
or
this.getClassAndName("emplace") instanceof Deque
}

/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}

private class StdSequenceEmplaceModel extends StdSequenceEmplace, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from any parameter except the position iterator to qualifier and return value
// (here we assume taint flow from any constructor parameter to the constructed object)
Expand All @@ -269,16 +299,47 @@ class StdVectorEmplace extends TaintFunction {
}
}

/**
* The standard vector `emplace` function.
*/
class StdVectorEmplace extends StdSequenceEmplace {
StdVectorEmplace() { this.getDeclaringType() instanceof Vector }
}

/**
* The standard vector `emplace_back` function.
*/
class StdVectorEmplaceBack extends TaintFunction {
StdVectorEmplaceBack() { this.getClassAndName("emplace_back") instanceof Vector }
class StdSequenceEmplaceBack extends MemberFunction {
StdSequenceEmplaceBack() {
this.getClassAndName("emplace_back") instanceof Vector
or
this.getClassAndName("emplace_back") instanceof List
or
this.getClassAndName("emplace_back") instanceof Deque
}

/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}

private class StdSequenceEmplaceBackModel extends StdSequenceEmplaceBack, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from any parameter to qualifier
// (here we assume taint flow from any constructor parameter to the constructed object)
input.isParameterDeref([0 .. this.getNumberOfParameters() - 1]) and
output.isQualifierObject()
}
}

/**
* The standard vector `emplace_back` function.
*/
class StdVectorEmplaceBack extends StdSequenceEmplaceBack {
StdVectorEmplaceBack() { this.getDeclaringType() instanceof Vector }
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,11 @@ private class StdStringConstructor extends Constructor, StdStringTaintFunction {
/**
* The `std::string` function `c_str`.
*/
private class StdStringCStr extends StdStringTaintFunction {
class StdStringCStr extends MemberFunction {
StdStringCStr() { this.getClassAndName("c_str") instanceof StdBasicString }
}

private class StdStringCStrModel extends StdStringCStr, StdStringTaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from string itself (qualifier) to return value
input.isQualifierObject() and
Expand All @@ -112,9 +114,11 @@ private class StdStringCStr extends StdStringTaintFunction {
/**
* The `std::string` function `data`.
*/
private class StdStringData extends StdStringTaintFunction {
class StdStringData extends MemberFunction {
StdStringData() { this.getClassAndName("data") instanceof StdBasicString }
}

private class StdStringDataModel extends StdStringData, StdStringTaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from string itself (qualifier) to return value
input.isQualifierObject() and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>Calling <code>c_str</code> on a <code>std::string</code> object returns a pointer to the underlying character array.
When the <code>std::string</code> object is destroyed, the pointer returned by <code>c_str</code> is no
longer valid. If the pointer is used after the <code>std::string</code> object is destroyed, then the behavior is undefined.
</p>
</overview>

<recommendation>
<p>
Ensure that the pointer returned by <code>c_str</code> does not outlive the underlying <code>std::string</code> object.
</p>
</recommendation>

<example>
<p>
The following example concatenates two <code>std::string</code> objects, and then converts the resulting string to a
C string using <code>c_str</code> so that it can be passed to the <code>work</code> function.

However, the underlying <code>std::string</code> object that represents the concatenated string is destroyed as soon as the call
to <code>c_str</code> returns. This means that <code>work</code> is given a pointer to invalid memory.
</p>

<sample src="UseOfStringAfterLifetimeEndsBad.cpp" />

<p>
The following example fixes the above code by ensuring that the pointer returned by the call to <code>c_str</code> does
not outlive the underlying <code>std::string</code> objects. This ensures that the pointer passed to <code>work</code>
points to valid memory.
</p>

<sample src="UseOfStringAfterLifetimeEndsGood.cpp" />

</example>
<references>

<li><a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/MEM50-CPP.+Do+not+access+freed+memory">MEM50-CPP. Do not access freed memory</a>.</li>

</references>
</qhelp>
100 changes: 100 additions & 0 deletions cpp/ql/src/Security/CWE/CWE-416/UseOfStringAfterLifetimeEnds.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* @name Use of string after lifetime ends
* @description If the value of a call to 'c_str' outlives the underlying object it may lead to unexpected behavior.
* @kind problem
* @precision high
* @id cpp/use-of-string-after-lifetime-ends
* @problem.severity warning
* @security-severity 8.8
* @tags reliability
* security
* external/cwe/cwe-416
* external/cwe/cwe-664
*/

import cpp
import semmle.code.cpp.models.implementations.StdString
import semmle.code.cpp.models.implementations.StdContainer

/**
* Holds if `e` will be consumed by its parent as a glvalue and does not have
* an lvalue-to-rvalue conversion. This means that it will be materialized into
* a temporary object.
*/
predicate isTemporary(Expr e) {
e instanceof TemporaryObjectExpr
or
e.isPRValueCategory() and
e.getUnspecifiedType() instanceof Class and
not e.hasLValueToRValueConversion()
}

/** Holds if `e` is written to a container. */
predicate isStoredInContainer(Expr e) {
exists(StdSequenceContainerInsert insert, Call call, int index |
call = insert.getACallToThisFunction() and
index = insert.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceContainerPush push, Call call, int index |
call = push.getACallToThisFunction() and
index = push.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceEmplace emplace, Call call, int index |
call = emplace.getACallToThisFunction() and
index = emplace.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceEmplaceBack emplaceBack, Call call, int index |
call = emplaceBack.getACallToThisFunction() and
index = emplaceBack.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
}

/**
* Holds if the value of `e` outlives the enclosing full expression. For
* example, because the value is stored in a local variable.
*/
predicate outlivesFullExpr(Expr e) {
any(Assignment assign).getRValue() = e
or
any(Variable v).getInitializer().getExpr() = e
or
any(ReturnStmt ret).getExpr() = e
or
exists(ConditionalExpr cond |
outlivesFullExpr(cond) and
[cond.getThen(), cond.getElse()] = e
)
or
exists(BinaryOperation bin |
outlivesFullExpr(bin) and
bin.getAnOperand() = e
)
or
exists(ClassAggregateLiteral aggr |
outlivesFullExpr(aggr) and
aggr.getAFieldExpr(_) = e
)
or
exists(ArrayAggregateLiteral aggr |
outlivesFullExpr(aggr) and
aggr.getAnElementExpr(_) = e
)
or
isStoredInContainer(e)
}

from Call c
where
outlivesFullExpr(c) and
not c.isFromUninstantiatedTemplate(_) and
(c.getTarget() instanceof StdStringCStr or c.getTarget() instanceof StdStringData) and
isTemporary(c.getQualifier().getFullyConverted())
select c,
"The underlying string object is destroyed after the call to '" + c.getTarget() + "' returns."
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include <string>
void work(const char*);

// BAD: the concatenated string is deallocated when `c_str` returns. So `work`
// is given a pointer to invalid memory.
void work_with_combined_string_bad(std::string s1, std::string s2) {
const char* combined_string = (s1 + s2).c_str();
work(combined_string);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include <string>
void work(const char*);

// GOOD: the concatenated string outlives the call to `work`. So the pointer
// obtainted from `c_str` is valid.
void work_with_combined_string_good(std::string s1, std::string s2) {
auto combined_string = s1 + s2;
work(combined_string.c_str());
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `cpp/use-of-string-after-lifetime-ends`, to detect calls to `c_str` on strings that will be destroyed immediately.
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
| test.cpp:165:34:165:38 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:166:39:166:43 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:167:44:167:48 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:169:29:169:33 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:178:37:178:41 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:181:39:181:43 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:183:37:183:41 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:187:34:187:37 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:188:39:188:42 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:189:44:189:47 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:191:29:191:32 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:193:31:193:35 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

Security/CWE/CWE-416/UseOfStringAfterLifetimeEnds.ql
Loading