From f1d76e3558cd3531155a2b324b98ad4006a3f54d Mon Sep 17 00:00:00 2001 From: Anton Lindqvist Date: Mon, 22 Sep 2025 07:14:00 +0200 Subject: [PATCH] Fix #14150 (Add unionzeroinit check) This has bitten me before and is definitely a foot gun. GCC 15[1] changed their semantics related to zero initialization of unions; from initializing the complete union (sizeof union) to only zero initializing the first member. If the same first member is not the largest one, the state of the remaining storage is considered undefined and in practice most likely stack garbage. The unionzeroinit checker can detect such scenarios and emit a warning. It does not cover the designated initializers as I would interpret those as being intentional. Example output from one of my projects: ``` x86-decoder.c:294:7: warning: Zero initializing union Evex does not guarantee its complete storage to be zero initialized as its largest member is not declared as the first member. Consider making u32 the first member or favor memset(). [unionzeroinit-unionzeroinit] Evex evex = {0}; ^ ``` [1] https://trofi.github.io/posts/328-c-union-init-and-gcc-15.html --- lib/checkother.cpp | 136 +++++++++++++++++++++++++++++++++++++++++++++ lib/checkother.h | 5 ++ releasenotes.txt | 5 +- test/testother.cpp | 101 +++++++++++++++++++++++++++++++++ 4 files changed, 246 insertions(+), 1 deletion(-) diff --git a/lib/checkother.cpp b/lib/checkother.cpp index 440fde971f8..0db35539723 100644 --- a/lib/checkother.cpp +++ b/lib/checkother.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include //--------------------------------------------------------------------------- @@ -4358,6 +4359,139 @@ void CheckOther::checkModuloOfOneError(const Token *tok) reportError(tok, Severity::style, "moduloofone", "Modulo of one is always equal to zero"); } +static const std::string noname; + +struct UnionMember { + UnionMember() + : name(noname) + , size(0) {} + + UnionMember(const std::string &name, size_t size) + : name(name) + , size(size) {} + + const std::string &name; + size_t size; +}; + +struct Union { + explicit Union(const Scope &scope) + : scope(&scope) + , name(scope.className) {} + + const Scope *scope; + const std::string &name; + std::vector members; + + const UnionMember *largestMember() const { + const UnionMember *largest = nullptr; + for (const UnionMember &m : members) { + if (m.size == 0) + return nullptr; + if (largest == nullptr || m.size > largest->size) + largest = &m; + } + return largest; + } + + bool isLargestMemberFirst() const { + const UnionMember *largest = largestMember(); + return largest == nullptr || largest == &members[0]; + } +}; + +static UnionMember parseUnionMember(const Variable &var, + const Settings &settings) +{ + const Token *nameToken = var.nameToken(); + if (nameToken == nullptr) + return UnionMember(); + + const ValueType *vt = nameToken->valueType(); + size_t size = 0; + if (var.isArray()) { + size = var.dimension(0); + } else if (vt != nullptr) { + size = ValueFlow::getSizeOf(*vt, settings, + ValueFlow::Accuracy::ExactOrZero); + } + return UnionMember(nameToken->str(), size); +} + +static std::vector parseUnions(const SymbolDatabase &symbolDatabase, + const Settings &settings) +{ + std::vector unions; + + for (const Scope &scope : symbolDatabase.scopeList) { + if (scope.type != ScopeType::eUnion) + continue; + + Union u(scope); + for (const Variable &var : scope.varlist) { + u.members.push_back(parseUnionMember(var, settings)); + } + unions.push_back(u); + } + + return unions; +} + +static bool isZeroInitializer(const Token *tok) +{ + return Token::Match(tok, "= { 0| } ;"); +} + + +void CheckOther::checkUnionZeroInit() +{ + if (!mSettings->severity.isEnabled(Severity::portability)) + return; + + logChecker("CheckOther::checkUnionZeroInit"); // portability + + const SymbolDatabase *symbolDatabase = mTokenizer->getSymbolDatabase(); + + std::unordered_map unionsByScopeId; + const std::vector unions = parseUnions(*symbolDatabase, *mSettings); + for (const Union &u : unions) { + unionsByScopeId.emplace(u.scope, u); + } + + for (const Token *tok = mTokenizer->tokens(); tok; tok = tok->next()) { + if (!tok->isName() || !isZeroInitializer(tok->next())) + continue; + + const ValueType *vt = tok->valueType(); + if (vt == nullptr || vt->typeScope == nullptr) + continue; + auto it = unionsByScopeId.find(vt->typeScope); + if (it == unionsByScopeId.end()) + continue; + const Union &u = it->second; + if (!u.isLargestMemberFirst()) { + const UnionMember *largestMember = u.largestMember(); + if (largestMember == nullptr) { + throw InternalError(tok, "Largest union member not found", + InternalError::INTERNAL); + } + assert(largestMember != nullptr); + unionZeroInitError(tok, *largestMember); + } + } +} + +void CheckOther::unionZeroInitError(const Token *tok, + const UnionMember& largestMember) +{ + reportError(tok, Severity::portability, "UnionZeroInit", + (tok != nullptr ? "$symbol:" + tok->str() + "\n" : "") + + "Zero initializing union '$symbol' does not guarantee " + + "its complete storage to be zero initialized as its largest member " + + "is not declared as the first member. Consider making " + + largestMember.name + " the first member or favor memset()."); +} + //----------------------------------------------------------------------------- // Overlapping write (undefined behavior) //----------------------------------------------------------------------------- @@ -4576,6 +4710,7 @@ void CheckOther::runChecks(const Tokenizer &tokenizer, ErrorLogger *errorLogger) checkOther.checkAccessOfMovedVariable(); checkOther.checkModuloOfOne(); checkOther.checkOverlappingWrite(); + checkOther.checkUnionZeroInit(); } void CheckOther::getErrorMessages(ErrorLogger *errorLogger, const Settings *settings) const @@ -4658,4 +4793,5 @@ void CheckOther::getErrorMessages(ErrorLogger *errorLogger, const Settings *sett const std::vector nullvec; c.funcArgOrderDifferent("function", nullptr, nullptr, nullvec, nullvec); c.checkModuloOfOneError(nullptr); + c.unionZeroInitError(nullptr, UnionMember()); } diff --git a/lib/checkother.h b/lib/checkother.h index db907785e45..b10458dc44b 100644 --- a/lib/checkother.h +++ b/lib/checkother.h @@ -40,6 +40,7 @@ class Function; class Variable; class ErrorLogger; class Tokenizer; +struct UnionMember; /// @addtogroup Checks /// @{ @@ -194,6 +195,8 @@ class CPPCHECKLIB CheckOther : public Check { void checkModuloOfOne(); + void checkUnionZeroInit(); + void checkOverlappingWrite(); void overlappingWriteUnion(const Token *tok); void overlappingWriteFunction(const Token *tok, const std::string& funcname); @@ -257,6 +260,7 @@ class CPPCHECKLIB CheckOther : public Check { void knownPointerToBoolError(const Token* tok, const ValueFlow::Value* value); void comparePointersError(const Token *tok, const ValueFlow::Value *v1, const ValueFlow::Value *v2); void checkModuloOfOneError(const Token *tok); + void unionZeroInitError(const Token *tok, const UnionMember& largestMember); void getErrorMessages(ErrorLogger *errorLogger, const Settings *settings) const override; @@ -291,6 +295,7 @@ class CPPCHECKLIB CheckOther : public Check { // portability "- Passing NULL pointer to function with variable number of arguments leads to UB.\n" "- Casting non-zero integer literal in decimal or octal format to pointer.\n" + "- Incorrect zero initialization of unions can lead to access of uninitialized memory.\n" // style "- C-style pointer cast in C++ code\n" diff --git a/releasenotes.txt b/releasenotes.txt index 4ffbaff15b1..f77ef681038 100644 --- a/releasenotes.txt +++ b/releasenotes.txt @@ -1,7 +1,10 @@ Release Notes for Cppcheck 2.19 New checks: -- +- Detect zero initialization of unions in which its largest member is not + declared as the first one. Depending on the compiler, there's no guarantee + that the complete union will be zero initialized in such scenarios leading to + potential access of uninitialized memory. Improved checking: - diff --git a/test/testother.cpp b/test/testother.cpp index 9d0c9ccc12c..ceba40c8849 100644 --- a/test/testother.cpp +++ b/test/testother.cpp @@ -27,6 +27,17 @@ #include #include +static std::string unionZeroInitMessage(int lno, int cno, const std::string &varName, const std::string &largestMemberName) +{ + std::stringstream ss; + ss << "[test.cpp:" << lno << ":" << cno << "]: (portability) "; + ss << "Zero initializing union '" << varName << "' "; + ss << "does not guarantee its complete storage to be zero initialized as its largest member is not declared as the first member. "; + ss << "Consider making " << largestMemberName << " the first member or favor memset(). [UnionZeroInit]"; + ss << std::endl; + return ss.str(); +} + class TestOther : public TestFixture { public: TestOther() : TestFixture("TestOther") {} @@ -307,6 +318,12 @@ class TestOther : public TestFixture { TEST_CASE(knownConditionFloating); TEST_CASE(knownConditionPrefixed); + + TEST_CASE(unionZeroInitBasic); + TEST_CASE(unionZeroInitArrayMember); + TEST_CASE(unionZeroInitStructMember); + TEST_CASE(unionZeroInitUnknownType); + TEST_CASE(unionZeroInitBitfields); } #define check(...) check_(__FILE__, __LINE__, __VA_ARGS__) @@ -13242,6 +13259,90 @@ class TestOther : public TestFixture { "[test.cpp:2:13] -> [test.cpp:3:11]: (style) The comparison 'i > +1' is always false. [knownConditionTrueFalse]\n", errout_str()); } + + void unionZeroInitBasic() { + check( + "union bad_union_0 {\n" + " char c;\n" + " long long i64;\n" + " void *p;\n" + "};\n" + "\n" + "typedef union {\n" + " char c;\n" + " int i;\n" + "} bad_union_1;\n" + "\n" + "extern void e(union bad_union_0 *);\n" + "\n" + "void\n" + "foo(void)\n" + "{\n" + " union { int i; char c; } good0 = {0};\n" + " union { int i; char c; } good1 = {};\n" + "\n" + " union { char c; int i; } bad0 = {0};\n" + " union bad_union_0 bad1 = {0};\n" + " e(&bad1);\n" + " bad_union_1 bad2 = {0};\n" + "}"); + const std::string exp = unionZeroInitMessage(20, 28, "bad0", "i") + + unionZeroInitMessage(21, 21, "bad1", "i64") + + unionZeroInitMessage(23, 15, "bad2", "i"); + ASSERT_EQUALS(exp, errout_str()); + } + + void unionZeroInitArrayMember() { + check( + "void foo(void) {\n" + " union { int c; char s8[2]; } u = {0};\n" + "}"); + ASSERT_EQUALS("", errout_str()); + } + + void unionZeroInitStructMember() { + check( + "void foo(void) {\n" + " union {\n" + " int c;\n" + " struct {\n" + " char x;\n" + " struct {\n" + " char y;\n" + " } s1;\n" + " } s0;\n" + " } u = {0};\n" + "}"); + ASSERT_EQUALS("", errout_str()); + } + + void unionZeroInitUnknownType() { + check( + "union u {\n" + " Unknown x;\n" + "}"); + ASSERT_EQUALS("", errout_str()); + } + + void unionZeroInitBitfields() { + check( + "typedef union Evex {\n" + " int u32;\n" + " struct {\n" + " char mmm:3,\n" + " b4:1,\n" + " r4:1,\n" + " b3:1,\n" + " x3:1,\n" + " r3:1;\n" + " } extended;\n" + "} Evex;\n" + "\n" + "void foo(void) {\n" + " Evex evex = {0};\n" + "}"); + ASSERT_EQUALS("", errout_str()); + } }; REGISTER_TEST(TestOther)