diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 8c5fba7c4eb..6a455ccdd38 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -1893,15 +1893,15 @@ void c_typecheck_baset::typecheck_side_effect_function_call( /// XXX why are we only looking at the type of the first parameter? if(parameters.front().type().id() == ID_pointer) { - identifier_with_type = - id2string(identifier) + "_" + - type2identifier(parameters.front().type().subtype(), *this); + identifier_with_type = id2string(identifier) + "_" + + type_to_partial_identifier( + parameters.front().type().subtype(), *this); } else { identifier_with_type = id2string(identifier) + "_" + - type2identifier(parameters.front().type(), *this); + type_to_partial_identifier(parameters.front().type(), *this); } gcc_polymorphic->set_identifier(identifier_with_type); diff --git a/src/ansi-c/type2name.cpp b/src/ansi-c/type2name.cpp index a3d1cb8e3b7..8fcee0bb37a 100644 --- a/src/ansi-c/type2name.cpp +++ b/src/ansi-c/type2name.cpp @@ -19,6 +19,8 @@ Author: Daniel Kroening, kroening@cs.cmu.edu #include #include +#include + typedef std::unordered_map> symbol_numbert; static std::string type2name( @@ -277,27 +279,48 @@ std::string type2name(const typet &type, const namespacet &ns) /// If we want utilities like dump_c to work properly identifiers /// should ideally always be valid C identifiers /// This replaces some invalid characters that can appear in type2name output. -std::string type2identifier(const typet &type, const namespacet &ns) +std::string type_name2type_identifier(const std::string &name) { - auto type2name_res = type2name(type, ns); - std::string result{}; - for(char c : type2name_res) - { - switch(c) + const auto replace_special_characters = [](const std::string &name) { + std::string result{}; + for(char c : name) { - case '*': - result += "_ptr_"; - break; - case '{': - result += "_start_sub_"; - break; - case '}': - result += "_end_sub_"; - break; - default: - result += c; - break; + switch(c) + { + case '*': + result += "_ptr_"; + break; + case '{': + result += "_start_sub_"; + break; + case '}': + result += "_end_sub_"; + break; + default: + result += c; + break; + } } - } - return result; + return result; + }; + const auto replace_invalid_characters_with_underscore = + [](const std::string &identifier) { + static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"}; + return std::regex_replace(identifier, non_alpha_numeric, "_"); + }; + const auto strip_leading_digits = [](const std::string &identifier) { + static const std::regex identifier_regex{ + "[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"}; + std::smatch match_results; + bool found = std::regex_search(identifier, match_results, identifier_regex); + POSTCONDITION(found); + return match_results.str(0); + }; + return strip_leading_digits(replace_invalid_characters_with_underscore( + replace_special_characters(name))); +} + +std::string type_to_partial_identifier(const typet &type, const namespacet &ns) +{ + return type_name2type_identifier(type2name(type, ns)); } diff --git a/src/ansi-c/type2name.h b/src/ansi-c/type2name.h index c5452ea41ba..23ea6a1ec80 100644 --- a/src/ansi-c/type2name.h +++ b/src/ansi-c/type2name.h @@ -20,6 +20,15 @@ class namespacet; std::string type2name(const typet &type, const namespacet &ns); -std::string type2identifier(const typet &type, const namespacet &ns); +/** + * Constructs a string describing the given type, which can be used as part of + * a `C` identifier. The resulting identifier is not guaranteed to uniquely + * identify the type in all cases. + * @param type Internal representation of the type to describe. + * @param ns Namespace for looking up any types on which the `type` parameter + * depends. + * @return An identifying string which can be used as part of a `C` identifier. + */ +std::string type_to_partial_identifier(const typet &type, const namespacet &ns); #endif // CPROVER_ANSI_C_TYPE2NAME_H diff --git a/unit/Makefile b/unit/Makefile index 2b85801f809..d3625d05fa8 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -15,6 +15,7 @@ SRC += analyses/ai/ai.cpp \ analyses/does_remove_const/does_type_preserve_const_correctness.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ ansi-c/max_malloc_size.cpp \ + ansi-c/type2name.cpp \ big-int/big-int.cpp \ compound_block_locations.cpp \ get_goto_model_from_c_test.cpp \ diff --git a/unit/ansi-c/type2name.cpp b/unit/ansi-c/type2name.cpp new file mode 100644 index 00000000000..38a5d08af9c --- /dev/null +++ b/unit/ansi-c/type2name.cpp @@ -0,0 +1,74 @@ +/*******************************************************************\ + +Module: Unit tests for type_name2type_identifier + +Author: Thomas Spriggs + +\*******************************************************************/ + +#include + +#include + +extern std::string type_name2type_identifier(const std::string &name); + +TEST_CASE( + "type_name2type_identifier sanity check", + "[core][ansi-c][type_name2type_identifier]") +{ + CHECK(type_name2type_identifier("char") == "char"); +} + +TEST_CASE( + "type_name2type_identifier special characters", + "[core][ansi-c][type_name2type_identifier]") +{ + CHECK(type_name2type_identifier("char*") == "char_ptr_"); + CHECK(type_name2type_identifier("foo{bar}") == "foo_start_sub_bar_end_sub_"); +} + +/** + * @return A string containing all 7-bit ascii printable characters. + */ +static std::string all_printable_characters() +{ + const char first = 32; + const char last = 127; + std::string printable_characters(last - first, ' '); + std::iota(printable_characters.begin(), printable_characters.end(), first); + return printable_characters; +} + +TEST_CASE( + "type_name2type_identifier invalid characters", + "[core][ansi-c][type_name2type_identifier]") +{ + const std::string printable_characters = all_printable_characters(); + CHECK( + printable_characters == + R"( !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`)" + R"(abcdefghijklmnopqrstuvwxyz{|}~)"); + CHECK( + type_name2type_identifier(printable_characters) == + "_ptr_0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_" + "start_sub_end_sub_"); +} + +TEST_CASE( + "type_name2type_identifier leading characters", + "[core][ansi-c][type_name2type_identifier]") +{ + CHECK( + type_name2type_identifier("0123456789banana_0123456789_split_0123456789") == + "banana_0123456789_split_0123456789"); + CHECK(type_name2type_identifier("0123456789_banana") == "_banana"); + CHECK(type_name2type_identifier("_0123456789") == "_0123456789"); +} + +TEST_CASE( + "type_name2type_identifier UTF-8 characters", + "[core][ansi-c][type_name2type_identifier]") +{ + const std::string utf8_example = "\xF0\x9F\x8D\x8C\xF0\x9F\x8D\xA8"; + CHECK(type_name2type_identifier(utf8_example) == utf8_example); +}