Skip to content
8 changes: 4 additions & 4 deletions src/ansi-c/c_typecheck_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1893,15 +1893,15 @@ void c_typecheck_baset::typecheck_side_effect_function_call(
/// XXX why are we only looking at the type of the first parameter?
if(parameters.front().type().id() == ID_pointer)
{
identifier_with_type =
id2string(identifier) + "_" +
type2identifier(parameters.front().type().subtype(), *this);
identifier_with_type = id2string(identifier) + "_" +
type_to_partial_identifier(
parameters.front().type().subtype(), *this);
}
else
{
identifier_with_type =
id2string(identifier) + "_" +
type2identifier(parameters.front().type(), *this);
type_to_partial_identifier(parameters.front().type(), *this);
}
gcc_polymorphic->set_identifier(identifier_with_type);

Expand Down
63 changes: 43 additions & 20 deletions src/ansi-c/type2name.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Author: Daniel Kroening, kroening@cs.cmu.edu
#include <util/std_types.h>
#include <util/symbol_table.h>

#include <regex>

typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;

static std::string type2name(
Expand Down Expand Up @@ -277,27 +279,48 @@ std::string type2name(const typet &type, const namespacet &ns)
/// If we want utilities like dump_c to work properly identifiers
/// should ideally always be valid C identifiers
/// This replaces some invalid characters that can appear in type2name output.
std::string type2identifier(const typet &type, const namespacet &ns)
std::string type_name2type_identifier(const std::string &name)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don’t like the 2 naming convention here, but changing that isn’t really in scope for this PR.

{
auto type2name_res = type2name(type, ns);
std::string result{};
for(char c : type2name_res)
{
switch(c)
const auto replace_special_characters = [](const std::string &name) {
std::string result{};
for(char c : name)
{
case '*':
result += "_ptr_";
break;
case '{':
result += "_start_sub_";
break;
case '}':
result += "_end_sub_";
break;
default:
result += c;
break;
switch(c)
{
case '*':
result += "_ptr_";
break;
case '{':
result += "_start_sub_";
break;
case '}':
result += "_end_sub_";
break;
default:
result += c;
break;
}
}
}
return result;
return result;
};
const auto replace_invalid_characters_with_underscore =
[](const std::string &identifier) {
static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
return std::regex_replace(identifier, non_alpha_numeric, "_");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, I assume, risks generating clashing names?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chrisr-diffblue Yes, in general this isn’t generating a unique identifier. The idea would be to use this as an identifier fragment useful for generating a human-readable-but-unique identifier elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were already risking clashing names, rather than guaranteeing uniqueness. For example fruit* will be replaced with fruit_ptr_, but there was no guarantee that something else in user code wasn't already called fruit_ptr.

};
const auto strip_leading_digits = [](const std::string &identifier) {
static const std::regex identifier_regex{
"[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"};
std::smatch match_results;
bool found = std::regex_search(identifier, match_results, identifier_regex);
POSTCONDITION(found);
return match_results.str(0);
};
return strip_leading_digits(replace_invalid_characters_with_underscore(
replace_special_characters(name)));
}

std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
{
return type_name2type_identifier(type2name(type, ns));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice 👍

}
11 changes: 10 additions & 1 deletion src/ansi-c/type2name.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ class namespacet;

std::string type2name(const typet &type, const namespacet &ns);

std::string type2identifier(const typet &type, const namespacet &ns);
/**
* Constructs a string describing the given type, which can be used as part of
* a `C` identifier. The resulting identifier is not guaranteed to uniquely
* identify the type in all cases.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might need clarifying - in fact, it might actually be worth making this explicit in the function name, e.g. rename it to something like type_to_approximate_identifier or type_to_nonunique_identifier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get that "identifier is a bit of a misnomer in this case. However nonunique_identifier seems like a bit of an oxymoron to me. How does the term partial_identifier sound to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

partial_identifer sounds reasonable to me - it makes it clearer to an unsuspecting caller that they need to understand what is returning :-)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

* @param type Internal representation of the type to describe.
* @param ns Namespace for looking up any types on which the `type` parameter
* depends.
* @return An identifying string which can be used as part of a `C` identifier.
*/
std::string type_to_partial_identifier(const typet &type, const namespacet &ns);

#endif // CPROVER_ANSI_C_TYPE2NAME_H
1 change: 1 addition & 0 deletions unit/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ SRC += analyses/ai/ai.cpp \
analyses/does_remove_const/does_type_preserve_const_correctness.cpp \
analyses/does_remove_const/is_type_at_least_as_const_as.cpp \
ansi-c/max_malloc_size.cpp \
ansi-c/type2name.cpp \
big-int/big-int.cpp \
compound_block_locations.cpp \
get_goto_model_from_c_test.cpp \
Expand Down
74 changes: 74 additions & 0 deletions unit/ansi-c/type2name.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*******************************************************************\

Module: Unit tests for type_name2type_identifier

Author: Thomas Spriggs

\*******************************************************************/

#include <testing-utils/use_catch.h>

#include <numeric>

extern std::string type_name2type_identifier(const std::string &name);

TEST_CASE(
"type_name2type_identifier sanity check",
"[core][ansi-c][type_name2type_identifier]")
{
CHECK(type_name2type_identifier("char") == "char");
}

TEST_CASE(
"type_name2type_identifier special characters",
"[core][ansi-c][type_name2type_identifier]")
{
CHECK(type_name2type_identifier("char*") == "char_ptr_");
CHECK(type_name2type_identifier("foo{bar}") == "foo_start_sub_bar_end_sub_");
}

/**
* @return A string containing all 7-bit ascii printable characters.
*/
static std::string all_printable_characters()
{
const char first = 32;
const char last = 127;
std::string printable_characters(last - first, ' ');
std::iota(printable_characters.begin(), printable_characters.end(), first);
return printable_characters;
}

TEST_CASE(
"type_name2type_identifier invalid characters",
"[core][ansi-c][type_name2type_identifier]")
{
const std::string printable_characters = all_printable_characters();
CHECK(
printable_characters ==
R"( !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`)"
R"(abcdefghijklmnopqrstuvwxyz{|}~)");
CHECK(
type_name2type_identifier(printable_characters) ==
"_ptr_0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_"
"start_sub_end_sub_");
}

TEST_CASE(
"type_name2type_identifier leading characters",
"[core][ansi-c][type_name2type_identifier]")
{
CHECK(
type_name2type_identifier("0123456789banana_0123456789_split_0123456789") ==
"banana_0123456789_split_0123456789");
CHECK(type_name2type_identifier("0123456789_banana") == "_banana");
CHECK(type_name2type_identifier("_0123456789") == "_0123456789");
}

TEST_CASE(
"type_name2type_identifier UTF-8 characters",
"[core][ansi-c][type_name2type_identifier]")
{
const std::string utf8_example = "\xF0\x9F\x8D\x8C\xF0\x9F\x8D\xA8";
CHECK(type_name2type_identifier(utf8_example) == utf8_example);
}