diff --git a/regression/cbmc/xml-trace2/main.c b/regression/cbmc/xml-trace2/main.c new file mode 100644 index 00000000000..9429678036f --- /dev/null +++ b/regression/cbmc/xml-trace2/main.c @@ -0,0 +1,9 @@ +#include + +int main() +{ + char *c14 = "\x0E"; + assert(c14[0] == 14); + assert(c14[1] == 0); + assert(c14[0] != 14); +} diff --git a/regression/cbmc/xml-trace2/test.desc b/regression/cbmc/xml-trace2/test.desc new file mode 100644 index 00000000000..0bc0b8bad23 --- /dev/null +++ b/regression/cbmc/xml-trace2/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--xml-ui +\{ 14, 0 \} +^EXIT=10$ +^SIGNAL=0$ +-- +"\\" diff --git a/src/goto-programs/graphml_witness.cpp b/src/goto-programs/graphml_witness.cpp index 012f5db1f0e..c28abd358a8 100644 --- a/src/goto-programs/graphml_witness.cpp +++ b/src/goto-programs/graphml_witness.cpp @@ -18,6 +18,7 @@ Author: Daniel Kroening #include #include #include +#include #include #include @@ -57,6 +58,12 @@ void graphml_witnesst::remove_l0_l1(exprt &expr) return; } + else if(expr.id() == ID_string_constant) + { + std::string output_string = expr_to_string(ns, "", expr); + if(!xmlt::is_printable_xml(output_string)) + expr = to_string_constant(expr).to_array_expr(); + } Forall_operands(it, expr) remove_l0_l1(*it); diff --git a/src/goto-programs/xml_goto_trace.cpp b/src/goto-programs/xml_goto_trace.cpp index abfd167ac4f..826877554eb 100644 --- a/src/goto-programs/xml_goto_trace.cpp +++ b/src/goto-programs/xml_goto_trace.cpp @@ -13,6 +13,7 @@ Author: Daniel Kroening #include "xml_goto_trace.h" +#include #include #include @@ -23,6 +24,38 @@ Author: Daniel Kroening #include "structured_trace_util.h" #include "xml_expr.h" +/// Rewrite all string constants to their array counterparts +static void replace_string_constants_rec(exprt &expr) +{ + for(auto &op : expr.operands()) + replace_string_constants_rec(op); + + if(expr.id() == ID_string_constant) + expr = to_string_constant(expr).to_array_expr(); +} + +/// Given an expression \p expr, produce a string representation that is +/// printable in XML 1.0. Produces an empty string if no valid XML 1.0 string +/// representing \p expr can be generated. +static std::string +get_printable_xml(const namespacet &ns, const irep_idt &id, const exprt &expr) +{ + std::string result = from_expr(ns, id, expr); + + if(!xmlt::is_printable_xml(result)) + { + exprt new_expr = expr; + replace_string_constants_rec(new_expr); + result = from_expr(ns, id, new_expr); + + // give up + if(!xmlt::is_printable_xml(result)) + return {}; + } + + return result; +} + xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns) { xmlt value_xml{"full_lhs_value"}; @@ -34,7 +67,7 @@ xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns) const auto &lhs_object = step.get_lhs_object(); const irep_idt identifier = lhs_object.has_value() ? lhs_object->get_identifier() : irep_idt(); - value_xml.data = from_expr(ns, identifier, value); + value_xml.data = get_printable_xml(ns, identifier, value); const auto &bv_type = type_try_dynamic_cast(value.type()); const auto &constant = expr_try_dynamic_cast(value); @@ -121,7 +154,7 @@ void convert( std::string full_lhs_string; if(step.full_lhs.is_not_nil()) - full_lhs_string = from_expr(ns, identifier, step.full_lhs); + full_lhs_string = get_printable_xml(ns, identifier, step.full_lhs); xml_assignment.new_element("full_lhs").data = full_lhs_string; xml_assignment.new_element(full_lhs_value(step, ns)); @@ -158,7 +191,7 @@ void convert( for(const auto &arg : step.io_args) { xml_output.new_element("value").data = - from_expr(ns, step.function_id, arg); + get_printable_xml(ns, step.function_id, arg); xml_output.new_element("value_expression").new_element(xml(arg, ns)); } break; @@ -176,7 +209,7 @@ void convert( for(const auto &arg : step.io_args) { xml_input.new_element("value").data = - from_expr(ns, step.function_id, arg); + get_printable_xml(ns, step.function_id, arg); xml_input.new_element("value_expression").new_element(xml(arg, ns)); } diff --git a/src/util/xml.cpp b/src/util/xml.cpp index 949bf8964ec..7a16e4e6ed3 100644 --- a/src/util/xml.cpp +++ b/src/util/xml.cpp @@ -101,12 +101,17 @@ void xmlt::escape(const std::string &s, std::ostream &out) out << '\n'; break; + case 0x9: // TAB + case 0x7F: // DEL + out << "&#" << std::to_string((unsigned char)ch) << ';'; + break; + default: - // � isn't allowed, but what shall we do? - if((ch>=0 && ch<' ') || ch==127) - out << "&#"+std::to_string((unsigned char)ch)+";"; - else - out << ch; + DATA_INVARIANT( + ch >= ' ', + "XML does not support escaping non-printable character " + + std::to_string((unsigned char)ch)); + out << ch; } } } @@ -135,16 +140,34 @@ void xmlt::escape_attribute(const std::string &s, std::ostream &out) out << """; break; + case 0x9: // TAB + case 0xA: // LF + case 0xD: // CR + case 0x7F: // DEL + out << "&#" << std::to_string((unsigned char)ch) << ';'; + break; + default: - // � isn't allowed, but what shall we do? - if((ch>=0 && ch<' ') || ch==127) - out << "&#"+std::to_string((unsigned char)ch)+";"; - else - out << ch; + DATA_INVARIANT( + ch >= ' ', + "XML does not support escaping non-printable character " + + std::to_string((unsigned char)ch)); + out << ch; } } } +bool xmlt::is_printable_xml(const std::string &s) +{ + for(const auto ch : s) + { + if(ch < 0x20 && ch != 0x9 && ch != 0xA && ch != 0xD) + return false; + } + + return true; +} + void xmlt::do_indent(std::ostream &out, unsigned indent) { out << std::string(indent, ' '); diff --git a/src/util/xml.h b/src/util/xml.h index ac788dfd87a..a78642607ad 100644 --- a/src/util/xml.h +++ b/src/util/xml.h @@ -123,6 +123,13 @@ class xmlt static void escape_attribute(const std::string &s, std::ostream &out); + /// Determine whether \p s does not contain any characters that cannot be + /// escaped in XML 1.0. See https://www.w3.org/TR/xml/#charsets for details. + /// \param s: string to verify + /// \return True if, and only if, all characters in \p s are taken from the + /// charset permitted in XML 1.0. + static bool is_printable_xml(const std::string &s); + protected: static void do_indent( std::ostream &out,