From c6c1938de4af1e709c6fb0982ad4ba51fff9004a Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 6 Jul 2018 18:59:26 +0100 Subject: [PATCH] utf8 to utf16 conversion must use native endianness There is no obvious reason why native endianness should not be used, and tests failed on big-endian architectures. --- jbmc/src/java_bytecode/java_string_literals.cpp | 2 +- src/util/unicode.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/jbmc/src/java_bytecode/java_string_literals.cpp b/jbmc/src/java_bytecode/java_string_literals.cpp index b090adfaef4..324988544ac 100644 --- a/jbmc/src/java_bytecode/java_string_literals.cpp +++ b/jbmc/src/java_bytecode/java_string_literals.cpp @@ -106,7 +106,7 @@ symbol_exprt get_or_create_string_literal_symbol( if(string_refinement_enabled) { const array_exprt data = - utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); + utf16_to_array(utf8_to_utf16(id2string(value), false)); struct_exprt literal_init(new_symbol.type); literal_init.operands().resize(jls_struct.components().size()); diff --git a/src/util/unicode.h b/src/util/unicode.h index b8329ae1ba0..aeee035de1b 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -24,6 +24,7 @@ std::wstring widen(const std::string &s); std::string utf32_to_utf8(const std::basic_string &s); +std::wstring utf8_to_utf16(const std::string &in, bool swap_bytes); std::wstring utf8_to_utf16_big_endian(const std::string &); std::wstring utf8_to_utf16_little_endian(const std::string &); std::string utf16_little_endian_to_java(const wchar_t ch);