Permalink
Browse files

Fix utf8 locale on windows

* Add Catch framework
* fix utf8 locale on windows.
  • Loading branch information...
cor3ntin committed Mar 13, 2018
1 parent 3026ec2 commit 83e0b956bbfeeb55b58b24838552ad2bcbff2ff1
@@ -5,6 +5,9 @@
# ensure a minimal version of CMake
# (this has to be the first command, before project(), so that the policy
# settings are in place when the project and its toolchain get configured)
cmake_minimum_required(VERSION 3.5)

# project name
project(aseba)

# MSVC only: read source files as UTF-8 and use UTF-8 as the execution
# character set, so non-ASCII literals behave as with the other compilers
add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

include(FeatureSummary)
@@ -20,6 +23,8 @@ include(CMakeModules/enki.cmake)
include(CMakeModules/deployqt.cmake)
include(CMakeModules/codesign.cmake)
# Catch test framework, made available to test executables as the `catch2` target
add_subdirectory(third_party/catch2)
# testing and defines
enable_testing()
@@ -36,8 +36,11 @@
#include <vector>
#include <stdexcept>
#include <cstdint>
#include <locale>
#include <codecvt>
#include "utils.h"
// workaround for broken libstdc++ on Android
// see https://github.com/android-ndk/ndk/issues/82
#ifdef __ANDROID__
@@ -195,6 +198,27 @@ namespace Aseba
return os;
}
/* //FIXME:
* Lacking proper unicode facilities,
* we rely on the locale to do the character categorization for us.
* But because we want the categorization to obey the unicode specification while the
* current locale may not be based on unicode, we need to force a locale.
*
* Note that this approach does not work reliably
* - There is no reason for us to use wchar_t outside of win32 api calls boundaries
* - A wchar_t may not encode a unicode character at all
* - Even if it does, we may be dealing with a surrogate pair which would be encoded as 2 wchar_t
* - Or a multiple-codepoint grapheme
*/
//! Tell whether a wide character is alphanumeric according to a Unicode-aware classification.
//! \param c the wide character to classify
//! \return true if c is a letter or a digit, false otherwise
//!
//! On Windows the classification is delegated to the Win32 API. Elsewhere a UTF-8
//! locale is forced; if none can be constructed on the host, the classic "C" locale
//! is used, so the function degrades to ASCII-only classification instead of throwing.
bool is_utf8_alpha_num(wchar_t c) {
#ifdef _WIN32
	// IsCharAlphaNumericW returns a BOOL (int); compare explicitly to get a bool
	return IsCharAlphaNumericW(c) != 0;
#else
	// Built once on first use; std::locale's constructor throws std::runtime_error
	// when the named locale is not installed, hence the fallback chain.
	static const std::locale utf8Locale = []() -> std::locale {
		static const char* const candidates[] = { "en_US.UTF-8", "C.UTF-8" };
		for (const char* name : candidates) {
			try {
				return std::locale(name);
			} catch (const std::runtime_error&) {
				// this locale is not available on the host, try the next one
			}
		}
		return std::locale::classic();
	}();
	return std::isalnum(c, utf8Locale);
#endif
}
/*
This code is heavily inspired by http://www.cplusplus.com/forum/general/7142/
@@ -177,6 +177,8 @@ namespace Aseba
//! Transform a UTF8 string into a wstring, this function is thread-safe
std::wstring UTF8ToWString(const std::string& s);
//! Return whether c is an alphanumeric character; relies on the Win32 API on Windows and on a forced UTF-8 locale elsewhere
bool is_utf8_alpha_num(wchar_t c);
//! Update the XModem CRC (x^16 + x^12 + x^5 + 1 (0x1021)) with a wstring
uint16_t crcXModem(const uint16_t oldCrc, const std::wstring& s);
@@ -20,6 +20,7 @@
#include "compiler.h"
#include "common/utils/FormatableString.h"
#include "common/utils/utils.h"
#include <cstdlib>
#include <sstream>
#include <ostream>
@@ -170,26 +171,6 @@ namespace Aseba
oss << L" : " << sValue;
return oss.str();
}
/* //FIXME:
* Lacking proper unicode facilities,
* we rely on the locale to do the character categorization for us.
* But because we want the categorization to obey the unicode specification while the
* current locale may not be based on unicode, we need to force a locale.
*
* Note that this approach does not work reliably
* - There is no reason for us to use wchar_t outside of win32 api calls boundaries
* - A wchar_t may not encode a unicode character at all
* - Even if it does, we may be dealing with a surrogate pair which would be encoded as 2 wchar_t
* - Or a multiple-codepoint grapheme
*/
//! Tell whether a character is alphanumeric according to a forced UTF-8 locale.
//! \param c the character to classify
//! \return true if c is a letter or a digit in the selected locale, false otherwise
//!
//! std::locale's constructor throws std::runtime_error when the requested locale is
//! not installed; rather than letting that escape, fall back to another UTF-8 locale
//! and finally to the classic "C" locale (ASCII-only classification).
template <typename CharT>
bool is_utf8_alpha_num(CharT c) {
	// Built once per instantiation, on first use
	static const std::locale utf8Locale = []() -> std::locale {
		static const char* const candidates[] = { "en_US.UTF-8", "C.UTF-8" };
		for (const char* name : candidates) {
			try {
				return std::locale(name);
			} catch (const std::runtime_error&) {
				// this locale is not available on the host, try the next one
			}
		}
		return std::locale::classic();
	}();
	return std::isalnum(c, utf8Locale);
}
//! Parse source and build tokens vector
//! \param source source code
void Compiler::tokenize(std::wistream& source)
@@ -4,3 +4,7 @@ add_executable(aseba-test-invalid-utf8
)
target_link_libraries(aseba-test-invalid-utf8 asebacommon)
add_test(NAME aseba-test-invalid-utf8 COMMAND aseba-test-invalid-utf8)
# UTF-8 test executable: built from utf8.cpp, linked against asebacommon and
# catch2, and registered with CTest under its own name
add_executable(tst_compiler_utf8
	utf8.cpp
)
target_link_libraries(tst_compiler_utf8
	asebacommon
	catch2
)
add_test(
	NAME tst_compiler_utf8
	COMMAND tst_compiler_utf8
)
@@ -0,0 +1,14 @@
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
#include "common/utils/utils.h"
// Checks that is_utf8_alpha_num accepts ASCII and accented letters/digits and
// rejects whitespace and punctuation.
// The tag is passed as Catch's second argument: when written inside the name
// string it is just part of the name and the test cannot be selected with [utf8].
TEST_CASE("Test utf8 chars", "[utf8]") {
	using namespace Aseba;

	// plain ASCII
	REQUIRE(is_utf8_alpha_num(L'0'));
	REQUIRE(is_utf8_alpha_num(L'a'));

	// non-ASCII letters, both as an escape and as literal source characters
	REQUIRE(is_utf8_alpha_num(L'\u00E9'));
	REQUIRE(is_utf8_alpha_num(L'é'));
	REQUIRE(is_utf8_alpha_num(L'ä'));
	REQUIRE(is_utf8_alpha_num(L'ǔ'));
	REQUIRE(is_utf8_alpha_num(L'Ĵ'));

	// characters that must not be classified as alphanumeric
	REQUIRE_FALSE(is_utf8_alpha_num(L' '));
	REQUIRE_FALSE(is_utf8_alpha_num(L'+'));
	REQUIRE_FALSE(is_utf8_alpha_num(L'\n'));
}
@@ -0,0 +1,2 @@
# Interface target for Catch: it builds nothing itself and only hands the
# include directory to whatever links against it
add_library(catch2 INTERFACE)
target_include_directories(catch2
	INTERFACE
		"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
Oops, something went wrong.

0 comments on commit 83e0b95

Please sign in to comment.