Skip to content

Commit

Permalink
fix 4byte symbols of utf8 (#302)
Browse files Browse the repository at this point in the history
* fix 4byte symbols of utf8

* fix windows build

* added json test

* update doc

* switch to u16 string

* operator== prototype

* u

* fix operator==

* windows fixes

* windows fixes

* try fix windows
  • Loading branch information
Alex2772 committed Jul 21, 2024
1 parent e500ded commit a00450b
Show file tree
Hide file tree
Showing 41 changed files with 486 additions and 279 deletions.
6 changes: 6 additions & 0 deletions .idea/copyright/AUI.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions .idea/copyright/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion aui.core/src/AUI/Common/AByteBufferBase64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ AString AByteBufferView::toBase64String() const {
size_t resultLength = 4 * ((size() + 2) / 3);
AString result(resultLength, '\0');
size_t i;
auto p = const_cast<wchar_t *>(result.c_str());
auto p = const_cast<char16_t*>(result.c_str());

for (i = 0; i < size() - 2; i += 3) {
*p++ = BASE64_CHARS[(data()[i] >> 2) & 0x3F];
Expand Down
28 changes: 14 additions & 14 deletions aui.core/src/AUI/Common/AColor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,34 +30,34 @@ AColor::AColor(const AString& s)

// #fff
case 4:
r = static_cast<float>(s.substr(1, 1).toNumberHex()) / 15.f;
g = static_cast<float>(s.substr(2, 1).toNumberHex()) / 15.f;
b = static_cast<float>(s.substr(3, 1).toNumberHex()) / 15.f;
r = static_cast<float>(s.substr(1, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
g = static_cast<float>(s.substr(2, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
b = static_cast<float>(s.substr(3, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
a = 1.f;
break;

// #ffff
case 5:
r = static_cast<float>(s.substr(1, 1).toNumberHex()) / 15.f;
g = static_cast<float>(s.substr(2, 1).toNumberHex()) / 15.f;
b = static_cast<float>(s.substr(3, 1).toNumberHex()) / 15.f;
a = static_cast<float>(s.substr(4, 1).toNumberHex()) / 15.f;
r = static_cast<float>(s.substr(1, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
g = static_cast<float>(s.substr(2, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
b = static_cast<float>(s.substr(3, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
a = static_cast<float>(s.substr(4, 1).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 15.f;
break;

// #ffffff
case 7:
r = static_cast<float>(s.substr(1, 2).toNumberHex()) / 255.f;
g = static_cast<float>(s.substr(3, 2).toNumberHex()) / 255.f;
b = static_cast<float>(s.substr(5, 2).toNumberHex()) / 255.f;
r = static_cast<float>(s.substr(1, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
g = static_cast<float>(s.substr(3, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
b = static_cast<float>(s.substr(5, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
a = 1.f;
break;

// #ffffffff
case 9:
r = static_cast<float>(s.substr(1, 2).toNumberHex()) / 255.f;
g = static_cast<float>(s.substr(3, 2).toNumberHex()) / 255.f;
b = static_cast<float>(s.substr(5, 2).toNumberHex()) / 255.f;
a = static_cast<float>(s.substr(7, 2).toNumberHex()) / 255.f;
r = static_cast<float>(s.substr(1, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
g = static_cast<float>(s.substr(3, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
b = static_cast<float>(s.substr(5, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
a = static_cast<float>(s.substr(7, 2).toNumberOrException(AString::TO_NUMBER_BASE_HEX)) / 255.f;
break;
}
}
Expand Down
3 changes: 3 additions & 0 deletions aui.core/src/AUI/Common/AStaticVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class AStaticVector {
insert(mBegin, std::make_move_iterator(rhs.begin()), std::make_move_iterator(rhs.end()));
rhs.clear();
}
// Constructs the vector from a braced list of values: delegates to the default constructor and
// then inserts every element of rhs, in order, at mBegin.
// NOTE(review): std::initializer_list exposes its elements as const, so the move iterators below
// yield const rvalues; elements are presumably copy-constructed rather than moved - confirm.
// NOTE(review): declared noexcept; verify that insert() cannot throw (e.g. when rhs holds more
// elements than the static capacity, or when StoredType's copy constructor throws).
constexpr AStaticVector(std::initializer_list<StoredType> rhs) noexcept: AStaticVector() {
insert(mBegin, std::make_move_iterator(rhs.begin()), std::make_move_iterator(rhs.end()));
}
constexpr ~AStaticVector() {
for (auto& v : *this) {
v.~StoredType();
Expand Down
211 changes: 145 additions & 66 deletions aui.core/src/AUI/Common/AString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,95 @@
#include <cstring>
#include "AString.h"
#include "AStringVector.h"
#include "AStaticVector.h"
#include <AUI/Common/AByteBuffer.h>

// utf8 stuff has a lot of magic
// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-pro-bounds-pointer-arithmetic)

// Encodes a single code point as UTF-16 code units.
// Values up to 0xffff are emitted as one unit; anything above is emitted as a two-unit pair
// (first unit offset from 0xD800, second unit offset from 0xDC00).
inline static AStaticVector<char16_t, 4> toUtf16(char32_t i) {
if (i > 0xffff) {
// supplementary plane: split the 20-bit offset into two 10-bit halves
const char32_t offset = i - 0x10000;
const auto highHalf = char16_t((offset >> 10) + 0xD800);
const auto lowHalf = char16_t((offset & 0x3FF) + 0xDC00);
return { highHalf, lowHalf };
}

return { char16_t(i) };
}

// Decodes one code point from a UTF-16 sequence and advances the iterator past the code units
// that were consumed.
//
// iterator - current position; must not be equal to last. Advanced by one unit, or by two units
//            when a well-formed surrogate pair was decoded.
// last     - end of the sequence.
//
// Returns the decoded code point. A unit that does not start a well-formed surrogate pair (a
// regular BMP character, a stray low surrogate, or a high surrogate that is not followed by a
// low surrogate) is returned unchanged and only that single unit is consumed.
template<typename T>
inline static char32_t fromUtf16(T& iterator, T last) {
const auto lead = static_cast<char32_t>(*(iterator++));
if (lead < 0xD800 || lead > 0xDBFF) {
// the unit just read is not a high surrogate, so it stands for itself; the following unit
// must not be touched
return lead;
}
if (iterator == last) {
// incomplete sequence: high surrogate at the very end of input
return lead;
}

const auto trail = static_cast<char32_t>(*iterator);
if (trail < 0xDC00 || trail > 0xDFFF) {
// bad entity: high surrogate without a low surrogate; leave the next unit for the next call
return lead;
}
++iterator;

return (((lead - 0xD800) << 10) | (trail - 0xDC00)) + 0x10000;
}

inline static void fromUtf8_impl(AString& destination, const char* str, size_t length) {
destination.reserve(length);

// parse utf8
for (; length && *str; --length)
{
if (*str & 0x80)
{
// utf8 symbol
if (*str & 0b00100000)
{
// 3-byte symbol
wchar_t t = *(str++) & 0b1111;
t <<= 6;
t |= *(str++) & 0b111111;
t <<= 6;
t |= *(str++) & 0b111111;
destination.push_back(t);
length -= 2;
} else
{
// 2-byte symbol
wchar_t t = *(str++) & 0b11111;
t <<= 6;
t |= *(str++) & 0b111111;
destination.push_back(t);
length -= 1;
}
} else
{
if ((*str & 0b1000'0000) == 0) {
// ascii symbol
destination.push_back(*(str++));
continue;
}
// utf8 symbol

if ((*str & 0b1110'0000) == 0b1100'0000) {
// 2-byte symbol
char16_t t = *(str++) & 0b11111;
t <<= 6;
t |= *(str++) & 0b111111;
destination.push_back(t);
length -= 1;
continue;
}

if ((*str & 0b1111'0000) == 0b1110'0000) {
// 3-byte symbol
char16_t t = *(str++) & 0b1111;
t <<= 6;
t |= *(str++) & 0b111111;
t <<= 6;
t |= *(str++) & 0b111111;
destination.push_back(t);
length -= 2;
continue;
}

if ((*str & 0b1111'1000) == 0b1111'0000) {
// 4-byte symbol
char32_t t = *(str++) & 0b111;
t <<= 6;
t |= *(str++) & 0b111111;
t <<= 6;
t |= *(str++) & 0b111111;
t <<= 6;
t |= *(str++) & 0b111111;
destination.insertAll(toUtf16(t));
length -= 3;
continue;
}


str++; // bad entity?
}
}

Expand Down Expand Up @@ -82,37 +133,55 @@ AString AString::fromUtf8(const char* buffer, size_t length) {
AByteBuffer AString::toUtf8() const noexcept
{
AByteBuffer buf;
for (wchar_t c : *this)
for (auto it = begin(); it != end();)
{
if (c >= 0x80)
{
if (c >= 0x800)
{
char b[] = {
static_cast<char>(0b11100000 | (c >> 12 & 0b1111)),
static_cast<char>(0b10000000 | (c >> 6 & 0b111111)),
static_cast<char>(0b10000000 | (c & 0b111111)),
0,
};
buf << b;
} else if (c >= 0x80)
{
char b[] = {
static_cast<char>(0b11000000 | (c >> 6 & 0b11111)),
static_cast<char>(0b10000000 | (c & 0b111111)),
0,
};
buf << b;
}
} else
auto c = *it;
if (c < 0x80) {
buf << static_cast<char>(c);
++it;
continue;
}

if (c < 0x800) {
char b[] = {
static_cast<char>(0b11000000 | (c >> 6 & 0b11111)),
static_cast<char>(0b10000000 | (c & 0b111111)),
0,
};
buf << b;
++it;
continue;
}

if (c < 0xD800) {
char b[] = {
static_cast<char>(0b11100000 | (c >> 12 & 0b1111)),
static_cast<char>(0b10000000 | (c >> 6 & 0b111111)),
static_cast<char>(0b10000000 | (c & 0b111111)),
0,
};
buf << b;
++it;
continue;
}

{
buf << *reinterpret_cast<char*>(&c);
const auto c = fromUtf16(it, end());

char b[] = {
static_cast<char>(0b11110000 | (c >> 18 & 0b111)),
static_cast<char>(0b10000000 | (c >> 12 & 0b111111)),
static_cast<char>(0b10000000 | (c >> 6 & 0b111111)),
static_cast<char>(0b10000000 | (c & 0b111111)),
0,
};
buf << b;
}
}
return buf;
}

AStringVector AString::split(wchar_t c) const noexcept
AStringVector AString::split(char16_t c) const noexcept
{
if (empty()) {
return {};
Expand All @@ -121,7 +190,7 @@ AStringVector AString::split(wchar_t c) const noexcept
result.reserve(length() / 10);
for (size_type s = 0;;)
{
auto next = std::wstring::find(c, s);
auto next = super::find(c, s);
if (next == npos)
{
result << substr(s);
Expand All @@ -134,7 +203,7 @@ AStringVector AString::split(wchar_t c) const noexcept
return result;
}

AString AString::trimLeft(wchar_t symbol) const noexcept
AString AString::trimLeft(char16_t symbol) const noexcept
{
for (auto i = begin(); i != end(); ++i)
{
Expand All @@ -146,7 +215,7 @@ AString AString::trimLeft(wchar_t symbol) const noexcept
return {};
}

AString AString::trimRight(wchar_t symbol) const noexcept
AString AString::trimRight(char16_t symbol) const noexcept
{
for (auto i = rbegin(); i != rend(); ++i)
{
Expand All @@ -158,7 +227,7 @@ AString AString::trimRight(wchar_t symbol) const noexcept
return {};
}

AString& AString::replaceAll(wchar_t from, wchar_t to) noexcept {
AString& AString::replaceAll(char16_t from, char16_t to) noexcept {
for (auto& s : *this) {
if (s == from)
s = to;
Expand Down Expand Up @@ -238,22 +307,32 @@ AString AString::fromLatin1(const char* buffer) {
}


int AString::toNumberDec() const noexcept
{
int n;
if (std::swscanf(c_str(), L"%d", &n) < 0)
return -1;
AOptional<int> AString::toNumber(aui::ranged_number<int, 2, 36> base) const noexcept {
int result = 0;
const auto NUMBER_LAST = std::min(int('0' + int(base) - 1), int('9'));
const auto LETTER_LAST = 'a' + int(base) - 11;
const auto LETTER_LAST_CAPITAL = 'A' + int(base) - 11;
for (auto c : *this) {
if (c >= '0' && c <= NUMBER_LAST) {
result = result * base + (c - '0');
continue;
}

return n;
}
if (int(base) > 10) {
if (c >= 'a' && c <= LETTER_LAST) {
result = result * base + (c - 'a' + 10);
continue;
}

int AString::toNumberHex() const noexcept
{
int n;
if (std::swscanf(c_str(), L"%x", &n) < 0)
return -1;
if (c >= 'A' && c <= LETTER_LAST_CAPITAL) {
result = result * base + (c - 'A' + 10);
continue;
}
}
return std::nullopt;
}

return n;
return result;
}

std::string AString::toStdString() const noexcept
Expand Down Expand Up @@ -1080,7 +1159,7 @@ AString AString::lowercase() const {
}

void AString::resizeToNullTerminator() {
wchar_t* i = data();
char16_t* i = data();
for (; *i; ++i);
resize(i - data());
}
Expand Down
Loading

0 comments on commit a00450b

Please sign in to comment.