Skip to content

Commit

Permalink
replace ConvertUTF with ww898 utf-cpp (#1061)
Browse files Browse the repository at this point in the history
 introduce ww898-utfcpp instead of ConvertUTF (touch #692)
  • Loading branch information
elfmz committed Aug 25, 2021
1 parent 8c8a2db commit 50717df
Show file tree
Hide file tree
Showing 31 changed files with 1,230 additions and 1,467 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/artifacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
- uses: actions/checkout@v2

- name: Dependencies
run: sudo apt-get -y install $(cat dependencies${{ matrix.dependencies }}.txt)
run: sudo apt-get update ; sudo apt-get -y install $(cat dependencies${{ matrix.dependencies }}.txt)

- name: Create Build Environment
# Some projects don't allow in-source building, so create a separate build directory
Expand Down
97 changes: 54 additions & 43 deletions WinPort/src/APIStringCodepages.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,58 +16,72 @@
#include "WinPort.h"
#include "wineguts.h"
#include "PathHelpers.h"
#include "ConvertUTF.h"
#include "UtfConvert.hpp"



// ConversionResult (* fnCalcSpace) (int *out, const SRC_T** src, const SRC_T* src_end, ConversionFlags flags),
// ConversionResult (* fnConvert) (const SRC_T** src, const SRC_T* src_end, DST_T** dst, DST_T* dst_end, ConversionFlags flag
template <class SRC_T, class DST_T>
int utf_translation(
ConversionResult (* fnCalcSpace) (int *out, const SRC_T** src, const SRC_T* src_end, ConversionFlags flags),
ConversionResult (* fnConvert) (const SRC_T** src, const SRC_T* src_end, DST_T** dst, DST_T* dst_end, ConversionFlags flags),
int flags, const SRC_T *src, int srclen, DST_T *dst, int dstlen)
int utf_translation(int flags, const SRC_T *src, int srclen, DST_T *dst, int dstlen)
{
int ret;
const ConversionFlags cf = ((flags&MB_ERR_INVALID_CHARS)!=0) ? strictConversion : lenientConversion;
const SRC_T *source = (const SRC_T *)src, *source_end = (const SRC_T *)src;
if (srclen==-1) {
for(;*source_end;++source_end);
const bool fail_on_illformed = ((flags & MB_ERR_INVALID_CHARS) != 0);
size_t srclen_sz;
if (srclen < 0) {
for (srclen_sz = 0; src[srclen_sz]; ++srclen_sz) {}
// per MSDN - conversion should include terminating NUL char
++srclen_sz;

} else {
for(;srclen;++source_end, --srclen);
srclen_sz = (size_t)srclen;
}

if (dstlen==0) {
if (fnCalcSpace (&ret, &source, source_end, cf)!=conversionOK) {
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );
}

} else {
DST_T *target = (DST_T *)dst;
DST_T *target_end = target + dstlen;

ConversionResult cr = fnConvert(&source, source_end, &target, target_end, cf);
if (cr==targetExhausted) {
ret = 0;
WINPORT(SetLastError)( ERROR_INSUFFICIENT_BUFFER );
} else {
ret = target - (DST_T *)dst;
if (cr!=conversionOK) {
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );
if (dstlen == 0) {
DummyPushBack<DST_T> pb;
try {
const unsigned ucr = UtfConvert(src, srclen_sz, pb, fail_on_illformed);
if (ucr & (CONV_ILLFORMED_CHARS | CONV_NEED_MORE_SRC)) {
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );
}

} catch (std::exception &e) {
fprintf(stderr, "%s: %s\n", __FUNCTION__, e.what());
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );
}
return (int)pb.size();
}

ArrayPushBack<DST_T> pb(dst, dst + dstlen);
try {
const unsigned ucr = UtfConvert(src, srclen_sz, pb, fail_on_illformed);
if (ucr & (CONV_ILLFORMED_CHARS | CONV_NEED_MORE_SRC)) {
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );

} else if (ucr & CONV_NEED_MORE_DST) {
WINPORT(SetLastError)( ERROR_INSUFFICIENT_BUFFER );
return 0;
}

} catch (ArrayPushBackOverflow &e) {
WINPORT(SetLastError)( ERROR_INSUFFICIENT_BUFFER );
return 0;

} catch (std::exception &e) {
fprintf(stderr, "%s: %s\n", __FUNCTION__, e.what());
WINPORT(SetLastError)( ERROR_NO_UNICODE_TRANSLATION );
}
return ret;

return (int)pb.size();
}

static int utf32_utf8_wcstombs( int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
{
return utf_translation<UTF32, UTF8>( CalcSpaceUTF32toUTF8, ConvertUTF32toUTF8,
flags, (const UTF32 *)src, srclen, (UTF8 *)dst, dstlen);
return utf_translation(flags, (const uint32_t *)src, srclen, (uint8_t *)dst, dstlen);
}

static int utf32_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen)
{
return utf_translation<UTF8, UTF32>( CalcSpaceUTF8toUTF32, ConvertUTF8toUTF32,
flags, (const UTF8 *)src, srclen, (UTF32 *)dst, dstlen);
return utf_translation(flags, (const uint8_t *)src, srclen, (uint32_t *)dst, dstlen);
}

static int wide_cvtstub( int flags, const wchar_t *src, int srclen, wchar_t *dst, int dstlen)
Expand All @@ -88,11 +102,10 @@ static int wide_utf16_wcstombs( int flags, const wchar_t *src, int srclen, char
{
int ret;

if (dstlen > 0) dstlen/= sizeof(UTF16);
if (dstlen > 0) dstlen/= sizeof(uint16_t);

if (sizeof(WCHAR)==4) {
ret = utf_translation<UTF32, UTF16>( CalcSpaceUTF32toUTF16, ConvertUTF32toUTF16,
flags, (const UTF32 *)src, srclen, (UTF16 *)dst, dstlen);
if (sizeof(WCHAR) == 4) {
ret = utf_translation(flags, (const uint32_t *)src, srclen, (uint16_t *)dst, dstlen);
} else
ret = wide_cvtstub( flags, src, srclen, (wchar_t *)dst, dstlen);

Expand All @@ -102,7 +115,7 @@ static int wide_utf16_wcstombs( int flags, const wchar_t *src, int srclen, char
std::swap(dst[i * 2], dst[i * 2 + 1]);
}
}
ret*= sizeof(UTF16);
ret*= sizeof(uint16_t);
}

return ret;
Expand Down Expand Up @@ -145,14 +158,14 @@ static int wide_utf16_mbstowcs( int flags, const char *src, int srclen, WCHAR *d
{
int ret;

if (srclen > 0) srclen/= sizeof(UTF16);
if (srclen > 0) srclen/= sizeof(uint16_t);

char *tmp = NULL;
if (reverse) {
if (srclen==-1) srclen = wcslen((const wchar_t *)src) + 1;

const bool onstack = (srclen < 0x10000);
tmp = (char *) (onstack ? alloca(srclen * sizeof(UTF16)) : malloc(srclen * sizeof(UTF16)));
tmp = (char *) (onstack ? alloca(srclen * sizeof(uint16_t)) : malloc(srclen * sizeof(uint16_t)));

if (!tmp)
return -2;
Expand All @@ -166,9 +179,7 @@ static int wide_utf16_mbstowcs( int flags, const char *src, int srclen, WCHAR *d
}

if (sizeof(WCHAR)==4) {

ret = utf_translation<UTF16, UTF32>( CalcSpaceUTF16toUTF32, ConvertUTF16toUTF32,
flags, (const UTF16 *)src, srclen, (UTF32 *)dst, dstlen);
ret = utf_translation(flags, (const uint16_t *)src, srclen, (uint32_t *)dst, dstlen);
} else
ret = wide_cvtstub( flags, (const wchar_t *)src, srclen, dst, dstlen);

Expand Down
27 changes: 5 additions & 22 deletions WinPort/src/Backend/TTY/TTYFar2lClipboardBackend.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <fcntl.h>
#include <utils.h>
#include <base64.h>
#include <ConvertUTF.h>
#include <UtfConvert.hpp>
#include "TTYFar2lClipboardBackend.h"
#include "FSClipboardBackend.h"

Expand Down Expand Up @@ -162,17 +162,8 @@ void *TTYFar2lClipboardBackend::OnClipboardSetData(UINT format, void *data)
#if (__WCHAR_MAX__ <= 0xffff)
UTF32 *new_data = nullptr;
if (format == CF_UNICODETEXT && len != 0) { // UTF16 -> UTF32
int cnt = 0;
const UTF16 *src = (const UTF16 *)data;
CalcSpaceUTF16toUTF32(&cnt, &src, src + len / sizeof(UTF16), lenientConversion);
new_data = (UTF32 *)malloc((cnt + 1) * sizeof(UTF32));
if (new_data != nullptr) {
new_data[cnt] = 0;
src = (const UTF16 *)data;
UTF32 *dst = new_data;
ConvertUTF16toUTF32( &src, src + len / sizeof(UTF16), &dst, dst + cnt, lenientConversion);
len = cnt * sizeof(UTF32);
}
new_data = UtfConverter<uint16_t, uint32_t>
((const uint16_t*)data, len / sizeof(uint16_t)).MallocedCopy(len);
}
stk_ser.Push(new_data ? new_data : data, len);
stk_ser.PushPOD(len);
Expand Down Expand Up @@ -213,18 +204,10 @@ void *TTYFar2lClipboardBackend::OnClipboardGetData(UINT format)
stk_ser.Pop(data, len);
#if (__WCHAR_MAX__ <= 0xffff)
if (format == CF_UNICODETEXT) { // UTF32 -> UTF16
int cnt = 0;
const UTF32 *src = (const UTF32 *)data;
CalcSpaceUTF32toUTF16(&cnt, &src, src + len / sizeof(UTF32), lenientConversion);
UTF16 *new_data = (UTF16 *)malloc((cnt + 1) * sizeof(UTF16));
void *new_data = UtfConverter<uint32_t, uint16_t>
((const uint32_t*)data, len / sizeof(uint32_t)).MallocedCopy(len);
if (new_data != nullptr) {
new_data[cnt] = 0;
src = (const UTF32 *)data;
UTF16 *dst = new_data;
ConvertUTF32toUTF16( &src, src + len / sizeof(UTF32), &dst, dst + cnt, lenientConversion);
free(data);
data = new_data;
len = cnt * sizeof(UTF16);
}
}
#endif
Expand Down
33 changes: 9 additions & 24 deletions WinPort/src/Backend/TTY/TTYInput.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <assert.h>
#include "TTYInput.h"
#include "ConsoleInput.h"
#include "ConvertUTF.h"
#include "WideMB.h"
#include "WinPort.h"
#include <utils.h>

Expand Down Expand Up @@ -30,30 +30,15 @@ void TTYInput::PostCharEvent(wchar_t ch)

size_t TTYInput::BufTryDecodeUTF8()
{
const UTF8* utf8_start = (const UTF8*) &_buf[0];

#if (__WCHAR_MAX__ > 0xffff)
UTF32 utf32[2] = {}, *utf32_start = &utf32[0];
ConvertUTF8toUTF32 ( &utf8_start, utf8_start + _buf.size(),
&utf32_start, utf32_start + 1, lenientConversion);
if (utf32_start != &utf32[0]) {
PostCharEvent(utf32[0]);
return (utf8_start - (const UTF8*)&_buf[0]);

wchar_t wc;
size_t l = _buf.size();
const auto cr = MB2Wide_Unescaped(_buf.data(), l, wc, false);
if (cr & CONV_NEED_MORE_SRC) {
return TTY_PARSED_WANTMORE;
}
#else
UTF16 utf16[2] = {}, *utf16_start = &utf16[0];
ConvertUTF8toUTF16 ( &utf8_start, utf8_start + _buf.size(),
&utf16_start, utf16_start + 1, lenientConversion);
if (utf16_start != &utf16[0]) {
PostCharEvent(utf16[0]);
return (utf8_start - (const UTF8*)&_buf[0]);

}
#endif
_buf.erase(_buf.begin(), _buf.begin() + (utf8_start - (const UTF8*)&_buf[0]) );

return TTY_PARSED_WANTMORE;
assert(l);
PostCharEvent(wc);
return l;
}

void TTYInput::OnBufUpdated(bool idle)
Expand Down
19 changes: 7 additions & 12 deletions WinPort/src/Backend/TTY/TTYInputSequenceParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "ConsoleInput.h"
#include "WinPort.h"

#include "ConvertUTF.h"
#include "WideMB.h"


//See:
Expand Down Expand Up @@ -279,26 +279,21 @@ size_t TTYInputSequenceParser::ParseNChars2Key(const char *s, size_t l)

} else if ( (s[0] & 0b11000000) == 0b11000000) {
// looks like alt + multibyte UTF8 sequence
const UTF8 *src = (const UTF8 *)&s[0];
#if (__WCHAR_MAX__ > 0xffff)
UTF32 wc_buf[2] = {}, *wc = &wc_buf[0];
ConvertUTF8toUTF32(&src, src + l, &wc, wc + 1, lenientConversion);
#else
UTF16 wc_buf[2] = {}, *wc = &wc_buf[0];
ConvertUTF8toUTF16(&src, src + l, &wc, wc + 1, lenientConversion);
#endif
if (wc != &wc_buf[0]) {
wchar_t wc;
size_t l_used = l;
MB2Wide_Unescaped(s, l_used, wc, true);
if (l_used) {
INPUT_RECORD ir = {};
ir.EventType = KEY_EVENT;
ir.Event.KeyEvent.wRepeatCount = 1;
ir.Event.KeyEvent.uChar.UnicodeChar = wc_buf[0];
ir.Event.KeyEvent.uChar.UnicodeChar = wc;
ir.Event.KeyEvent.wVirtualKeyCode = VK_OEM_PERIOD;
ir.Event.KeyEvent.dwControlKeyState|= LEFT_ALT_PRESSED;
ir.Event.KeyEvent.bKeyDown = TRUE;
_ir_pending.emplace_back(ir); // g_winport_con_in.Enqueue(&ir, 1);
ir.Event.KeyEvent.bKeyDown = FALSE;
_ir_pending.emplace_back(ir); // g_winport_con_in.Enqueue(&ir, 1);
return src - (const UTF8*) &s[0];
return l_used;
}
}

Expand Down
43 changes: 15 additions & 28 deletions WinPort/src/Backend/TTY/TTYOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <string>
#include <os_call.hpp>
#include "TTYOutput.h"
#include "ConvertUTF.h"
#include "WideMB.h"

#define ESC "\x1b"

Expand Down Expand Up @@ -73,27 +73,11 @@ void TTYOutput::FinalizeSameChars()
return;
}

char buf[64];
int len = 1;

if (_same_chars.wch >= 0x80) {
UTF8 *dst = (UTF8 *)&buf[0];
#if (__WCHAR_MAX__ > 0xffff)
const UTF32* src = (const UTF32*)&_same_chars.wch;
if (ConvertUTF32toUTF8 (&src, src + 1, &dst,
dst + ARRAYSIZE(buf), lenientConversion) == conversionOK) {
#else
const UTF16* src = (const UTF16*)&_same_chars.wch;
if (ConvertUTF16toUTF8 (&src, src + 1, &dst,
dst + ARRAYSIZE(buf), lenientConversion) == conversionOK) {
#endif
len = (int)(dst - (UTF8 *)&buf[0]);
assert(size_t(len) <= ARRAYSIZE(buf));
} else {
buf[0] = '?';
}
_same_chars.tmp.clear();
Wide2MB_UnescapedAppend(_same_chars.wch, _same_chars.tmp);
} else {
buf[0] = (char)(unsigned char)_same_chars.wch;
_same_chars.tmp = (char)(unsigned char)_same_chars.wch;
}

// When enough identical characters have been queued:
Expand All @@ -102,24 +86,27 @@ void TTYOutput::FinalizeSameChars()
// - Otherwise just output copies of repeated char sequence
if (_same_chars.count <= 5
|| (!_far2l_tty && (_same_chars.wch != L' ' || _same_chars.count <= 8))) {

// output plain <count> copies of repeated char sequence
_rawbuf.reserve(_rawbuf.size() + len * _same_chars.count);
_rawbuf.reserve(_rawbuf.size() + _same_chars.tmp.size() * _same_chars.count);
do {
_rawbuf.insert(_rawbuf.end(), &buf[0], &buf[len]);
_rawbuf.insert(_rawbuf.end(), _same_chars.tmp.begin(), _same_chars.tmp.end());
} while (--_same_chars.count);

} else {
char sz[32];
int sz_len;
if (_far2l_tty) {
_rawbuf.insert(_rawbuf.end(), &buf[0], &buf[len]);
len = sprintf(buf, // repeat last character <count-1> times
_rawbuf.insert(_rawbuf.end(), _same_chars.tmp.begin(), _same_chars.tmp.end());
sz_len = sprintf(sz, // repeat last character <count-1> times
ESC "[%ub", _same_chars.count - 1);
} else {
len = sprintf(buf, // erase <count> chars and move cursor forward by <count>
sz_len = sprintf(sz, // erase <count> chars and move cursor forward by <count>
ESC "[%uX" ESC "[%uC", _same_chars.count, _same_chars.count);
}
if (len >= 0) {
assert(size_t(len) <= ARRAYSIZE(buf));
_rawbuf.insert(_rawbuf.end(), &buf[0], &buf[len]);
if (sz_len >= 0) {
assert(size_t(sz_len) <= ARRAYSIZE(sz));
_rawbuf.insert(_rawbuf.end(), &sz[0], &sz[sz_len]);
}
_same_chars.count = 0;
}
Expand Down
1 change: 1 addition & 0 deletions WinPort/src/Backend/TTY/TTYOutput.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class TTYOutput
// State of the current run of identical characters awaiting output;
// consumed and reset by FinalizeSameChars().
struct {
// The character being repeated.
WCHAR wch = 0;
// Number of copies of wch still pending; FinalizeSameChars() leaves it at 0.
unsigned int count = 0;
// Scratch buffer reused across calls: FinalizeSameChars() fills it with the
// byte sequence emitted for wch (a single byte when wch < 0x80, otherwise
// whatever Wide2MB_UnescapedAppend produces).
std::string tmp;
} _same_chars;
bool _far2l_tty;

Expand Down
Loading

0 comments on commit 50717df

Please sign in to comment.