Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tool_cb_wrt: fix invalid unicode for windows console #10890

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/tool_cb_hdr.c
Expand Up @@ -87,6 +87,12 @@ size_t tool_header_cb(char *ptr, size_t size, size_t nmemb, void *userdata)
}
#endif

#ifdef WIN32
/* Discard incomplete UTF-8 sequence buffered from body */
if(outs->utf8seq[0])
memset(outs->utf8seq, 0, sizeof(outs->utf8seq));
#endif

/*
* Write header data when curl option --dump-header (-D) is given.
*/
Expand Down
136 changes: 116 additions & 20 deletions src/tool_cb_wrt.c
Expand Up @@ -233,35 +233,131 @@ size_t tool_write_cb(char *buffer, size_t sz, size_t nmemb, void *userdata)

#ifdef WIN32
fhnd = _get_osfhandle(fileno(outs->stream));
/* if windows console then UTF-8 must be converted to UTF-16 */
if(isatty(fileno(outs->stream)) &&
GetConsoleScreenBufferInfo((HANDLE)fhnd, &console_info)) {
DWORD in_len = (DWORD)(sz * nmemb);
wchar_t* wc_buf;
wchar_t *wc_buf;
DWORD wc_len;
unsigned char *rbuf = (unsigned char *)buffer;
DWORD rlen = (DWORD)bytes;

/* calculate buffer size for wide characters */
wc_len = MultiByteToWideChar(CP_UTF8, 0, buffer, in_len, NULL, 0);
wc_buf = (wchar_t*) malloc(wc_len * sizeof(wchar_t));
if(!wc_buf)
return CURL_WRITEFUNC_ERROR;
#define IS_TRAILING_BYTE(x) (0x80 <= (x) && (x) < 0xC0)

/* calculate buffer size for multi-byte characters */
wc_len = MultiByteToWideChar(CP_UTF8, 0, buffer, in_len, wc_buf, wc_len);
if(!wc_len) {
free(wc_buf);
return CURL_WRITEFUNC_ERROR;
/* attempt to complete an incomplete UTF-8 sequence from previous call */
if(outs->utf8seq[0] && rlen) {
bool complete = false;
/* two byte sequence (lead byte 110yyyyy) */
if(0xC0 <= outs->utf8seq[0] && outs->utf8seq[0] < 0xE0) {
outs->utf8seq[1] = *rbuf++;
--rlen;
complete = true;
}
/* three byte sequence (lead byte 1110zzzz) */
else if(0xE0 <= outs->utf8seq[0] && outs->utf8seq[0] < 0xF0) {
if(!outs->utf8seq[1]) {
outs->utf8seq[1] = *rbuf++;
--rlen;
}
if(rlen && !outs->utf8seq[2]) {
outs->utf8seq[2] = *rbuf++;
--rlen;
complete = true;
}
}
/* four byte sequence (lead byte 11110uuu) */
else if(0xF0 <= outs->utf8seq[0] && outs->utf8seq[0] < 0xF8) {
if(!outs->utf8seq[1]) {
outs->utf8seq[1] = *rbuf++;
--rlen;
}
if(rlen && !outs->utf8seq[2]) {
outs->utf8seq[2] = *rbuf++;
--rlen;
}
if(rlen && !outs->utf8seq[3]) {
outs->utf8seq[3] = *rbuf++;
--rlen;
complete = true;
}
}

if(complete) {
WCHAR prefix[3] = {0}; /* UTF-16 (1-2 WCHARs) + NUL */

if(MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)outs->utf8seq, -1,
prefix, sizeof(prefix)/sizeof(prefix[0]))) {
DEBUGASSERT(prefix[3] == L'\0');
if(!WriteConsoleW(
(HANDLE) fhnd,
prefix,
prefix[1] ? 2 : 1,
NULL,
NULL)) {
return CURL_WRITEFUNC_ERROR;
}
}
/* else: UTF-8 input was not well formed and OS is pre-Vista which
drops invalid characters instead of writing U+FFFD to output. */

memset(outs->utf8seq, 0, sizeof(outs->utf8seq));
}
}

if(!WriteConsoleW(
(HANDLE) fhnd,
wc_buf,
wc_len,
&wc_len,
NULL)) {
/* suppress an incomplete utf-8 sequence at end of rbuf */
if(!outs->utf8seq[0] && rlen && (rbuf[rlen - 1] & 0x80)) {
/* check for lead byte from a two, three or four byte sequence */
if(0xC0 <= rbuf[rlen - 1] && rbuf[rlen - 1] < 0xF8) {
outs->utf8seq[0] = rbuf[rlen - 1];
rlen -= 1;
}
else if(rlen >= 2 && IS_TRAILING_BYTE(rbuf[rlen - 1])) {
/* check for lead byte from a three or four byte sequence */
if(0xE0 <= rbuf[rlen - 2] && rbuf[rlen - 2] < 0xF8) {
outs->utf8seq[0] = rbuf[rlen - 2];
outs->utf8seq[1] = rbuf[rlen - 1];
rlen -= 2;
}
else if(rlen >= 3 && IS_TRAILING_BYTE(rbuf[rlen - 2])) {
/* check for lead byte from a four byte sequence */
if(0xF0 <= rbuf[rlen - 3] && rbuf[rlen - 3] < 0xF8) {
outs->utf8seq[0] = rbuf[rlen - 3];
outs->utf8seq[1] = rbuf[rlen - 2];
outs->utf8seq[2] = rbuf[rlen - 1];
rlen -= 3;
}
}
}
}

if(rlen) {
/* calculate buffer size for wide characters */
wc_len = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)rbuf, rlen, NULL, 0);
if(!wc_len)
return CURL_WRITEFUNC_ERROR;

wc_buf = (wchar_t*) malloc(wc_len * sizeof(wchar_t));
if(!wc_buf)
return CURL_WRITEFUNC_ERROR;

wc_len = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)rbuf, rlen, wc_buf,
wc_len);
if(!wc_len) {
free(wc_buf);
return CURL_WRITEFUNC_ERROR;
}

if(!WriteConsoleW(
(HANDLE) fhnd,
wc_buf,
wc_len,
NULL,
NULL)) {
free(wc_buf);
return CURL_WRITEFUNC_ERROR;
}
free(wc_buf);
return CURL_WRITEFUNC_ERROR;
}
free(wc_buf);

rc = bytes;
}
else
Expand Down
6 changes: 6 additions & 0 deletions src/tool_operate.c
Expand Up @@ -464,6 +464,12 @@ static CURLcode post_per_transfer(struct GlobalConfig *global,
}
}

#ifdef WIN32
/* Discard incomplete UTF-8 sequence buffered from body */
if(outs->utf8seq[0])
memset(outs->utf8seq, 0, sizeof(outs->utf8seq));
#endif

/* if retry-max-time is non-zero, make sure we haven't exceeded the
time */
if(per->retry_numretries &&
Expand Down
6 changes: 6 additions & 0 deletions src/tool_sdecls.h
Expand Up @@ -57,6 +57,9 @@
* 'init' member holds original file size or offset at which truncation is
* taking place. Always zero unless appending to a non-empty regular file.
*
* [Windows]
* 'utf8seq' member buffers the bytes of an incomplete UTF-8 sequence destined
* for the console until the rest of the sequence arrives. It is sized to hold
* one complete sequence (up to 4 bytes) plus a terminating NUL.
*/

struct OutStruct {
Expand All @@ -68,6 +71,9 @@ struct OutStruct {
FILE *stream;
curl_off_t bytes;
curl_off_t init;
#ifdef WIN32
unsigned char utf8seq[5];
#endif
};

/*
Expand Down