From ece1672ae805cc907262ea6850145129db6a9f8b Mon Sep 17 00:00:00 2001
From: aG0aep6G
Date: Sat, 21 Apr 2018 14:49:05 +0200
Subject: [PATCH 1/5] fix conversion from char to wchar_t in LockingTextWriter.put

---
 std/stdio.d | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/std/stdio.d b/std/stdio.d
index a1b59db38a7..2c7e7def4d4 100644
--- a/std/stdio.d
+++ b/std/stdio.d
@@ -2923,7 +2923,7 @@ is empty, throws an `Exception`. In case of an I/O error throws
         // the file's orientation (byte- or wide-oriented)
         int orientation_;
 
-        // A buffer for when we need to transcode.
+        // Buffers for when we need to transcode.
         wchar highSurrogate = '\0'; // '\0' indicates empty
         void highSurrogateShouldBeEmpty() @safe
         {
@@ -2931,6 +2931,8 @@ is empty, throws an `Exception`. In case of an I/O error throws
             if (highSurrogate != '\0')
                 throw new UTFException("unpaired surrogate UTF-16 value");
         }
+        char[4] rbuf8;
+        size_t rbuf8Filled = 0;
     public:
 
         this(ref File f) @trusted
@@ -3018,6 +3020,7 @@ is empty, throws an `Exception`. In case of an I/O error throws
         void put(C)(scope C c) @safe if (isSomeChar!C || is(C : const(ubyte)))
         {
             import std.traits : Parameters;
+            import std.utf : decodeFront, encode, stride;
             static auto trustedFPUTC(int ch, _iobuf* h) @trusted
             {
                 return FPUTC(ch, h);
@@ -3029,10 +3032,29 @@ is empty, throws an `Exception`. In case of an I/O error throws
 
             static if (c.sizeof == 1)
             {
-                // simple char
                 highSurrogateShouldBeEmpty();
                 if (orientation_ <= 0) trustedFPUTC(c, handle_);
-                else trustedFPUTWC(c, handle_);
+                else if (c <= 0x7F) trustedFPUTWC(c, handle_);
+                else if (c >= 0b1100_0000) // start byte of multibyte sequence
+                {
+                    rbuf8[0] = c;
+                    rbuf8Filled = 1;
+                }
+                else // continuation byte of multibyte sequence
+                {
+                    rbuf8[rbuf8Filled] = c;
+                    ++rbuf8Filled;
+                    if (stride(rbuf8[]) == rbuf8Filled) // sequence is complete
+                    {
+                        char[] str = rbuf8[0 .. rbuf8Filled];
+                        immutable dchar d = decodeFront(str);
+                        wchar_t[4 / wchar_t.sizeof] wbuf;
+                        immutable size = encode(wbuf, d);
+                        foreach (i; 0 .. size)
+                            trustedFPUTWC(wbuf[i], handle_);
+                        rbuf8Filled = 0;
+                    }
+                }
             }
             else static if (c.sizeof == 2)
             {
@@ -3640,6 +3662,18 @@ void main()
     }
     assert(std.file.readText!string(deleteme).stripLeft("\uFEFF") == "foobar");
 }
+@safe unittest // char -> wchar_t
+{
+    static import std.file;
+    auto deleteme = testFilename();
+    scope(exit) std.file.remove(deleteme);
+    {
+        auto writer = File(deleteme, "w,ccs=UTF-16LE").lockingTextWriter();
+        writer.put("ö");
+        writer.put("\U0001F608");
+    }
+    assert(std.file.readText!wstring(deleteme) == "ö\U0001F608"w);
+}
 
 @safe unittest
 {

From 41f78fc1e111323a5097a2f96d8e232cb27e01d1 Mon Sep 17 00:00:00 2001
From: aG0aep6G
Date: Sat, 21 Apr 2018 15:39:34 +0200
Subject: [PATCH 2/5] fix conversion from wchar to wchar_t in LockingTextWriter.put

This fixes issue 18789.
---
 std/stdio.d | 63 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/std/stdio.d b/std/stdio.d
index 2c7e7def4d4..528dd544273 100644
--- a/std/stdio.d
+++ b/std/stdio.d
@@ -3058,39 +3058,44 @@ is empty, throws an `Exception`. In case of an I/O error throws
             }
             else static if (c.sizeof == 2)
             {
-                import std.utf : encode, decode;
+                import std.utf : decode;
 
-                if (orientation_ <= 0)
+                if (c <= 0x7F)
                 {
-                    if (c <= 0x7F)
-                    {
-                        highSurrogateShouldBeEmpty();
-                        trustedFPUTC(c, handle_);
-                    }
-                    else if (0xD800 <= c && c <= 0xDBFF) // high surrogate
+                    highSurrogateShouldBeEmpty();
+                    if (orientation_ <= 0) trustedFPUTC(c, handle_);
+                    else trustedFPUTWC(c, handle_);
+                }
+                else if (0xD800 <= c && c <= 0xDBFF) // high surrogate
+                {
+                    highSurrogateShouldBeEmpty();
+                    highSurrogate = c;
+                }
+                else // standalone or low surrogate
+                {
+                    dchar d = c;
+                    if (highSurrogate != '\0')
                     {
-                        highSurrogateShouldBeEmpty();
-                        highSurrogate = c;
+                        immutable wchar[2] rbuf = [highSurrogate, c];
+                        size_t index = 0;
+                        d = decode(rbuf[], index);
+                        highSurrogate = 0;
                     }
-                    else // standalone or low surrogate
+                    if (orientation_ <= 0)
                     {
-                        dchar d = c;
-                        if (highSurrogate != '\0')
-                        {
-                            immutable wchar[2] rbuf = [highSurrogate, c];
-                            size_t index = 0;
-                            d = decode(rbuf[], index);
-                            highSurrogate = 0;
-                        }
                         char[4] wbuf;
                         immutable size = encode(wbuf, d);
                         foreach (i; 0 .. size)
                             trustedFPUTC(wbuf[i], handle_);
                     }
-                }
-                else
-                {
-                    trustedFPUTWC(c, handle_);
+                    else
+                    {
+                        wchar_t[4 / wchar_t.sizeof] wbuf;
+                        immutable size = encode(wbuf, d);
+                        foreach (i; 0 .. size)
+                            trustedFPUTWC(wbuf[i], handle_);
+                    }
+                    rbuf8Filled = 0;
                 }
             }
             else // 32-bit characters
@@ -3674,6 +3679,18 @@ void main()
     }
     assert(std.file.readText!wstring(deleteme) == "ö\U0001F608"w);
 }
+@safe unittest // wchar -> wchar_t
+{
+    static import std.file;
+    auto deleteme = testFilename();
+    scope(exit) std.file.remove(deleteme);
+    {
+        auto writer = File(deleteme, "w,ccs=UTF-16LE").lockingTextWriter();
+        writer.put("ö"w);
+        writer.put("\U0001F608"w);
+    }
+    assert(std.file.readText!wstring(deleteme) == "ö\U0001F608"w);
+}
 
 @safe unittest
 {

From b394e321be8b836c5805dae7e39b8184ea59f6cb Mon Sep 17 00:00:00 2001
From: aG0aep6G
Date: Fri, 18 Sep 2020 07:22:47 +0200
Subject: [PATCH 3/5] refactor: merge tests

---
 std/stdio.d | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/std/stdio.d b/std/stdio.d
index 528dd544273..7ef3224f706 100644
--- a/std/stdio.d
+++ b/std/stdio.d
@@ -3667,29 +3667,22 @@ void main()
     }
     assert(std.file.readText!string(deleteme).stripLeft("\uFEFF") == "foobar");
 }
-@safe unittest // char -> wchar_t
-{
-    static import std.file;
-    auto deleteme = testFilename();
-    scope(exit) std.file.remove(deleteme);
-    {
-        auto writer = File(deleteme, "w,ccs=UTF-16LE").lockingTextWriter();
-        writer.put("ö");
-        writer.put("\U0001F608");
-    }
-    assert(std.file.readText!wstring(deleteme) == "ö\U0001F608"w);
-}
-@safe unittest // wchar -> wchar_t
+@safe unittest // char/wchar -> wchar_t
 {
     static import std.file;
+
     auto deleteme = testFilename();
     scope(exit) std.file.remove(deleteme);
     {
         auto writer = File(deleteme, "w,ccs=UTF-16LE").lockingTextWriter();
+        // char -> wchar_t
+        writer.put("ä");
+        writer.put("\U0001F607");
+        // wchar -> wchar_t
         writer.put("ö"w);
         writer.put("\U0001F608"w);
     }
-    assert(std.file.readText!wstring(deleteme) == "ö\U0001F608"w);
+    assert(std.file.readText!wstring(deleteme) == "ä\U0001F607ö\U0001F608"w);
 }
 
 @safe unittest

From 4b416afe1656607ac4f7a1cb704dbc47157d7398 Mon Sep 17 00:00:00 2001
From: aG0aep6G
Date: Mon, 21 Sep 2020 13:33:52 +0200
Subject: [PATCH 4/5] ditch the ccs flag; set locale instead

---
Reviewer revision (text in this section is ignored by git-am):
oldCt now holds a private, NUL-terminated copy of the setlocale() result,
because the later setlocale() calls in this test may overwrite the buffer the
original pointer refers to before scope(exit) restores the locale.
The hunk counts here and the hunk offset in 5/5 were adjusted accordingly;
the blob ids on the index lines were not regenerated.

 std/stdio.d | 46 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/std/stdio.d b/std/stdio.d
index 7ef3224f706..308dd8291ab 100644
--- a/std/stdio.d
+++ b/std/stdio.d
@@ -3669,20 +3669,48 @@ void main()
 }
 @safe unittest // char/wchar -> wchar_t
 {
+    import core.stdc.locale : LC_CTYPE, setlocale;
+    import core.stdc.wchar_ : fwide;
+    import std.algorithm.searching : any, endsWith;
+    import std.conv : text;
+    import std.meta : AliasSeq;
+    import std.string : fromStringz, stripLeft;
     static import std.file;
-
     auto deleteme = testFilename();
     scope(exit) std.file.remove(deleteme);
+    const char* oldCt = () @trusted {
+        auto p = setlocale(LC_CTYPE, null);
+        // Later setlocale calls may overwrite the returned buffer, so keep a copy.
+        return p ? (p.fromStringz ~ '\0').ptr : null;
+    }();
+    const utf8 = ["en_US.UTF-8", "C.UTF-8", ".65001"].any!((loc) @trusted {
+        return setlocale(LC_CTYPE, loc.ptr).fromStringz.endsWith(loc);
+    });
+    scope(exit) () @trusted { setlocale(LC_CTYPE, oldCt); } ();
+    version (DIGITAL_MARS_STDIO) // DM can't handle Unicode above U+07FF.
     {
-        auto writer = File(deleteme, "w,ccs=UTF-16LE").lockingTextWriter();
-        // char -> wchar_t
-        writer.put("ä");
-        writer.put("\U0001F607");
-        // wchar -> wchar_t
-        writer.put("ö"w);
-        writer.put("\U0001F608"w);
+        alias strs = AliasSeq!("xä\u07FE", "yö\u07FF"w);
+    }
+    else
+    {
+        alias strs = AliasSeq!("xä\U0001F607", "yö\U0001F608"w);
+    }
+    {
+        auto f = File(deleteme, "w");
+        version (MICROSOFT_STDIO)
+        {
+            () @trusted { setmode(fileno(f.getFP()), _O_U8TEXT); } ();
+        }
+        else
+        {
+            assert(fwide(f.getFP(), 1) == 1);
+        }
+        auto writer = f.lockingTextWriter();
+        assert(writer.orientation_ == 1);
+        static foreach (s; strs) writer.put(s);
     }
-    assert(std.file.readText!wstring(deleteme) == "ä\U0001F607ö\U0001F608"w);
+    assert(std.file.readText!string(deleteme).stripLeft("\uFEFF") ==
+        text(strs));
 }
 
 @safe unittest

From e7e75cf2343bee5bae24622ef8035cde159b6c52 Mon Sep 17 00:00:00 2001
From: aG0aep6G
Date: Mon, 21 Sep 2020 21:12:10 +0200
Subject: [PATCH 5/5] add the original test case of issue 18789

... to ensure that it doesn't throw any exceptions like it used to.
---
 std/stdio.d | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/std/stdio.d b/std/stdio.d
index 308dd8291ab..dca3e1a8bab 100644
--- a/std/stdio.d
+++ b/std/stdio.d
@@ -3712,6 +3712,26 @@ void main()
     assert(std.file.readText!string(deleteme).stripLeft("\uFEFF") ==
         text(strs));
 }
+@safe unittest // https://issues.dlang.org/show_bug.cgi?id=18789
+{
+    static import std.file;
+    auto deleteme = testFilename();
+    scope(exit) std.file.remove(deleteme);
+    // converting to char
+    {
+        auto f = File(deleteme, "w");
+        f.writeln("\U0001F608"w); // UTFException
+    }
+    // converting to wchar_t
+    {
+        auto f = File(deleteme, "w,ccs=UTF-16LE");
+        // from char
+        f.writeln("ö"); // writes garbage
+        f.writeln("\U0001F608"); // ditto
+        // from wchar
+        f.writeln("\U0001F608"w); // leads to ErrnoException
+    }
+}
 
 @safe unittest
 {