-
-
Notifications
You must be signed in to change notification settings - Fork 706
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve std.range.put for strings.
#1000
Conversation
|
I'm getting some bad behavior in regards to unicode. I re-wrote your unittest with: And it's failing when Not entirely sure if it is the unittest, the |
|
Yeah... If R accepts I assert here:
And the contents of The problem (I think) is that this short circuits the later test of: I have to go, so I can't investigate more now. |
|
Calling the code: wstring text = "some text";
text.replace(`/`, `\`);Results in a compile error with the new code, but works with the old. |
|
Well, I just wanted to help validating this pull request, but I ended up coding all day :) I was able to write an improved version of put that:
Another improvement is that it reorders the static ifs. This way, a char is encoded into the stream assap. Wat it used to do, given a char 'a': cast to ['a'], which is seen as a input range, decoded into a 'a'd, and then encoded into into a Finally, I declared the Here is the code: void put(R, E)(ref R r, E e)
{
static if(is(PointerTarget!R == struct))
enum usingPut = hasMember!(PointerTarget!R, "put");
else
enum usingPut = hasMember!(R, "put");
enum usingFront = !usingPut && isInputRange!R;
@property ref E[] EArrayInit(); //@@@9186: Can't use (E[]).init
//doPut: Convenience to have the same code for delegates and put
void doPut(T)(ref R r, auto ref T t)
{
static if (usingPut)
r.put(t);
else
r(t);
}
static if (usingFront)
{
static if (is(typeof(r.front = e, r.popFront())))
{
r.front = e;
r.popFront();
}
else static if (isInputRange!E && is(typeof(put(r, e.front))))
{
for (; !e.empty; e.popFront()) put(r, e.front);
}
else
static assert(false, "Cannot put a "~E.stringof~" into a "~R.stringof);
}
else
{
//normal object
static if (is(typeof(doPut(r, e))))
{
doPut(r, e);
}
//special case for char. !important: do this before trying S[]
else static if (isSomeChar!E && is(typeof(putChar!usingPut(r, e))))
{
putChar!usingPut(r, e);
}
//Standard range operations
else static if (is(typeof(doPut(r, EArrayInit))))
{
doPut(r, (&e)[0..1]);
}
//Decode the string, and cram the chars. !important: do this before isInputRange!E
else static if (isSomeString!E && is(typeof(putChar!usingPut(r, dchar.max))))
{
foreach(dchar c; e)
putChar!usingPut(r, c);
}
//Extract each element from the range
else static if (isInputRange!E && is(typeof(put(r, e.front))))
{
for (; !e.empty; e.popFront()) put(r, e.front);
}
else
static assert(false, "Cannot put a "~E.stringof~" into a "~R.stringof);
}
}
//Helper function to handle chars as quickly and as elegantly as possible
//Assumes r.put(e)/r(e) has already been tested
private void putChar(bool usingPut, R, E)(ref R r, E e)
if (isSomeChar!E)
{
//convenience aliases to keep the static ifs under control
@property ref const(char)[] stringInit();
@property ref const(wchar)[] wstringInit();
@property ref const(dchar)[] dstringInit();
//doPut: Convenience to have the same code for delegates and put
void doPut(T)(ref R r, auto ref T t)
{
static if (usingPut)
r.put(t);
else
r(t);
}
//Quick encode into wider wstring
static if (is(typeof(doPut(r, wstringInit))) && (E.sizeof <= wchar.sizeof))
{
wchar w = cast(wchar)e;
doPut(r, (&w)[0..1]);
}
//Quick encode into the wider dstring
else static if (is(typeof(doPut(r, dstringInit))) && (E.sizeof <= dchar.sizeof))
{
dchar d = cast(dchar)e;
doPut(r, (&d)[0..1]);
}
//Encode into the narrower wstring type
else static if (is(typeof(doPut(r, wstringInit))) || is(typeof(doPut(r, wchar.max))))
{
import std.utf : encode;
static wchar[2] buf;
static if (is(typeof(doPut(r, wstringInit))))
doPut(r, buf[0 .. encode(buf, e)]);
else
foreach (c; buf[0 .. encode(buf, e)])
doPut(r, c);
}
//Encode into the narrower string type
else static if (is(typeof(doPut(r, stringInit))) || is(typeof(doPut(r, char.max))))
{
import std.utf : encode;
static char[4] buf;
static if (is(typeof(doPut(r, stringInit))))
doPut(r, buf[0 .. encode(buf, e)]);
else
foreach (c; buf[0 .. encode(buf, e)])
doPut(r, c);
}
else
static assert(false, "Cannot put a "~E.stringof~" into a "~R.stringof);
}Yeah, do with this code as you wish. I can open my own pull request if you feel that would be more appropriate. |
|
Oh yeah: This is the unittest I used. It is a variation of yours, but tests ALL cross flavors of The only thing missing (AFAIK), would be using characters that encode as surrogate pairs in utf-16, but I don't think that's a big problem. unittest
{
static struct PutC(C)
{
string result;
void put(const(C) c) { result ~= to!string((&c)[0..1]); }
}
static struct PutS(C)
{
string result;
void put(const(C)[] s) { result ~= to!string(s); }
}
static struct PutSS(C)
{
string result;
void put(const(C)[][] ss)
{
foreach(s; ss)
result ~= to!string(s);
}
}
//Source Char
foreach (SC; TypeTuple!(char, wchar, dchar))
{
//writeln(SC.stringof);
SC ch = 'I';
dchar dh = '♥';
immutable(SC)[] s = "日本語!";
immutable(SC)[][] ss = ["日本語", "が", "好き", "ですか", "?"];
//Target Char
foreach (TC; TypeTuple!(char, wchar, dchar))
{
//writeln(TC.stringof);
//Testing PutC and PutS
foreach (Type; TypeTuple!(PutC!TC, PutS!TC))
{
//writeln(Type.stringof);
Type type;
auto sink = new Type();
//Testing put and sink
foreach (value ; tuple(type, sink))
{
put(value, ch);
assert(value.result == "I");
put(value, dh);
assert(value.result == "I♥");
put(value, s);
assert(value.result == "I♥日本語!");
put(value, ss);
assert(value.result == "I♥日本語!日本語が好きですか?");
}
}
}
}
} |
|
Assigning to @andralex. |
May I kindly ask a detail about why this pull was closed? I was planning to open my own fork with the code I submitted. Did you see anything wrong with my code, or this pull, that I should now about? |
|
Because currently I'm not enough maintain this pull request. |
Sometimes we define a sink function which receives
const(char)[], and expect that would work as an output range for all string/character values, but this is not true sostd.range.puthas no special treatments for string types.I think it is inconsistent against the special treatment in
std.array.front/popFrontfor narrow strings.Changes:
r(e.front)When E is an input range,
foreach (; !e.empty; e.popFront()) r.put(e.front);is supported in current. So we should also support that for the functor output range.When E is narrow string type, member put and functor should be able to receive each narrow characters
e[i]directly.When E is dchar type, std.range.put should be able to encode it to one or more narrow characters, and pass to the given output range.
After these improvements,
std.formatwill be improved at the same time against functor output ranges. And then, a workaround which added in #997 can be removed.