Skip to content

Commit

Permalink
Improve speed of find for random access needles (strings)
Browse files Browse the repository at this point in the history
When finding a string within a string, std.algorithm.searching.find was
unnecessarily slow. The reason is that it created intermediate slices. A
naively written nested-for-loop implementation was a few times
faster.

For random access ranges (which strings are), this uses an index-based
algorithm, which does not need to create an intermediate slice. Speed
is now comparable to the nested-for-loop implementation, even in rather
pathological cases.

This might help with issue 9646.
  • Loading branch information
Andreas Zwinkau authored and qznc committed Jun 2, 2016
1 parent e216c10 commit a9d5b8c
Showing 1 changed file with 67 additions and 26 deletions.
93 changes: 67 additions & 26 deletions std/algorithm/searching.d
Original file line number Diff line number Diff line change
Expand Up @@ -1773,44 +1773,85 @@ if (isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && isBidirectionalRa
&& is(typeof(binaryFun!pred(haystack.front, needle.front)) : bool))
{
if (needle.empty) return haystack;
const needleLength = walkLength(needle.save);
if (needleLength > haystack.length)
static if (hasLength!R2)
{
return haystack[haystack.length .. haystack.length];
immutable needleLength = needle.length;
}
// @@@BUG@@@
// auto needleBack = moveBack(needle);
// Stage 1: find the step
size_t step = 1;
auto needleBack = needle.back;
needle.popBack();
for (auto i = needle.save; !i.empty && i.back != needleBack;
i.popBack(), ++step)
else
{
immutable needleLength = walkLength(needle.save);
}
if (needleLength > haystack.length)
{
return haystack[haystack.length .. haystack.length];
}
// Stage 2: linear find
size_t scout = needleLength - 1;
for (;;)
static if (isRandomAccessRange!R2)
{
if (scout >= haystack.length)
immutable lastIndex = needleLength - 1;
auto last = needle[lastIndex];
size_t j = lastIndex, skip = 0;
for (; j < haystack.length;)
{
return haystack[haystack.length .. haystack.length];
if (!binaryFun!pred(haystack[j], last))
{
++j;
continue;
}
immutable k = j - lastIndex;
// last elements match
for (size_t i = 0;; ++i)
{
if (i == lastIndex)
return haystack[k .. haystack.length];
if (!binaryFun!pred(haystack[k + i], needle[i]))
break;
}
if (skip == 0) {
skip = 1;
while (skip < needleLength && needle[needleLength - 1 - skip] != needle[needleLength - 1])
{
++skip;
}
}
j += skip;
}
if (!binaryFun!pred(haystack[scout], needleBack))
}
else
{
// @@@BUG@@@
// auto needleBack = moveBack(needle);
// Stage 1: find the step
size_t step = 1;
auto needleBack = needle.back;
needle.popBack();
for (auto i = needle.save; !i.empty && i.back != needleBack;
i.popBack(), ++step)
{
++scout;
continue;
}
// Found a match with the last element in the needle
auto cand = haystack[scout + 1 - needleLength .. haystack.length];
if (startsWith!pred(cand, needle))
// Stage 2: linear find
size_t scout = needleLength - 1;
for (;;)
{
// found
return cand;
if (scout >= haystack.length)
break;
if (!binaryFun!pred(haystack[scout], needleBack))
{
++scout;
continue;
}
// Found a match with the last element in the needle
auto cand = haystack[scout + 1 - needleLength .. haystack.length];
// This intermediate creation of a slice is why the
// random access variant above is faster.
if (startsWith!pred(cand, needle))
{
// found
return cand;
}
scout += step;
}
// Continue with the stride
scout += step;
}
return haystack[haystack.length .. haystack.length];
}

@safe unittest
Expand Down

0 comments on commit a9d5b8c

Please sign in to comment.