Permalink
Browse files

folly: Range: implement find_first_of and optimize qfind(Range, char)

Summary:
implement ##find_first_of## and optimize ##Range.find(char)##

============================================================================
folly/test/RangeBenchmark.cpp                   relative  time/iter  iters/s
============================================================================
LongFindSingleCharDirect                                     2.76ms   362.63
LongFindSingleCharRange                           15.88%    17.37ms    57.58
ShortFindSingleCharDirect                                   53.41fs   18.72T
ShortFindSingleCharRange                           0.00%    29.22ns   34.22M
============================================================================

Test Plan:
- added new tests

- ran all folly tests

fbconfig -r folly/ && mkk runtests_opt

Reviewed By: tudorb@fb.com

FB internal diff: D576720
  • Loading branch information...
1 parent 4e74526 commit 0010b30e696ac1e68bb3eb7992c328fd3214437c @luciang luciang committed with jdelong Sep 18, 2012
Showing with 210 additions and 9 deletions.
  1. +88 −9 folly/Range.h
  2. +69 −0 folly/test/RangeFindBenchmark.cpp
  3. +53 −0 folly/test/RangeTest.cpp
View
97 folly/Range.h
@@ -22,6 +22,8 @@
#include "folly/FBString.h"
#include <glog/logging.h>
+#include <algorithm>
+#include <cstring>
#include <iostream>
#include <string>
#include <stdexcept>
@@ -37,24 +39,33 @@ namespace folly {
template <class T> class Range;
/**
-Finds the first occurrence of needle in haystack. The algorithm is on
-average faster than O(haystack.size() * needle.size()) but not as fast
-as Boyer-Moore. On the upside, it does not do any upfront
-preprocessing and does not allocate memory.
+ * Finds the first occurrence of needle in haystack. The algorithm is on
+ * average faster than O(haystack.size() * needle.size()) but not as fast
+ * as Boyer-Moore. On the upside, it does not do any upfront
+ * preprocessing and does not allocate memory.
*/
template <class T>
inline size_t qfind(const Range<T> & haystack,
const Range<T> & needle);
/**
-Finds the first occurrence of needle in haystack. The result is the
-offset reported to the beginning of haystack, or string::npos if
-needle wasn't found.
+ * Finds the first occurrence of needle in haystack. The result is the
+ * offset reported to the beginning of haystack, or string::npos if
+ * needle wasn't found.
*/
template <class T>
size_t qfind(const Range<T> & haystack,
const typename Range<T>::value_type& needle);
+
+/**
+ * Finds the first occurrence of any element of needle in
+ * haystack. The algorithm is O(haystack.size() * needle.size()).
+ */
+template <class T>
+inline size_t qfind_first_of(const Range<T> & haystack,
+ const Range<T> & needle);
+
/**
* Small internal helper - returns the value just before an iterator.
*/
@@ -109,7 +120,7 @@ class Range : private boost::totally_ordered<Range<Iter> > {
typedef typename std::iterator_traits<Iter>::reference reference;
typedef std::char_traits<value_type> traits_type;
- static const size_type npos = -1;
+ static const size_type npos = std::string::npos;
// Works for all iterators
Range() : b_(), e_() {
@@ -348,10 +359,12 @@ class Range : private boost::totally_ordered<Range<Iter> > {
return ret == npos ? ret : ret + pos;
}
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
size_type find(const Iter s) const {
return qfind(*this, Range(s));
}
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
size_type find(const Iter s, size_t pos) const {
if (pos > size()) return std::string::npos;
size_type ret = qfind(subpiece(pos), Range(s));
@@ -368,6 +381,38 @@ class Range : private boost::totally_ordered<Range<Iter> > {
return ret == npos ? ret : ret + pos;
}
+ size_type find_first_of(Range needles) const {
+ return qfind_first_of(*this, needles);
+ }
+
+ size_type find_first_of(Range needles, size_t pos) const {
+ if (pos > size()) return std::string::npos;
+ size_type ret = qfind_first_of(pos ? subpiece(pos) : *this, needles);
+ return ret == npos ? ret : ret + pos;
+ }
+
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
+ size_type find_first_of(Iter needles) const {
+ return find_first_of(Range(needles));
+ }
+
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
+ size_type find_first_of(Iter needles, size_t pos) const {
+ return find_first_of(Range(needles), pos);
+ }
+
+ size_type find_first_of(Iter needles, size_t pos, size_t n) const {
+ return find_first_of(Range(needles, n), pos);
+ }
+
+ size_type find_first_of(value_type c) const {
+ return find(c);
+ }
+
+ size_type find_first_of(value_type c, size_t pos) const {
+ return find(c, pos);
+ }
+
void swap(Range& rhs) {
std::swap(b_, rhs.b_);
std::swap(e_, rhs.e_);
@@ -547,6 +592,16 @@ size_t qfind(const Range<T>& haystack,
return std::string::npos;
}
+template <class T, class Comp>
+size_t qfind_first_of(const Range<T> & haystack,
+ const Range<T> & needle,
+ Comp eq) {
+ auto ret = std::find_first_of(haystack.begin(), haystack.end(),
+ needle.begin(), needle.end(),
+ eq);
+ return ret == haystack.end() ? std::string::npos : ret - haystack.begin();
+}
+
struct AsciiCaseSensitive {
bool operator()(char lhs, char rhs) const {
return lhs == rhs;
@@ -571,7 +626,31 @@ size_t qfind(const Range<T>& haystack,
template <class T>
size_t qfind(const Range<T>& haystack,
const typename Range<T>::value_type& needle) {
- return qfind(haystack, makeRange(&needle, &needle + 1));
+ auto pos = std::find(haystack.begin(), haystack.end(), needle);
+ return pos == haystack.end() ? std::string::npos : pos - haystack.data();
+}
+
+// specialization for StringPiece
+template <>
+inline size_t qfind(const Range<const char*>& haystack, const char& needle) {
+ auto pos = static_cast<const char*>(
+ ::memchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
+}
+
+// specialization for ByteRange
+template <>
+inline size_t qfind(const Range<const unsigned char*>& haystack,
+ const unsigned char& needle) {
+ auto pos = static_cast<const unsigned char*>(
+ ::memchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
+}
+
+template <class T>
+size_t qfind_first_of(const Range<T>& haystack,
+ const Range<T>& needle) {
+ return qfind_first_of(haystack, needle, asciiCaseSensitive);
}
} // !namespace folly
View
69 folly/test/RangeFindBenchmark.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Range.h"
+#include "folly/Benchmark.h"
+#include "folly/Foreach.h"
+#include <algorithm>
+#include <iostream>
+#include <string>
+
+using namespace folly;
+using namespace std;
+
+namespace {
+
+std::string str;
+
+void initStr(int len) {
+ cout << "string length " << len << ':' << endl;
+ str.clear();
+ str.reserve(len + 1);
+ str.append(len, 'a');
+ str.append(1, 'b');
+}
+
+} // anonymous namespace
+
+BENCHMARK(FindSingleCharMemchr, n) {
+ StringPiece haystack(str);
+ FOR_EACH_RANGE (i, 0, n) {
+ doNotOptimizeAway(haystack.find('b'));
+ char x = haystack[0];
+ doNotOptimizeAway(&x);
+ }
+}
+
+BENCHMARK_RELATIVE(FindSingleCharRange, n) {
+ char c = 'b';
+ StringPiece haystack(str);
+ folly::StringPiece needle(&c, &c + 1);
+ FOR_EACH_RANGE (i, 0, n) {
+ doNotOptimizeAway(haystack.find(needle));
+ char x = haystack[0];
+ doNotOptimizeAway(&x);
+ }
+}
+
+int main(int argc, char** argv) {
+ google::ParseCommandLineFlags(&argc, &argv, true);
+
+ for (int len : {10, 256, 10*1024, 10*1024*1024}) {
+ initStr(len);
+ runBenchmarks();
+ }
+ return 0;
+}
View
53 folly/test/RangeTest.cpp
@@ -86,6 +86,59 @@ TEST(StringPiece, All) {
EXPECT_EQ(s.toString().find("notfound", 55), StringPiece::npos);
EXPECT_EQ(s.find("z", s.size()), StringPiece::npos);
EXPECT_EQ(s.find("z", 55), StringPiece::npos);
+ // empty needle
+ EXPECT_EQ(s.find(""), std::string().find(""));
+ EXPECT_EQ(s.find(""), 0);
+
+ // single char finds
+ EXPECT_EQ(s.find('b'), 3);
+ EXPECT_EQ(s.find('b', 3), 3);
+ EXPECT_EQ(s.find('b', 4), 6);
+ EXPECT_EQ(s.find('o', 2), 2);
+ EXPECT_EQ(s.find('y'), StringPiece::npos);
+ EXPECT_EQ(s.find('y', 1), StringPiece::npos);
+ EXPECT_EQ(s.find('o', 4), StringPiece::npos); // starting position too far
+ // starting pos that is obviously past the end -- This works for std::string
+ EXPECT_EQ(s.toString().find('y', 55), StringPiece::npos);
+ EXPECT_EQ(s.find('z', s.size()), StringPiece::npos);
+ EXPECT_EQ(s.find('z', 55), StringPiece::npos);
+ // null char
+ EXPECT_EQ(s.find('\0'), std::string().find('\0'));
+ EXPECT_EQ(s.find('\0'), StringPiece::npos);
+
+ // find_first_of
+ s.reset(foobarbaz, strlen(foobarbaz));
+ EXPECT_EQ(s.find_first_of("bar"), 3);
+ EXPECT_EQ(s.find_first_of("ba", 3), 3);
+ EXPECT_EQ(s.find_first_of("ba", 4), 4);
+ EXPECT_EQ(s.find_first_of("xyxy"), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of("xyxy", 1), StringPiece::npos);
+ // starting position too far
+ EXPECT_EQ(s.find_first_of("foo", 4), StringPiece::npos);
+ // starting pos that is obviously past the end -- This works for std::string
+ EXPECT_EQ(s.toString().find_first_of("xyxy", 55), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of("z", s.size()), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of("z", 55), StringPiece::npos);
+ // empty needle. Note that this returns npos, while find() returns 0!
+ EXPECT_EQ(s.find_first_of(""), std::string().find_first_of(""));
+ EXPECT_EQ(s.find_first_of(""), StringPiece::npos);
+
+ // single char find_first_ofs
+ EXPECT_EQ(s.find_first_of('b'), 3);
+ EXPECT_EQ(s.find_first_of('b', 3), 3);
+ EXPECT_EQ(s.find_first_of('b', 4), 6);
+ EXPECT_EQ(s.find_first_of('o', 2), 2);
+ EXPECT_EQ(s.find_first_of('y'), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of('y', 1), StringPiece::npos);
+ // starting position too far
+ EXPECT_EQ(s.find_first_of('o', 4), StringPiece::npos);
+ // starting pos that is obviously past the end -- This works for std::string
+ EXPECT_EQ(s.toString().find_first_of('y', 55), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of('z', s.size()), StringPiece::npos);
+ EXPECT_EQ(s.find_first_of('z', 55), StringPiece::npos);
+ // null char
+ EXPECT_EQ(s.find_first_of('\0'), std::string().find_first_of('\0'));
+ EXPECT_EQ(s.find_first_of('\0'), StringPiece::npos);
// just "barbaz"
s.reset(foobarbaz + 3, strlen(foobarbaz + 3));

0 comments on commit 0010b30

Please sign in to comment.