Skip to content
This repository
Browse code

folly: Range: implement find_first_of and optimize qfind(Range, char)

Summary:
implement ##find_first_of## and optimize ##Range.find(char)##

============================================================================
folly/test/RangeBenchmark.cpp                   relative  time/iter  iters/s
============================================================================
LongFindSingleCharDirect                                     2.76ms   362.63
LongFindSingleCharRange                           15.88%    17.37ms    57.58
ShortFindSingleCharDirect                                   53.41fs   18.72T
ShortFindSingleCharRange                           0.00%    29.22ns   34.22M
============================================================================

Test Plan:
- added new tests

- ran all folly tests

fbconfig -r folly/ && mkk runtests_opt

Reviewed By: tudorb@fb.com

FB internal diff: D576720
  • Loading branch information...
commit 0010b30e696ac1e68bb3eb7992c328fd3214437c 1 parent 4e74526
Lucian Adrian Grijincu authored September 18, 2012 jdelong committed October 11, 2012
97  folly/Range.h
@@ -22,6 +22,8 @@
22 22
 
23 23
 #include "folly/FBString.h"
24 24
 #include <glog/logging.h>
  25
+#include <algorithm>
  26
+#include <cstring>
25 27
 #include <iostream>
26 28
 #include <string>
27 29
 #include <stdexcept>
@@ -37,24 +39,33 @@ namespace folly {
37 39
 template <class T> class Range;
38 40
 
39 41
 /**
40  
-Finds the first occurrence of needle in haystack. The algorithm is on
41  
-average faster than O(haystack.size() * needle.size()) but not as fast
42  
-as Boyer-Moore. On the upside, it does not do any upfront
43  
-preprocessing and does not allocate memory.
  42
+ * Finds the first occurrence of needle in haystack. The algorithm is on
  43
+ * average faster than O(haystack.size() * needle.size()) but not as fast
  44
+ * as Boyer-Moore. On the upside, it does not do any upfront
  45
+ * preprocessing and does not allocate memory.
44 46
  */
45 47
 template <class T>
46 48
 inline size_t qfind(const Range<T> & haystack,
47 49
                     const Range<T> & needle);
48 50
 
49 51
 /**
50  
-Finds the first occurrence of needle in haystack. The result is the
51  
-offset reported to the beginning of haystack, or string::npos if
52  
-needle wasn't found.
  52
+ * Finds the first occurrence of needle in haystack. The result is the
  53
+ * offset relative to the beginning of haystack, or string::npos if
  54
+ * needle wasn't found.
53 55
  */
54 56
 template <class T>
55 57
 size_t qfind(const Range<T> & haystack,
56 58
              const typename Range<T>::value_type& needle);
57 59
 
  60
+
  61
+/**
  62
+ * Finds the first occurrence of any element of needle in
  63
+ * haystack. The algorithm is O(haystack.size() * needle.size()).
  64
+ */
  65
+template <class T>
  66
+inline size_t qfind_first_of(const Range<T> & haystack,
  67
+                             const Range<T> & needle);
  68
+
58 69
 /**
59 70
  * Small internal helper - returns the value just before an iterator.
60 71
  */
@@ -109,7 +120,7 @@ class Range : private boost::totally_ordered<Range<Iter> > {
109 120
   typedef typename std::iterator_traits<Iter>::reference reference;
110 121
   typedef std::char_traits<value_type> traits_type;
111 122
 
112  
-  static const size_type npos = -1;
  123
+  static const size_type npos = std::string::npos;
113 124
 
114 125
   // Works for all iterators
115 126
   Range() : b_(), e_() {
@@ -348,10 +359,12 @@ class Range : private boost::totally_ordered<Range<Iter> > {
348 359
     return ret == npos ? ret : ret + pos;
349 360
   }
350 361
 
  362
+  // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
351 363
   size_type find(const Iter s) const {
352 364
     return qfind(*this, Range(s));
353 365
   }
354 366
 
  367
+  // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
355 368
   size_type find(const Iter s, size_t pos) const {
356 369
     if (pos > size()) return std::string::npos;
357 370
     size_type ret = qfind(subpiece(pos), Range(s));
@@ -368,6 +381,38 @@ class Range : private boost::totally_ordered<Range<Iter> > {
368 381
     return ret == npos ? ret : ret + pos;
369 382
   }
370 383
 
  384
+  size_type find_first_of(Range needles) const {
  385
+    return qfind_first_of(*this, needles);
  386
+  }
  387
+
  388
+  size_type find_first_of(Range needles, size_t pos) const {
  389
+    if (pos > size()) return std::string::npos;
  390
+    size_type ret = qfind_first_of(pos ? subpiece(pos) : *this, needles);
  391
+    return ret == npos ? ret : ret + pos;
  392
+  }
  393
+
  394
+  // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
  395
+  size_type find_first_of(Iter needles) const {
  396
+    return find_first_of(Range(needles));
  397
+  }
  398
+
  399
+  // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
  400
+  size_type find_first_of(Iter needles, size_t pos) const {
  401
+    return find_first_of(Range(needles), pos);
  402
+  }
  403
+
  404
+  size_type find_first_of(Iter needles, size_t pos, size_t n) const {
  405
+    return find_first_of(Range(needles, n), pos);
  406
+  }
  407
+
  408
+  size_type find_first_of(value_type c) const {
  409
+    return find(c);
  410
+  }
  411
+
  412
+  size_type find_first_of(value_type c, size_t pos) const {
  413
+    return find(c, pos);
  414
+  }
  415
+
371 416
   void swap(Range& rhs) {
372 417
     std::swap(b_, rhs.b_);
373 418
     std::swap(e_, rhs.e_);
@@ -547,6 +592,16 @@ size_t qfind(const Range<T>& haystack,
547 592
   return std::string::npos;
548 593
 }
549 594
 
  595
+template <class T, class Comp>
  596
+size_t qfind_first_of(const Range<T> & haystack,
  597
+                      const Range<T> & needle,
  598
+                      Comp eq) {
  599
+  auto ret = std::find_first_of(haystack.begin(), haystack.end(),
  600
+                                needle.begin(), needle.end(),
  601
+                                eq);
  602
+  return ret == haystack.end() ? std::string::npos : ret - haystack.begin();
  603
+}
  604
+
550 605
 struct AsciiCaseSensitive {
551 606
   bool operator()(char lhs, char rhs) const {
552 607
     return lhs == rhs;
@@ -571,7 +626,31 @@ size_t qfind(const Range<T>& haystack,
571 626
 template <class T>
572 627
 size_t qfind(const Range<T>& haystack,
573 628
              const typename Range<T>::value_type& needle) {
574  
-  return qfind(haystack, makeRange(&needle, &needle + 1));
  629
+  auto pos = std::find(haystack.begin(), haystack.end(), needle);
  630
+  return pos == haystack.end() ? std::string::npos : pos - haystack.data();
  631
+}
  632
+
  633
+// specialization for StringPiece (Range<const char*>): single-char search via memchr
  634
+template <>
  635
+inline size_t qfind(const Range<const char*>& haystack, const char& needle) {
  636
+  auto pos = static_cast<const char*>(
  637
+    ::memchr(haystack.data(), needle, haystack.size()));
  638
+  return pos == nullptr ? std::string::npos : pos - haystack.data();
  639
+}
  640
+
  641
+// specialization for ByteRange (Range<const unsigned char*>): single-byte search via memchr
  642
+template <>
  643
+inline size_t qfind(const Range<const unsigned char*>& haystack,
  644
+                    const unsigned char& needle) {
  645
+  auto pos = static_cast<const unsigned char*>(
  646
+    ::memchr(haystack.data(), needle, haystack.size()));
  647
+  return pos == nullptr ? std::string::npos : pos - haystack.data();
  648
+}
  649
+
  650
+template <class T>
  651
+size_t qfind_first_of(const Range<T>& haystack,
  652
+                      const Range<T>& needle) {
  653
+  return qfind_first_of(haystack, needle, asciiCaseSensitive);
575 654
 }
576 655
 
577 656
 }  // !namespace folly
69  folly/test/RangeFindBenchmark.cpp
... ...
@@ -0,0 +1,69 @@
  1
+/*
  2
+ * Copyright 2012 Facebook, Inc.
  3
+ *
  4
+ * Licensed under the Apache License, Version 2.0 (the "License");
  5
+ * you may not use this file except in compliance with the License.
  6
+ * You may obtain a copy of the License at
  7
+ *
  8
+ *   http://www.apache.org/licenses/LICENSE-2.0
  9
+ *
  10
+ * Unless required by applicable law or agreed to in writing, software
  11
+ * distributed under the License is distributed on an "AS IS" BASIS,
  12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13
+ * See the License for the specific language governing permissions and
  14
+ * limitations under the License.
  15
+ */
  16
+
  17
+#include "folly/Range.h"
  18
+#include "folly/Benchmark.h"
  19
+#include "folly/Foreach.h"
  20
+#include <algorithm>
  21
+#include <iostream>
  22
+#include <string>
  23
+
  24
+using namespace folly;
  25
+using namespace std;
  26
+
  27
+namespace {
  28
+
  29
+std::string str;
  30
+
  31
+void initStr(int len) {
  32
+  cout << "string length " << len << ':' << endl;
  33
+  str.clear();
  34
+  str.reserve(len + 1);
  35
+  str.append(len, 'a');
  36
+  str.append(1, 'b');
  37
+}
  38
+
  39
+}  // anonymous namespace
  40
+
  41
+BENCHMARK(FindSingleCharMemchr, n) {
  42
+  StringPiece haystack(str);
  43
+  FOR_EACH_RANGE (i, 0, n) {
  44
+    doNotOptimizeAway(haystack.find('b'));
  45
+    char x = haystack[0];
  46
+    doNotOptimizeAway(&x);
  47
+  }
  48
+}
  49
+
  50
+BENCHMARK_RELATIVE(FindSingleCharRange, n) {
  51
+  char c = 'b';
  52
+  StringPiece haystack(str);
  53
+  folly::StringPiece needle(&c, &c + 1);
  54
+  FOR_EACH_RANGE (i, 0, n) {
  55
+    doNotOptimizeAway(haystack.find(needle));
  56
+    char x = haystack[0];
  57
+    doNotOptimizeAway(&x);
  58
+  }
  59
+}
  60
+
  61
+int main(int argc, char** argv) {
  62
+  google::ParseCommandLineFlags(&argc, &argv, true);
  63
+
  64
+  for (int len : {10, 256, 10*1024, 10*1024*1024}) {
  65
+    initStr(len);
  66
+    runBenchmarks();
  67
+  }
  68
+  return 0;
  69
+}
53  folly/test/RangeTest.cpp
@@ -86,6 +86,59 @@ TEST(StringPiece, All) {
86 86
   EXPECT_EQ(s.toString().find("notfound", 55), StringPiece::npos);
87 87
   EXPECT_EQ(s.find("z", s.size()), StringPiece::npos);
88 88
   EXPECT_EQ(s.find("z", 55), StringPiece::npos);
  89
+  // empty needle
  90
+  EXPECT_EQ(s.find(""), std::string().find(""));
  91
+  EXPECT_EQ(s.find(""), 0);
  92
+
  93
+  // single char finds
  94
+  EXPECT_EQ(s.find('b'), 3);
  95
+  EXPECT_EQ(s.find('b', 3), 3);
  96
+  EXPECT_EQ(s.find('b', 4), 6);
  97
+  EXPECT_EQ(s.find('o', 2), 2);
  98
+  EXPECT_EQ(s.find('y'), StringPiece::npos);
  99
+  EXPECT_EQ(s.find('y', 1), StringPiece::npos);
  100
+  EXPECT_EQ(s.find('o', 4), StringPiece::npos);  // starting position too far
  101
+  // starting pos that is obviously past the end -- This works for std::string
  102
+  EXPECT_EQ(s.toString().find('y', 55), StringPiece::npos);
  103
+  EXPECT_EQ(s.find('z', s.size()), StringPiece::npos);
  104
+  EXPECT_EQ(s.find('z', 55), StringPiece::npos);
  105
+  // null char
  106
+  EXPECT_EQ(s.find('\0'), std::string().find('\0'));
  107
+  EXPECT_EQ(s.find('\0'), StringPiece::npos);
  108
+
  109
+  // find_first_of
  110
+  s.reset(foobarbaz, strlen(foobarbaz));
  111
+  EXPECT_EQ(s.find_first_of("bar"), 3);
  112
+  EXPECT_EQ(s.find_first_of("ba", 3), 3);
  113
+  EXPECT_EQ(s.find_first_of("ba", 4), 4);
  114
+  EXPECT_EQ(s.find_first_of("xyxy"), StringPiece::npos);
  115
+  EXPECT_EQ(s.find_first_of("xyxy", 1), StringPiece::npos);
  116
+  // starting position too far
  117
+  EXPECT_EQ(s.find_first_of("foo", 4), StringPiece::npos);
  118
+  // starting pos that is obviously past the end -- This works for std::string
  119
+  EXPECT_EQ(s.toString().find_first_of("xyxy", 55), StringPiece::npos);
  120
+  EXPECT_EQ(s.find_first_of("z", s.size()), StringPiece::npos);
  121
+  EXPECT_EQ(s.find_first_of("z", 55), StringPiece::npos);
  122
+  // empty needle. Note that this returns npos, while find() returns 0!
  123
+  EXPECT_EQ(s.find_first_of(""), std::string().find_first_of(""));
  124
+  EXPECT_EQ(s.find_first_of(""), StringPiece::npos);
  125
+
  126
+  // single char find_first_ofs
  127
+  EXPECT_EQ(s.find_first_of('b'), 3);
  128
+  EXPECT_EQ(s.find_first_of('b', 3), 3);
  129
+  EXPECT_EQ(s.find_first_of('b', 4), 6);
  130
+  EXPECT_EQ(s.find_first_of('o', 2), 2);
  131
+  EXPECT_EQ(s.find_first_of('y'), StringPiece::npos);
  132
+  EXPECT_EQ(s.find_first_of('y', 1), StringPiece::npos);
  133
+  // starting position too far
  134
+  EXPECT_EQ(s.find_first_of('o', 4), StringPiece::npos);
  135
+  // starting pos that is obviously past the end -- This works for std::string
  136
+  EXPECT_EQ(s.toString().find_first_of('y', 55), StringPiece::npos);
  137
+  EXPECT_EQ(s.find_first_of('z', s.size()), StringPiece::npos);
  138
+  EXPECT_EQ(s.find_first_of('z', 55), StringPiece::npos);
  139
+  // null char
  140
+  EXPECT_EQ(s.find_first_of('\0'), std::string().find_first_of('\0'));
  141
+  EXPECT_EQ(s.find_first_of('\0'), StringPiece::npos);
89 142
 
90 143
   // just "barbaz"
91 144
   s.reset(foobarbaz + 3, strlen(foobarbaz + 3));

0 notes on commit 0010b30

Please sign in to comment.
Something went wrong with that request. Please try again.