Skip to content

Commit

Permalink
qfind_first_byte_of may suffer from global initialization order
Browse files Browse the repository at this point in the history
Summary: ##static## handling adds ~2 ns more overhead per call (and the first call is somewhat slow), but the logic is now correct. Also inlined ##qfind_first_byte_of##.

Test Plan: unittests

Reviewed By: tudorb@fb.com

FB internal diff: D687947
  • Loading branch information
philippv authored and jdelong committed Feb 4, 2013
1 parent cc86cd3 commit 4988b28
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 51 deletions.
76 changes: 31 additions & 45 deletions folly/Range.cpp
Expand Up @@ -14,14 +14,11 @@
* limitations under the License. * limitations under the License.
*/ */


//
// @author Mark Rabkin (mrabkin@fb.com) // @author Mark Rabkin (mrabkin@fb.com)
// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
//


#include "folly/Range.h" #include "folly/Range.h"


#include "folly/CpuId.h"
#include "folly/Likely.h" #include "folly/Likely.h"


namespace folly { namespace folly {
Expand Down Expand Up @@ -86,39 +83,6 @@ size_t qfind_first_byte_of_needles16(const StringPiece& haystack,
return StringPiece::npos; return StringPiece::npos;
} }


// SSE4.2 implementation of qfind_first_byte_of.
// Compiled for the sse4.2 target and kept out of line; presumably so the
// SSE4.2 instructions stay confined to this function -- confirm with callers.
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles)
  __attribute__ ((__target__("sse4.2"), noinline));

// Returns the index of the first byte in `haystack` that equals any byte in
// `needles`, or StringPiece::npos if either range is empty or nothing matches.
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles) {
  if (UNLIKELY(needles.empty() || haystack.empty())) {
    return StringPiece::npos;
  } else if (needles.size() <= 16) {
    // we can save some unnecessary load instructions by optimizing for
    // the common case of needles.size() <= 16
    return qfind_first_byte_of_needles16(haystack, needles);
  }

  // NOTE(review): each load below reads a full 16 bytes even when fewer than
  // 16 bytes remain in the range. The explicit lengths passed to pcmpestri
  // keep the extra bytes out of the comparison, but the read itself can run
  // past the end of the buffer -- confirm the buffers are safe to over-read.
  for (size_t i = 0; i < haystack.size(); i += 16) {
    // Smallest match offset within the current haystack chunk; 16 == none.
    size_t b = 16;
    auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
    for (size_t j = 0; j < needles.size(); j += 16) {
      auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
      auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j,
                                               arr1, haystack.size() - i, 0);
      b = std::min<size_t>(index, b);
    }
    if (b < 16) {
      return i + b;
    }
  }
  return StringPiece::npos;
}

// Function type of qfind_first_byte_of_sse42.
typedef decltype(qfind_first_byte_of_sse42) Type_qfind_first_byte_of;

// Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis // Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis
// of Computer Algorithms" (1974), but the best description is here: // of Computer Algorithms" (1974), but the best description is here:
// http://research.swtch.com/sparse // http://research.swtch.com/sparse
Expand Down Expand Up @@ -163,6 +127,37 @@ size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
return StringPiece::npos; return StringPiece::npos;
} }


// SSE4.2 implementation of qfind_first_byte_of.
// Compiled for the sse4.2 target and kept out of line; presumably so the
// SSE4.2 instructions stay confined to this function -- confirm with callers.
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles)
  __attribute__ ((__target__("sse4.2"), noinline));

// Returns the index of the first byte in `haystack` that equals any byte in
// `needles`, or StringPiece::npos if either range is empty or nothing matches.
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles) {
  if (UNLIKELY(needles.empty() || haystack.empty())) {
    return StringPiece::npos;
  } else if (needles.size() <= 16) {
    // we can save some unnecessary load instructions by optimizing for
    // the common case of needles.size() <= 16
    return qfind_first_byte_of_needles16(haystack, needles);
  }

  // NOTE(review): each load below reads a full 16 bytes even when fewer than
  // 16 bytes remain in the range. The explicit lengths passed to pcmpestri
  // keep the extra bytes out of the comparison, but the read itself can run
  // past the end of the buffer -- confirm the buffers are safe to over-read.
  for (size_t i = 0; i < haystack.size(); i += 16) {
    // Smallest match offset within the current haystack chunk; 16 == none.
    size_t b = 16;
    auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
    for (size_t j = 0; j < needles.size(); j += 16) {
      auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
      auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j,
                                               arr1, haystack.size() - i, 0);
      b = std::min<size_t>(index, b);
    }
    if (b < 16) {
      return i + b;
    }
  }
  return StringPiece::npos;
}

size_t qfind_first_byte_of_nosse(const StringPiece& haystack, size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
const StringPiece& needles) { const StringPiece& needles) {
if (UNLIKELY(needles.empty() || haystack.empty())) { if (UNLIKELY(needles.empty() || haystack.empty())) {
Expand All @@ -183,14 +178,5 @@ size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
return qfind_first_byte_of_memchr(haystack, needles); return qfind_first_byte_of_memchr(haystack, needles);
} }


// Returns the result of the SSE4.2 or the non-SSE implementation of
// qfind_first_byte_of, depending on what folly::CpuId reports for this CPU.
size_t qfind_first_byte_of(const StringPiece& haystack,
                           const StringPiece& needles) {
  // Function-local static instead of a namespace-scope variable: the pointer
  // is set on the first call, so a call made from another translation unit's
  // static initializer can no longer run before it has been initialized.
  static auto const qfind_first_byte_of_fn =
    folly::CpuId().sse42() ? qfind_first_byte_of_sse42
                           : qfind_first_byte_of_nosse;
  return qfind_first_byte_of_fn(haystack, needles);
}

} // namespace detail } // namespace detail
} // namespace folly } // namespace folly
18 changes: 16 additions & 2 deletions folly/Range.h
Expand Up @@ -32,6 +32,7 @@
#include <boost/utility/enable_if.hpp> #include <boost/utility/enable_if.hpp>
#include <boost/type_traits.hpp> #include <boost/type_traits.hpp>
#include <bits/c++config.h> #include <bits/c++config.h>
#include "folly/CpuId.h"
#include "folly/Traits.h" #include "folly/Traits.h"


namespace folly { namespace folly {
Expand Down Expand Up @@ -593,8 +594,21 @@ size_t qfind(const Range<T>& haystack,
} }


namespace detail { namespace detail {
size_t qfind_first_byte_of(const StringPiece& haystack,
const StringPiece& needles); size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
const StringPiece& needles);

size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
const StringPiece& needles);

inline size_t qfind_first_byte_of(const StringPiece& haystack,
const StringPiece& needles) {
static auto const qfind_first_byte_of_fn =
folly::CpuId().sse42() ? qfind_first_byte_of_sse42
: qfind_first_byte_of_nosse;
return qfind_first_byte_of_fn(haystack, needles);
}

} // namespace detail } // namespace detail


template <class T, class Comp> template <class T, class Comp>
Expand Down
6 changes: 2 additions & 4 deletions folly/test/RangeTest.cpp
Expand Up @@ -14,7 +14,6 @@
* limitations under the License. * limitations under the License.
*/ */


//
// @author Kristina Holst (kholst@fb.com) // @author Kristina Holst (kholst@fb.com)
// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)


Expand All @@ -25,16 +24,15 @@
#include "folly/Range.h" #include "folly/Range.h"


namespace folly { namespace detail { namespace folly { namespace detail {

// declaration of functions in Range.cpp // declaration of functions in Range.cpp
size_t qfind_first_byte_of_memchr(const StringPiece& haystack, size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
const StringPiece& needles); const StringPiece& needles);


size_t qfind_first_byte_of_byteset(const StringPiece& haystack, size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
const StringPiece& needles); const StringPiece& needles);


size_t qfind_first_byte_of_nosse(const StringPiece& haystack, }} // namespaces
const StringPiece& needles);
}}


using namespace folly; using namespace folly;
using namespace std; using namespace std;
Expand Down

0 comments on commit 4988b28

Please sign in to comment.