Skip to content

Commit

Permalink
Backed out changeset 09c1712854b3
Browse files Browse the repository at this point in the history
Summary: Removing COW from fbstring had adverse memory consequences when sync'd with libgcc. Revert this diff to keep folly and libgcc in sync.

Reviewed By: yfeldblum, luciang

Differential Revision: D4019604

fbshipit-source-id: 80bd31c220098bfab37f0effc90f67876432369d
  • Loading branch information
Gownta authored and Facebook Github Bot committed Oct 14, 2016
1 parent 2c0ece4 commit fc838f2
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 41 deletions.
111 changes: 72 additions & 39 deletions folly/FBString.h
Expand Up @@ -289,13 +289,12 @@ class fbstring_core_model {
* The storage is selected as follows (assuming we store one-byte
* characters on a 64-bit machine): (a) "small" strings between 0 and
* 23 chars are stored in-situ without allocation (the rightmost byte
* stores the size); (b) "medium" strings (> 23 chars) are stored in
* malloc-allocated memory that is copied eagerly.
* There exists a third storage category: (c) "large", which has the
* copy-on-write optimization. COW was disallowed in C++11, so large is
* now deprecated in fbstring_core. fbstring_core no longer creates large
* strings, though still works with them. Later, large strings will be
* completely removed.
* stores the size); (b) "medium" strings from 24 through 254 chars
* are stored in malloc-allocated memory that is copied eagerly; (c)
* "large" strings of 255 chars and above are stored in a similar
* structure as medium arrays, except that the string is
* reference-counted and copied lazily. the reference count is
* allocated right before the character array.
*
* The discriminator between these three strategies sits in two
* bits of the rightmost char of the storage. If neither is set, then the
Expand Down Expand Up @@ -326,10 +325,18 @@ template <class Char> class fbstring_core {

fbstring_core(const fbstring_core & rhs) {
FBSTRING_ASSERT(&rhs != this);
if (rhs.category() == Category::isSmall) {
makeSmall(rhs);
} else {
makeMedium(rhs);
switch (rhs.category()) {
case Category::isSmall:
copySmall(rhs);
break;
case Category::isMedium:
copyMedium(rhs);
break;
case Category::isLarge:
copyLarge(rhs);
break;
default:
fbstring_detail::assume_unreachable();
}
FBSTRING_ASSERT(size() == rhs.size());
FBSTRING_ASSERT(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
Expand All @@ -347,8 +354,10 @@ template <class Char> class fbstring_core {
bool disableSSO = FBSTRING_DISABLE_SSO) {
if (!disableSSO && size <= maxSmallSize) {
initSmall(data, size);
} else {
} else if (size <= maxMediumSize) {
initMedium(data, size);
} else {
initLarge(data, size);
}
FBSTRING_ASSERT(this->size() == size);
FBSTRING_ASSERT(
Expand Down Expand Up @@ -608,7 +617,6 @@ template <class Char> class fbstring_core {
}

void setCapacity(size_t cap, Category cat) {
FBSTRING_ASSERT(cat != Category::isLarge);
capacity_ = kIsLittleEndian
? cap | (static_cast<size_t>(cat) << kCategoryShift)
: (cap << 2) | static_cast<size_t>(cat);
Expand Down Expand Up @@ -652,9 +660,9 @@ template <class Char> class fbstring_core {
FBSTRING_ASSERT(category() == Category::isSmall && size() == s);
}

void makeSmall(const fbstring_core&);
void makeMedium(const fbstring_core&);
void makeLarge(const fbstring_core&);
void copySmall(const fbstring_core&);
void copyMedium(const fbstring_core&);
void copyLarge(const fbstring_core&);

void initSmall(const Char* data, size_t size);
void initMedium(const Char* data, size_t size);
Expand All @@ -673,7 +681,7 @@ template <class Char> class fbstring_core {
};

template <class Char>
inline void fbstring_core<Char>::makeSmall(const fbstring_core& rhs) {
inline void fbstring_core<Char>::copySmall(const fbstring_core& rhs) {
static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure");
static_assert(
offsetof(MediumLarge, size_) == sizeof(ml_.data_),
Expand All @@ -692,7 +700,7 @@ inline void fbstring_core<Char>::makeSmall(const fbstring_core& rhs) {
}

template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::makeMedium(
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::copyMedium(
const fbstring_core& rhs) {
// Medium strings are copied eagerly. Don't forget to allocate
// one extra Char for the null terminator.
Expand All @@ -707,7 +715,7 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::makeMedium(
}

template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::makeLarge(
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::copyLarge(
const fbstring_core& rhs) {
// Large strings are just refcounted
ml_ = rhs.ml_;
Expand Down Expand Up @@ -844,16 +852,29 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::reserveMedium(
if (minCapacity <= ml_.capacity()) {
return; // nothing to do, there's enough room
}
// Keep the string at medium size. Don't forget to allocate
// one extra Char for the terminating null.
size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char));
// Also copies terminator.
ml_.data_ = static_cast<Char*>(smartRealloc(
ml_.data_,
(ml_.size_ + 1) * sizeof(Char),
(ml_.capacity() + 1) * sizeof(Char),
capacityBytes));
ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium);
if (minCapacity <= maxMediumSize) {
// Keep the string at medium size. Don't forget to allocate
// one extra Char for the terminating null.
size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char));
// Also copies terminator.
ml_.data_ = static_cast<Char*>(smartRealloc(
ml_.data_,
(ml_.size_ + 1) * sizeof(Char),
(ml_.capacity() + 1) * sizeof(Char),
capacityBytes));
ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium);
} else {
// Conversion from medium to large string
fbstring_core nascent;
// Will recurse to another branch of this function
nascent.reserve(minCapacity);
nascent.ml_.size_ = ml_.size_;
// Also copies terminator.
fbstring_detail::podCopy(
ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_);
nascent.swap(*this);
FBSTRING_ASSERT(capacity() >= minCapacity);
}
}

template <class Char>
Expand All @@ -863,17 +884,29 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::reserveSmall(
if (!disableSSO && minCapacity <= maxSmallSize) {
// small
// Nothing to do, everything stays put
return;
} else if (minCapacity <= maxMediumSize) {
// medium
// Don't forget to allocate one extra Char for the terminating null
auto const allocSizeBytes =
goodMallocSize((1 + minCapacity) * sizeof(Char));
auto const pData = static_cast<Char*>(checkedMalloc(allocSizeBytes));
auto const size = smallSize();
// Also copies terminator.
fbstring_detail::podCopy(small_, small_ + size + 1, pData);
ml_.data_ = pData;
ml_.size_ = size;
ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium);
} else {
// large
auto const newRC = RefCounted::create(&minCapacity);
auto const size = smallSize();
// Also copies terminator.
fbstring_detail::podCopy(small_, small_ + size + 1, newRC->data_);
ml_.data_ = newRC->data_;
ml_.size_ = size;
ml_.setCapacity(minCapacity, Category::isLarge);
FBSTRING_ASSERT(capacity() >= minCapacity);
}
// Don't forget to allocate one extra Char for the terminating null
auto const allocSizeBytes = goodMallocSize((1 + minCapacity) * sizeof(Char));
auto const pData = static_cast<Char*>(checkedMalloc(allocSizeBytes));
auto const size = smallSize();
// Also copies terminator.
fbstring_detail::podCopy(small_, small_ + size + 1, pData);
ml_.data_ = pData;
ml_.size_ = size;
ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium);
}

template <class Char>
Expand Down
10 changes: 8 additions & 2 deletions folly/docs/FBString.md
Expand Up @@ -4,7 +4,7 @@
`fbstring` is a drop-in replacement for `std::string`. The main
benefit of `fbstring` is significantly increased performance on
virtually all important primitives. This is achieved by using a
two-tiered storage strategy and by cooperating with the memory
three-tiered storage strategy and by cooperating with the memory
allocator. In particular, `fbstring` is designed to detect use of
jemalloc and cooperate with it to achieve significant improvements in
speed and memory usage.
Expand All @@ -18,14 +18,20 @@ architectures.
* Small strings (<= 23 chars) are stored in-situ without memory
allocation.

* Other strings (> 23 chars) are stored in malloc-allocated
* Medium strings (24 - 255 chars) are stored in malloc-allocated
memory and copied eagerly.

* Large strings (> 255 chars) are stored in malloc-allocated memory and
copied lazily.

### Implementation highlights
***

* 100% compatible with `std::string`.

* Thread-safe reference counted copy-on-write for strings "large"
strings (> 255 chars).

* Uses `malloc` instead of allocators.

* Jemalloc-friendly. `fbstring` automatically detects if application
Expand Down

0 comments on commit fc838f2

Please sign in to comment.