Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Stats::RawStatData into a StatsOptions struct #3629

Merged
merged 24 commits into from
Jul 13, 2018
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
bb349a1
Refactor Stats::RawStatData into a StatsOptions struct.
ambuc Jun 14, 2018
7bfa762
Store StatsOptions values as size_t
ambuc Jun 14, 2018
27990bf
Add heapAllocator stat name truncation in ThreadLocalStore, plus nits
ambuc Jun 25, 2018
b92ab75
Remove unused BlockMemoryHashSet accessor function
ambuc Jun 25, 2018
3a7aff1
Rename safeInitialize() to truncateAndInit()
ambuc Jun 26, 2018
3f38407
Change interface of sizeGivenName()
ambuc Jun 26, 2018
19f9f3c
Use new translateBootstrap interface in v1_to_bootstrap
ambuc Jul 6, 2018
06bd4c2
Fix formatting
ambuc Jul 6, 2018
217f72f
Add documentation, re-trigger ci
ambuc Jul 6, 2018
1ae932c
Rework some interfaces to pass statsOptions rather than Scope at lowe…
ambuc Jul 10, 2018
b07d5c6
Prefer UNREFERENCED_PARAMETER to (void)
ambuc Jul 10, 2018
732af4f
Better inline docs for stat name / object / suffix terminology
ambuc Jul 10, 2018
7328dc0
Merge remote-tracking branch 'upstream/master' into refactor-StatsOpt…
ambuc Jul 10, 2018
71f6b24
Edit stats inline documentation
ambuc Jul 11, 2018
2aa9d8c
Provide safe copy assertion in RawStatData::initialize()
ambuc Jul 11, 2018
911fc5d
Abstract checkAndInit() and truncateAndInit() with shared initializat…
ambuc Jul 11, 2018
dbcdb72
Formatting fixes and nits
ambuc Jul 11, 2018
9a30244
Merge remote-tracking branch 'upstream/master' into refactor-StatsOpt…
ambuc Jul 12, 2018
aeb9251
Refactor sizeGivenName(), sizeGivenStatsOptions() into structSize(), …
ambuc Jul 12, 2018
cb0fd8a
Add todo addressing embedded assumption about allocator fallback
ambuc Jul 12, 2018
19e0311
Merge remote-tracking branch 'upstream/master' into refactor-StatsOpt…
ambuc Jul 12, 2018
4cad816
Throw std::bad_alloc() in HeapRawStatDataAllocator::alloc()
ambuc Jul 13, 2018
9c6b880
Merge remote-tracking branch 'upstream/master' into refactor-StatsOpt…
ambuc Jul 13, 2018
c1fa6fb
Merge remote-tracking branch 'upstream/master' into refactor-StatsOpt…
ambuc Jul 13, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions source/common/common/block_memory_hash_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ template <class Value> class BlockMemoryHashSet : public Logger::Loggable<Logger

/** Examines the data structures to see if they are sane, assert-failing on any trouble. */
void sanityCheck() {
RELEASE_ASSERT(control_->size <= control_->hash_set_options.capacity);
RELEASE_ASSERT(control_->size <= control_->hash_set_options.capacity, "");

// As a sanity check, make sure there are control_->size values
// reachable from the slots, each of which has a valid
Expand All @@ -114,7 +114,7 @@ template <class Value> class BlockMemoryHashSet : public Logger::Loggable<Logger
uint32_t next = 0; // initialized to silence compilers.
for (uint32_t cell_index = slots_[slot];
(cell_index != Sentinal) && (num_values <= control_->size); cell_index = next) {
RELEASE_ASSERT(cell_index < control_->hash_set_options.capacity);
RELEASE_ASSERT(cell_index < control_->hash_set_options.capacity, "");
Cell& cell = getCell(cell_index);
absl::string_view key = cell.value.key();
RELEASE_ASSERT(computeSlot(key) == slot, "");
Expand Down Expand Up @@ -330,7 +330,7 @@ template <class Value> class BlockMemoryHashSet : public Logger::Loggable<Logger
// sizeof(Cell) includes 'sizeof Value' which may not be accurate. So we need to
// subtract that off, and add the template method's view of the actual value-size.
uint64_t cell_size =
align(sizeof(Cell) + Value::sizeGivenStatsOptions(stats_options) - sizeof(Value));
align(sizeof(Cell) + Value::structSizeWithOptions(stats_options) - sizeof(Value));
return cell_index * cell_size;
}

Expand All @@ -340,7 +340,7 @@ template <class Value> class BlockMemoryHashSet : public Logger::Loggable<Logger
Cell& getCell(uint32_t cell_index) {
// Because the key-size is parameteriziable, an array-lookup on sizeof(Cell) does not work.
char* ptr = reinterpret_cast<char*>(cells_) + cellOffset(cell_index, stats_options_);
RELEASE_ASSERT((reinterpret_cast<uint64_t>(ptr) & (calculateAlignment() - 1)) == 0);
RELEASE_ASSERT((reinterpret_cast<uint64_t>(ptr) & (calculateAlignment() - 1)) == 0, "");
return *reinterpret_cast<Cell*>(ptr);
}

Expand Down
31 changes: 17 additions & 14 deletions source/common/stats/stats_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ bool regexStartsWithDot(absl::string_view regex) {
// Normally the compiler would do this, but because name_ is a flexible-array-length
// element, the compiler can't. RawStatData is put into an array in HotRestartImpl, so
// it's important that each element starts on the required alignment for the type.
uint64_t RawStatData::sizeGivenName(const absl::string_view name) {
return roundUpMultipleNaturalAlignment(sizeof(RawStatData) + name.size() + 1);
uint64_t RawStatData::structSize(uint64_t name_size) {
return roundUpMultipleNaturalAlignment(sizeof(RawStatData) + name_size + 1);
}

uint64_t RawStatData::sizeGivenStatsOptions(const StatsOptions& stats_options) {
return roundUpMultipleNaturalAlignment(sizeof(RawStatData) + stats_options.maxNameLength() + 1);
uint64_t RawStatData::structSizeWithOptions(const StatsOptions& stats_options) {
return structSize(stats_options.maxNameLength());
}

std::string Utility::sanitizeStatsName(const std::string& name) {
Expand Down Expand Up @@ -136,9 +136,12 @@ bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector<Tag>
}

RawStatData* HeapRawStatDataAllocator::alloc(const std::string& name) {
uint64_t num_bytes_to_allocate = RawStatData::sizeGivenName(name);
uint64_t num_bytes_to_allocate = RawStatData::structSize(name.size());
RawStatData* data = static_cast<RawStatData*>(::calloc(num_bytes_to_allocate, 1));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this wasn't being done before, but shouldn't we return early if calloc returns a nullptr so we don't segfault by trying to write to it? If we're not equipped to handle this failure at the callsite, then maybe we should just throw instead of returning nullptr?

data->initialize(name, num_bytes_to_allocate);
if (data == nullptr) {
throw EnvoyException("HeapRawStatDataAllocator: unable to allocate a new stat");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be caught. Can you throw some other type of exception? (Like whatever type new() throws).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it to std::bad_alloc() in 4cad816.

}
data->checkAndInit(name, num_bytes_to_allocate);

Thread::ReleasableLockGuard lock(mutex_);
auto ret = stats_.insert(data);
Expand Down Expand Up @@ -332,31 +335,31 @@ void HeapRawStatDataAllocator::free(RawStatData& data) {
::free(&data);
}

void RawStatData::initialize(absl::string_view key, uint64_t num_bytes_allocated) {
void RawStatData::initialize(absl::string_view key, uint64_t xfer_size) {
ASSERT(!initialized());
ref_count_ = 1;
memcpy(name_, key.data(), xfer_size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so where is initialize called now? Does this need some safety guard for copy size? If not, can you add a comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So there are two initialize fns now -- one, RawStatData::initialize(key), which is used in cases where no truncation is performed (like in HeapRawStatDataAllocator::alloc(name)). The other, RawStatData::truncateAndInit(key, stats_options), gets called in BlockMemoryHashSet::insert(key), which has access to a local stats_options_ struct. So the former case doesn't guard for some max size.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we somehow assert that we aren't going to overwrite memory here? It's not super clear to me how we know that this is a safe copy (or if heap allocated pass in the size of the heap allocation and assert that)?

name_[xfer_size] = '\0';
}

void RawStatData::checkAndInit(absl::string_view key, uint64_t num_bytes_allocated) {
uint64_t xfer_size = key.size();
ASSERT(xfer_size <= num_bytes_allocated);
ASSERT(structSize(xfer_size) <= num_bytes_allocated);

memcpy(name_, key.data(), xfer_size);
name_[xfer_size] = '\0';
initialize(key, xfer_size);
}

void RawStatData::truncateAndInit(absl::string_view key, const StatsOptions& stats_options) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be a subtlety here that I'm missing, but can we just truncate the string_view, and pass it to initialize()? It seems like the majority of initialize() and truncateAndInit() are the same.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually really like this. I will refactor truncateAndInit() and checkAndInit() to use the same (private) initialize() method under the hood -- this makes the difference in logic very explicit.

ASSERT(!initialized());
if (key.size() > stats_options.maxNameLength()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe be more explicit here in naming this function:

  truncateAndInit()

ENVOY_LOG_MISC(
warn,
"Statistic '{}' is too long with {} characters, it will be truncated to {} characters", key,
key.size(), stats_options.maxNameLength());
}
ref_count_ = 1;

// key is not necessarily nul-terminated, but we want to make sure name_ is.
uint64_t xfer_size = std::min(stats_options.maxNameLength(), key.size());
memcpy(name_, key.data(), xfer_size);
name_[xfer_size] = '\0';
initialize(key, xfer_size);
}

HistogramStatisticsImpl::HistogramStatisticsImpl(const histogram_t* histogram_ptr)
Expand Down
14 changes: 7 additions & 7 deletions source/common/stats/stats_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class Utility {
* it can be allocated from shared memory if needed.
*
* @note Due to name_ being variable size, sizeof(RawStatData) probably isn't useful. Use
* RawStatData::sizeGivenName() or RawStatData::sizeGivenStatsOptions() instead.
* RawStatData::structSize() or RawStatData::structSizeWithOptions() instead.
*/
struct RawStatData {

Expand All @@ -207,22 +207,21 @@ struct RawStatData {
* and padding for alignment. Required for the HeapRawStatDataAllocator, which does not truncate
* at a maximum stat name length.
*/
static uint64_t sizeGivenName(const absl::string_view name);
static uint64_t structSize(uint64_t name_size);

/**
* Returns the size of this struct, accounting for the length of name_
* and padding for alignment. Required by BlockMemoryHashSet, which has the context to report
* StatsOptions.
* Wrapper for structSize, taking a StatsOptions struct.
* Required by BlockMemoryHashSet, which has the context to supply StatsOptions.
*/
static uint64_t sizeGivenStatsOptions(const StatsOptions& stats_options);
static uint64_t structSizeWithOptions(const StatsOptions& stats_options);

/**
* Initializes this object to have the specified key,
* a refcount of 1, and all other values zero. Required for the HeapRawStatDataAllocator, which
* does not expect stat name truncation. We pass in the number of bytes allocated in order to
* assert the copy is safe inline.
*/
void initialize(absl::string_view key, uint64_t num_bytes_allocated);
void checkAndInit(absl::string_view key, uint64_t num_bytes_allocated);

/**
* Initializes this object to have the specified key,
Expand Down Expand Up @@ -254,6 +253,7 @@ struct RawStatData {
char name_[];

private:
void initialize(absl::string_view key, uint64_t num_bytes_allocated);
};

/**
Expand Down
5 changes: 5 additions & 0 deletions source/common/stats/thread_local_store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ StatType& ThreadLocalStoreImpl::ScopeImpl::safeMakeStat(
make_stat(parent_.alloc_, name, std::move(tag_extracted_name), std::move(tags));
if (stat == nullptr) {
parent_.num_last_resort_stats_.inc();
// TODO(jbuckland) Performing the fallback from non-heap allocator to heap allocator should be
// the responsibility of the non-heap allocator, not the client of the non-heap allocator.
// This branch will never be used in the non-hot-restart case, since the parent_.alloc_ object
// will throw instead of returning a nullptr; we should remove the assumption that this
// branching case is always available.
stat =
make_stat(parent_.heap_allocator_, name.substr(0, parent_.statsOptions().maxNameLength()),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fishy. Since we now throw if the heap allocator fails to allocate, this statement will never be called in the non-hot restart case. However, it's still an embedded assumption about the specific behavior of all allocators - fixed length if they can fail and non-fixed length if they cannot fail. Is there any problem with changing the hot restart allocator to perform the fallback rather than relying on the caller? Since this is technically correct, I think it's fine to leave it this way for now, but please toss in a TODO to get rid of this assumption.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this todo in cb0fd8a.

std::move(tag_extracted_name), std::move(tags));
Expand Down
1 change: 0 additions & 1 deletion source/extensions/filters/http/dynamo/dynamo_utility.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ std::string Utility::buildPartitionStatString(const std::string& stat_prefix,
if (stats_table_prefix.size() > remaining_size) {
stats_table_prefix = stats_table_prefix.substr(0, remaining_size);
}

return fmt::format("{}{}", stats_table_prefix, stats_partition_postfix);
}

Expand Down
4 changes: 2 additions & 2 deletions source/server/hot_restart_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ static BlockMemoryHashSetOptions blockMemHashOptions(uint64_t max_stats) {
SharedMemory& SharedMemory::initialize(uint64_t stats_set_size, Options& options) {
Api::OsSysCalls& os_sys_calls = Api::OsSysCallsSingleton::get();

const uint64_t entry_size = Stats::RawStatData::sizeGivenStatsOptions(options.statsOptions());
const uint64_t entry_size = Stats::RawStatData::structSizeWithOptions(options.statsOptions());
const uint64_t total_size = sizeof(SharedMemory) + stats_set_size;

int flags = O_RDWR;
Expand Down Expand Up @@ -172,7 +172,7 @@ void HotRestartImpl::free(Stats::RawStatData& data) {
bool key_removed = stats_set_->remove(data.key());
ASSERT(key_removed);
memset(static_cast<void*>(&data), 0,
Stats::RawStatData::sizeGivenStatsOptions(options_.statsOptions()));
Stats::RawStatData::structSizeWithOptions(options_.statsOptions()));
}

int HotRestartImpl::bindDomainSocket(uint64_t id) {
Expand Down
2 changes: 1 addition & 1 deletion test/common/common/block_memory_hash_set_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class BlockMemoryHashSetTest : public testing::Test {
memcpy(name, key.data(), xfer);
name[xfer] = '\0';
}
static uint64_t sizeGivenStatsOptions(const Stats::StatsOptions& stats_options) {
static uint64_t structSizeWithOptions(const Stats::StatsOptions& stats_options) {
UNREFERENCED_PARAMETER(stats_options);
return sizeof(TestValue);
}
Expand Down
29 changes: 28 additions & 1 deletion test/common/stats/stats_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,33 @@ TEST(TagProducerTest, CheckConstructor) {
"No regex specified for tag specifier and no default regex for name: 'test_extractor'");
}

// Check consistency of internal stat representation
TEST(RawStatDataTest, Consistency) {
HeapRawStatDataAllocator alloc;
// Generate a stat, encode it to hex, and take the hash of that hex string. We expect the hash to
// vary only when the internal representation of a stat has been intentionally changed, in which
// case SharedMemory::VERSION should be incremented as well.
uint64_t expected_hash = 1874506077228772558;
Stats::StatsOptionsImpl stats_options;
uint64_t max_name_length = stats_options.maxNameLength();

const std::string name_1(max_name_length, 'A');
RawStatData* stat_1 = alloc.alloc(name_1);
std::string stat_hex_dump_1 =
Hex::encode(reinterpret_cast<uint8_t*>(stat_1), sizeof(RawStatData) + max_name_length);
EXPECT_EQ(HashUtil::xxHash64(stat_hex_dump_1), expected_hash);
alloc.free(*stat_1);

// If a stat name is truncated, we expect that its internal representation is the same as if it
// had been initialized with the already-truncated name.
const std::string name_2(max_name_length + 1, 'A');
RawStatData* stat_2 = alloc.alloc(name_2);
std::string stat_hex_dump_2 =
Hex::encode(reinterpret_cast<uint8_t*>(stat_2), sizeof(RawStatData) + max_name_length);
EXPECT_EQ(HashUtil::xxHash64(stat_hex_dump_2), expected_hash);
alloc.free(*stat_2);
}

TEST(RawStatDataTest, HeapAlloc) {
HeapRawStatDataAllocator alloc;
RawStatData* stat_1 = alloc.alloc("ref_name");
Expand All @@ -501,7 +528,7 @@ TEST(RawStatDataTest, Truncate) {
Stats::StatsOptionsImpl stats_options;
const std::string long_string(stats_options.maxNameLength() + 1, 'A');
RawStatData* stat =
static_cast<RawStatData*>(::calloc(RawStatData::sizeGivenStatsOptions(stats_options), 1));
static_cast<RawStatData*>(::calloc(RawStatData::structSizeWithOptions(stats_options), 1));
EXPECT_LOG_CONTAINS("warning", "is too long with",
stat->truncateAndInit(long_string, stats_options));
::free(stat);
Expand Down
2 changes: 1 addition & 1 deletion test/common/stats/thread_local_store_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class StatsThreadLocalStoreTest : public testing::Test, public RawStatDataAlloca
}));

EXPECT_CALL(*this, alloc("stats.overflow"));
store_.reset(new ThreadLocalStoreImpl(options_, *this));
store_ = std::make_unique<ThreadLocalStoreImpl>(options_, *this);
store_->addSink(sink_);
}

Expand Down
2 changes: 1 addition & 1 deletion test/test_common/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ class TestAllocator : public RawStatDataAllocator {
CSmartPtr<RawStatData, freeAdapter>& stat_ref = stats_[name];
if (!stat_ref) {
stat_ref.reset(static_cast<RawStatData*>(
::calloc(RawStatData::sizeGivenStatsOptions(stats_options), 1)));
::calloc(RawStatData::structSizeWithOptions(stats_options), 1)));
stat_ref->truncateAndInit(name, stats_options);
} else {
stat_ref->ref_count_++;
Expand Down