From c69d0467bb75252d74bcb9304ff0db98ff9d1c6e Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 3 Oct 2023 17:32:43 -0500 Subject: [PATCH 01/11] Initial work to add refresh interval configuration option --- include/cassandra.h | 4 ++++ src/cluster_config.cpp | 8 ++++++++ src/config.hpp | 10 +++++++++- src/constants.hpp | 1 + 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/cassandra.h b/include/cassandra.h index bbe9c6c1c..a23a2da5d 100644 --- a/include/cassandra.h +++ b/include/cassandra.h @@ -2934,6 +2934,10 @@ CASS_EXPORT void cass_cluster_set_monitor_reporting_interval(CassCluster* cluster, unsigned interval_secs); +CASS_EXPORT CassError +cass_cluster_set_histogram_refresh_interval(CassCluster* cluster, + unsigned refresh_interval); + /*********************************************************************************** * * Session diff --git a/src/cluster_config.cpp b/src/cluster_config.cpp index 9fac5b326..408e3f82e 100644 --- a/src/cluster_config.cpp +++ b/src/cluster_config.cpp @@ -577,6 +577,14 @@ void cass_cluster_set_monitor_reporting_interval(CassCluster* cluster, unsigned cluster->config().set_monitor_reporting_interval_secs(interval_secs); } +CassError cass_cluster_set_histogram_refresh_interval(CassCluster* cluster, unsigned refresh_interval) { + if (refresh_interval <= 0) { + return CASS_ERROR_LIB_BAD_PARAMS; + } + cluster->config().set_cluster_histogram_refresh_interval(refresh_interval); + return CASS_OK; +} + void cass_cluster_free(CassCluster* cluster) { delete cluster->from(); } } // extern "C" diff --git a/src/config.hpp b/src/config.hpp index 6d34cd6b2..7a30f7913 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -74,7 +74,8 @@ class Config { , is_client_id_set_(false) , host_listener_(new DefaultHostListener()) , monitor_reporting_interval_secs_(CASS_DEFAULT_CLIENT_MONITOR_EVENTS_INTERVAL_SECS) - , cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory()) { + , cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory()) + , histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL) { profiles_.set_empty_key(String()); // Assign the defaults to the cluster profile @@ -392,6 +393,12 @@ class Config { } } + unsigned cluster_histogram_refresh_interval() const { return histogram_refresh_interval_; } + + void set_cluster_histogram_refresh_interval(unsigned refresh_interval) { + histogram_refresh_interval_ = refresh_interval; + } + private: void init_profiles(); @@ -441,6 +448,7 @@ class Config { unsigned monitor_reporting_interval_secs_; CloudSecureConnectionConfig cloud_secure_connection_config_; ClusterMetadataResolverFactory::Ptr cluster_metadata_resolver_factory_; + unsigned histogram_refresh_interval_; }; }}} // namespace datastax::internal::core diff --git a/src/constants.hpp b/src/constants.hpp index 2d2a743a7..72636c8df 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -142,6 +142,7 @@ #define CASS_DEFAULT_MAX_TRACING_DATA_WAIT_TIME_MS 15 #define CASS_DEFAULT_RETRY_TRACING_DATA_WAIT_TIME_MS 3 #define CASS_DEFAULT_TRACING_CONSISTENCY CASS_CONSISTENCY_ONE +#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL 0 // Request-level defaults #define CASS_DEFAULT_CONSISTENCY CASS_CONSISTENCY_LOCAL_ONE From fd808019daa0898b327ce57186810a7e4d15094d Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 4 Oct 2023 14:16:33 -0500 Subject: [PATCH 02/11] Pass histogram refresh interval through to the histogram --- src/config.hpp | 2 +- src/constants.hpp | 2 +- src/metrics.hpp | 14 ++++++++++---- src/session_base.cpp | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/config.hpp b/src/config.hpp index 7a30f7913..b267bb64e 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -75,7 +75,7 @@ class Config { , host_listener_(new DefaultHostListener()) , monitor_reporting_interval_secs_(CASS_DEFAULT_CLIENT_MONITOR_EVENTS_INTERVAL_SECS) , cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory()) - , histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL) { + , histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING) { profiles_.set_empty_key(String()); // Assign the defaults to the cluster profile diff --git a/src/constants.hpp b/src/constants.hpp index 72636c8df..bc7691f8a 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -142,7 +142,7 @@ #define CASS_DEFAULT_MAX_TRACING_DATA_WAIT_TIME_MS 15 #define CASS_DEFAULT_RETRY_TRACING_DATA_WAIT_TIME_MS 3 #define CASS_DEFAULT_TRACING_CONSISTENCY CASS_CONSISTENCY_ONE -#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL 0 +#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING 0 // Request-level defaults #define CASS_DEFAULT_CONSISTENCY CASS_CONSISTENCY_LOCAL_ONE diff --git a/src/metrics.hpp b/src/metrics.hpp index 59bee6f07..71ce22e3c 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -268,9 +268,10 @@ class Metrics : public Allocated { int64_t percentile_999th; }; - Histogram(ThreadState* thread_state) + Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING) : thread_state_(thread_state) , histograms_(new PerThreadHistogram[thread_state->max_threads()]) { + refresh_interval_ = refresh_interval; hdr_init(1LL, HIGHEST_TRACKABLE_VALUE, 3, &histogram_); uv_mutex_init(&mutex_); } @@ -318,6 +319,9 @@ class Metrics : public Allocated { } private: + + unsigned refresh_interval_; + class WriterReaderPhaser { public: WriterReaderPhaser() @@ -413,10 +417,10 @@ class Metrics : public Allocated { DISALLOW_COPY_AND_ASSIGN(Histogram); }; - Metrics(size_t max_threads) + Metrics(size_t max_threads, unsigned histogram_refresh_interval) : thread_state_(max_threads) - , request_latencies(&thread_state_) - , speculative_request_latencies(&thread_state_) + , request_latencies(&thread_state_, histogram_refresh_interval) + , speculative_request_latencies(&thread_state_, histogram_refresh_interval) , request_rates(&thread_state_) , total_connections(&thread_state_) , connection_timeouts(&thread_state_) @@ -447,6 +451,8 @@ class Metrics : public Allocated { Counter connection_timeouts; Counter request_timeouts; + unsigned histogram_refresh_interval; + private: DISALLOW_COPY_AND_ASSIGN(Metrics); }; diff --git a/src/session_base.cpp b/src/session_base.cpp index 6ccb4f7c5..4cdc29ea2 100644 --- a/src/session_base.cpp +++ b/src/session_base.cpp @@ -97,7 +97,7 @@ Future::Ptr SessionBase::connect(const Config& config, const String& keyspace) { random_.reset(); } - metrics_.reset(new Metrics(config.thread_count_io() + 1)); + metrics_.reset(new Metrics(config.thread_count_io() + 1, config.cluster_histogram_refresh_interval())); cluster_.reset(); ClusterConnector::Ptr connector( From dd7f5787b2ce79835af7eb2783ac849a650063bd Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 5 Oct 2023 15:20:12 -0500 Subject: [PATCH 03/11] Initial impl which passes the existing unit test. Up next: add a new unit test or two to confirm that this impl actually does what we think it does. --- src/metrics.hpp | 81 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index 71ce22e3c..e310c0f93 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -23,6 +23,7 @@ #include "allocated.hpp" #include "atomic.hpp" #include "constants.hpp" +#include "get_time.hpp" #include "scoped_lock.hpp" #include "scoped_ptr.hpp" #include "utils.hpp" @@ -271,7 +272,11 @@ class Metrics : public Allocated { Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING) : thread_state_(thread_state) , histograms_(new PerThreadHistogram[thread_state->max_threads()]) { + refresh_interval_ = refresh_interval; + refresh_timestamp_ = get_time_since_epoch_ms(); + cached_snapshot_ = Snapshot {}; + hdr_init(1LL, HIGHEST_TRACKABLE_VALUE, 3, &histogram_); uv_mutex_init(&mutex_); } @@ -287,40 +292,62 @@ class Metrics : public Allocated { void get_snapshot(Snapshot* snapshot) const { ScopedMutex l(&mutex_); - hdr_histogram* h = histogram_; - for (size_t i = 0; i < thread_state_->max_threads(); ++i) { - histograms_[i].add(h); + + uint64_t now = get_time_since_epoch_ms(); + if (now - refresh_timestamp_ >= refresh_interval_) { + + // Reset the combined histogram back to a zero state + hdr_reset(histogram_); + + // Add individual per-thread histograms to the combined histogram + for (size_t i = 0; i < thread_state_->max_threads(); ++i) { + histograms_[i].add(histogram_); + + // TODO: Reset per-thread histograms as well + } + + cached_snapshot_ = build_new_snapshot(histogram_); + refresh_timestamp_ = now; } - if (h->total_count == 0) { + // Processing continues from here whether we updated the cached snapshot or not + if (histogram_->total_count == 0) { // There is no data; default to 0 for the stats. - snapshot->max = 0; - snapshot->min = 0; - snapshot->mean = 0; - snapshot->stddev = 0; - snapshot->median = 0; - snapshot->percentile_75th = 0; - snapshot->percentile_95th = 0; - snapshot->percentile_98th = 0; - snapshot->percentile_99th = 0; - snapshot->percentile_999th = 0; + copy_snapshot(Snapshot {}, snapshot); } else { - snapshot->max = hdr_max(h); - snapshot->min = hdr_min(h); - snapshot->mean = static_cast(hdr_mean(h)); - snapshot->stddev = static_cast(hdr_stddev(h)); - snapshot->median = hdr_value_at_percentile(h, 50.0); - snapshot->percentile_75th = hdr_value_at_percentile(h, 75.0); - snapshot->percentile_95th = hdr_value_at_percentile(h, 95.0); - snapshot->percentile_98th = hdr_value_at_percentile(h, 98.0); - snapshot->percentile_99th = hdr_value_at_percentile(h, 99.0); - snapshot->percentile_999th = hdr_value_at_percentile(h, 99.9); + copy_snapshot(cached_snapshot_, snapshot); } } private: - unsigned refresh_interval_; + void copy_snapshot(Snapshot from, Snapshot* to) const { + to->max = from.max; + to->min = from.min; + to->mean = from.mean; + to->stddev = from.stddev; + to->median = from.median; + to->percentile_75th = from.percentile_75th; + to->percentile_95th = from.percentile_95th; + to->percentile_98th = from.percentile_98th; + to->percentile_99th = from.percentile_99th; + to->percentile_999th = from.percentile_999th; + } + + Snapshot build_new_snapshot(hdr_histogram* h) const { + return Snapshot { + hdr_min(h), + hdr_max(h), + static_cast(hdr_mean(h)), + static_cast(hdr_stddev(h)), + hdr_value_at_percentile(h, 50.0), + hdr_value_at_percentile(h, 75.0), + hdr_value_at_percentile(h, 95.0), + hdr_value_at_percentile(h, 98.0), + hdr_value_at_percentile(h, 99.0), + hdr_value_at_percentile(h, 99.9) + }; + } class WriterReaderPhaser { public: @@ -413,6 +440,10 @@ class Metrics : public Allocated { hdr_histogram* histogram_; mutable uv_mutex_t mutex_; + unsigned refresh_interval_; + mutable uint64_t refresh_timestamp_; + mutable Snapshot cached_snapshot_; + private: DISALLOW_COPY_AND_ASSIGN(Histogram); }; From 4f15260e33533ab737cf7b28481bf5ef487d8bb5 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 6 Oct 2023 11:13:19 -0500 Subject: [PATCH 04/11] Intermediate checkin; still have some kinks to work out --- src/metrics.hpp | 16 ++++++- tests/src/unit/tests/test_metrics.cpp | 62 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index e310c0f93..b1178c54a 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -286,7 +286,7 @@ class Metrics : public Allocated { uv_mutex_destroy(&mutex_); } - void record_value(int64_t value) { + void record_value(int64_t value) const { histograms_[thread_state_->current_thread_id()].record_value(value); } @@ -304,6 +304,7 @@ class Metrics : public Allocated { histograms_[i].add(histogram_); // TODO: Reset per-thread histograms as well + //histograms_[i].clear(); } cached_snapshot_ = build_new_snapshot(histogram_); @@ -429,6 +430,19 @@ class Metrics : public Allocated { hdr_reset(from); } + // Same as add() above but without the actual addition. The contract here (and + // in the other methods of this class) is that whenever the phase is flipped the + // now-unused histogram has to be cleared. That provides the guarantee that whatever + // histogram we move to here will be starting out as empty. Without such a guarantee + // we'd have to explicitly clear the histogram that will be pointed to by active_index_ + // which we can't really do (in an atomic way) given the current concurrency regime. + void clear() { + int inactive_index = active_index_.exchange(!active_index_.load()); + hdr_histogram* from = histograms_[inactive_index]; + phaser_.flip_phase(); + hdr_reset(from); + } + private: hdr_histogram* histograms_[2]; mutable Atomic active_index_; diff --git a/tests/src/unit/tests/test_metrics.cpp b/tests/src/unit/tests/test_metrics.cpp index 16218f80d..ee7a336df 100644 --- a/tests/src/unit/tests/test_metrics.cpp +++ b/tests/src/unit/tests/test_metrics.cpp @@ -144,6 +144,68 @@ TEST(MetricsUnitTest, HistogramEmpty) { EXPECT_EQ(snapshot.percentile_999th, 0); } +TEST(MetricsUnitTest, HistogramGetSnapshotResetsTheWorld) { + Metrics::ThreadState thread_state(1); + Metrics::Histogram histogram(&thread_state); + + for (uint64_t i = 1; i <= 100; ++i) { + histogram.record_value(i); + } + + // First get_snapshot() operation should reset all internal + // counters + Metrics::Histogram::Snapshot snapshot; + histogram.get_snapshot(&snapshot); + + histogram.record_value(200); + + histogram.get_snapshot(&snapshot); + + EXPECT_EQ(snapshot.min, 200); + EXPECT_EQ(snapshot.max, 200); + EXPECT_EQ(snapshot.median, 200); + EXPECT_EQ(snapshot.percentile_75th, 200); + EXPECT_EQ(snapshot.percentile_95th, 200); + EXPECT_EQ(snapshot.percentile_98th, 200); + EXPECT_EQ(snapshot.percentile_99th, 200); + EXPECT_EQ(snapshot.percentile_999th, 200); + EXPECT_EQ(snapshot.mean, 200); + EXPECT_EQ(snapshot.stddev, 0); +} + +TEST(MetricsUnitTest, HistogramWithRefreshInterval) { + unsigned refresh_interval = 1000; + Metrics::ThreadState thread_state(1); + Metrics::Histogram histogram(&thread_state, refresh_interval); + + for (uint64_t i = 1; i <= 100; ++i) { + histogram.record_value(i); + } + + //Metrics::Histogram::Snapshot snapshot1; + //histogram.get_snapshot(&snapshot1); + + test::Utils::msleep(1.2 * refresh_interval); + + for (uint64_t i = 101; i <= 200; ++i) { + histogram.record_value(i); + } + + Metrics::Histogram::Snapshot snapshot; + histogram.get_snapshot(&snapshot); + + EXPECT_EQ(snapshot.min, 101); + EXPECT_EQ(snapshot.max, 200); + EXPECT_EQ(snapshot.median, 150); + EXPECT_EQ(snapshot.percentile_75th, 175); + EXPECT_EQ(snapshot.percentile_95th, 195); + EXPECT_EQ(snapshot.percentile_98th, 198); + EXPECT_EQ(snapshot.percentile_99th, 199); + EXPECT_EQ(snapshot.percentile_999th, 200); + EXPECT_EQ(snapshot.mean, 150); + EXPECT_EQ(snapshot.stddev, 28); +} + TEST(MetricsUnitTest, HistogramWithThreads) { HistogramThreadArgs args[NUM_THREADS]; From 949081fa2362a4fecb6281b7a6cc29072e4144f2 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 6 Oct 2023 12:52:45 -0500 Subject: [PATCH 05/11] Well, at least we're actually passing the tests now --- src/config.hpp | 2 +- src/constants.hpp | 2 +- src/metrics.hpp | 37 ++++++++++------ tests/src/unit/tests/test_metrics.cpp | 62 +++++++++++++-------------- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/src/config.hpp b/src/config.hpp index b267bb64e..19ee0daca 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -75,7 +75,7 @@ class Config { , host_listener_(new DefaultHostListener()) , monitor_reporting_interval_secs_(CASS_DEFAULT_CLIENT_MONITOR_EVENTS_INTERVAL_SECS) , cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory()) - , histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING) { + , histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) { profiles_.set_empty_key(String()); // Assign the defaults to the cluster profile diff --git a/src/constants.hpp b/src/constants.hpp index bc7691f8a..371c74602 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -142,7 +142,7 @@ #define CASS_DEFAULT_MAX_TRACING_DATA_WAIT_TIME_MS 15 #define CASS_DEFAULT_RETRY_TRACING_DATA_WAIT_TIME_MS 3 #define CASS_DEFAULT_TRACING_CONSISTENCY CASS_CONSISTENCY_ONE -#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING 0 +#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH 0 // Request-level defaults #define CASS_DEFAULT_CONSISTENCY CASS_CONSISTENCY_LOCAL_ONE diff --git a/src/metrics.hpp b/src/metrics.hpp index b1178c54a..1dd297e4c 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -269,7 +269,7 @@ class Metrics : public Allocated { int64_t percentile_999th; }; - Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_CACHING) + Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) : thread_state_(thread_state) , histograms_(new PerThreadHistogram[thread_state->max_threads()]) { @@ -293,38 +293,47 @@ class Metrics : public Allocated { void get_snapshot(Snapshot* snapshot) const { ScopedMutex l(&mutex_); + // In the "no refresh" case (the default) fall back to the old behaviour; add per-thread + // timestamps to histogram_ (without any clearing of data) and return what's there. + if (refresh_interval_ == CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) { + + for (size_t i = 0; i < thread_state_->max_threads(); ++i) { + histograms_[i].add(histogram_); + } + + if (histogram_->total_count == 0) { + // There is no data; default to 0 for the stats. + copy_snapshot(Snapshot {}, snapshot); + } else { + copy_snapshot(build_new_snapshot(histogram_), snapshot); + } + return; + } + + // Refresh interval is in use. If we've exceeded the interval clear histogram_, + // compute a new aggregate histogram and build (and cache) a new snapshot. Otherwise + // just return the cached version. uint64_t now = get_time_since_epoch_ms(); if (now - refresh_timestamp_ >= refresh_interval_) { - // Reset the combined histogram back to a zero state hdr_reset(histogram_); - // Add individual per-thread histograms to the combined histogram for (size_t i = 0; i < thread_state_->max_threads(); ++i) { histograms_[i].add(histogram_); - - // TODO: Reset per-thread histograms as well - //histograms_[i].clear(); } cached_snapshot_ = build_new_snapshot(histogram_); refresh_timestamp_ = now; } - // Processing continues from here whether we updated the cached snapshot or not - if (histogram_->total_count == 0) { - // There is no data; default to 0 for the stats. - copy_snapshot(Snapshot {}, snapshot); - } else { - copy_snapshot(cached_snapshot_, snapshot); - } + copy_snapshot(cached_snapshot_, snapshot); } private: void copy_snapshot(Snapshot from, Snapshot* to) const { - to->max = from.max; to->min = from.min; + to->max = from.max; to->mean = from.mean; to->stddev = from.stddev; to->median = from.median; diff --git a/tests/src/unit/tests/test_metrics.cpp b/tests/src/unit/tests/test_metrics.cpp index ee7a336df..b62b5db63 100644 --- a/tests/src/unit/tests/test_metrics.cpp +++ b/tests/src/unit/tests/test_metrics.cpp @@ -144,56 +144,54 @@ TEST(MetricsUnitTest, HistogramEmpty) { EXPECT_EQ(snapshot.percentile_999th, 0); } -TEST(MetricsUnitTest, HistogramGetSnapshotResetsTheWorld) { +TEST(MetricsUnitTest, HistogramWithRefreshInterval) { + unsigned refresh_interval = 1000; Metrics::ThreadState thread_state(1); - Metrics::Histogram histogram(&thread_state); - - for (uint64_t i = 1; i <= 100; ++i) { - histogram.record_value(i); - } + Metrics::Histogram histogram(&thread_state, refresh_interval); - // First get_snapshot() operation should reset all internal - // counters Metrics::Histogram::Snapshot snapshot; - histogram.get_snapshot(&snapshot); - - histogram.record_value(200); + // Retrieval before the first interval runs will simply return zeros histogram.get_snapshot(&snapshot); - - EXPECT_EQ(snapshot.min, 200); - EXPECT_EQ(snapshot.max, 200); - EXPECT_EQ(snapshot.median, 200); - EXPECT_EQ(snapshot.percentile_75th, 200); - EXPECT_EQ(snapshot.percentile_95th, 200); - EXPECT_EQ(snapshot.percentile_98th, 200); - EXPECT_EQ(snapshot.percentile_99th, 200); - EXPECT_EQ(snapshot.percentile_999th, 200); - EXPECT_EQ(snapshot.mean, 200); + EXPECT_EQ(snapshot.min, 0); + EXPECT_EQ(snapshot.max, 0); + EXPECT_EQ(snapshot.median, 0); + EXPECT_EQ(snapshot.percentile_75th, 0); + EXPECT_EQ(snapshot.percentile_95th, 0); + EXPECT_EQ(snapshot.percentile_98th, 0); + EXPECT_EQ(snapshot.percentile_99th, 0); + EXPECT_EQ(snapshot.percentile_999th, 0); + EXPECT_EQ(snapshot.mean, 0); EXPECT_EQ(snapshot.stddev, 0); -} - -TEST(MetricsUnitTest, HistogramWithRefreshInterval) { - unsigned refresh_interval = 1000; - Metrics::ThreadState thread_state(1); - Metrics::Histogram histogram(&thread_state, refresh_interval); + // Values added during the first interval (or for that matter any + // interval) will be buffered in per-thread counters and will be + // included in the next generated snapshot for (uint64_t i = 1; i <= 100; ++i) { histogram.record_value(i); } + test::Utils::msleep(1.2 * refresh_interval); - //Metrics::Histogram::Snapshot snapshot1; - //histogram.get_snapshot(&snapshot1); + histogram.get_snapshot(&snapshot); + EXPECT_EQ(snapshot.min, 1); + EXPECT_EQ(snapshot.max, 100); + EXPECT_EQ(snapshot.median, 50); + EXPECT_EQ(snapshot.percentile_75th, 75); + EXPECT_EQ(snapshot.percentile_95th, 95); + EXPECT_EQ(snapshot.percentile_98th, 98); + EXPECT_EQ(snapshot.percentile_99th, 99); + EXPECT_EQ(snapshot.percentile_999th, 100); + EXPECT_EQ(snapshot.mean, 50); + EXPECT_EQ(snapshot.stddev, 28); + // Generated snapshot should only include values added within + // the current interval test::Utils::msleep(1.2 * refresh_interval); - for (uint64_t i = 101; i <= 200; ++i) { histogram.record_value(i); } - Metrics::Histogram::Snapshot snapshot; histogram.get_snapshot(&snapshot); - EXPECT_EQ(snapshot.min, 101); EXPECT_EQ(snapshot.max, 200); EXPECT_EQ(snapshot.median, 150); From 23992002ed1e75896a3e3bbd5a2233acfd228525 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 6 Oct 2023 17:28:27 -0500 Subject: [PATCH 06/11] Add docs for entry fn in cassandra.h --- include/cassandra.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/cassandra.h b/include/cassandra.h index a23a2da5d..570acb1be 100644 --- a/include/cassandra.h +++ b/include/cassandra.h @@ -2934,6 +2934,24 @@ CASS_EXPORT void cass_cluster_set_monitor_reporting_interval(CassCluster* cluster, unsigned interval_secs); +/** + * Sets the amount of time after which metric histograms should be refreshed. + * Upon refresh histograms are reset to zero, effectively dropping any history to + * that point. Refresh occurs when a snapshot is requested so ths value should + * be thought of as a minimum time to refresh. + * + * If refresh is not enabled the driver will continue to accumulate histogram + * data over the life of a session; this is the default behaviour and replicates + * the behaviour of previous versions. + * + * Note that the specified interval must be > 0 otherwise CASS_ERROR_LIB_BAD_PARAMS + * will be returned. + * + * @public @memberof CassCluster + * + * @param cluster + * @param refresh_interval Minimum interval (in milliseconds) for refresh interval + */ CASS_EXPORT CassError cass_cluster_set_histogram_refresh_interval(CassCluster* cluster, unsigned refresh_interval); From 9670aaa799e9f76de7e84512f928fd13236465d1 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Sat, 7 Oct 2023 01:08:28 -0500 Subject: [PATCH 07/11] Fix to get good compilation on CentOS 7 + some additional cleanup --- src/metrics.hpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index 1dd297e4c..938a8bb2f 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -271,11 +271,12 @@ class Metrics : public Allocated { Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) : thread_state_(thread_state) - , histograms_(new PerThreadHistogram[thread_state->max_threads()]) { + , histograms_(new PerThreadHistogram[thread_state->max_threads()]) + , zero_snapshot_(Snapshot {0,0,0,0,0,0,0,0,0,0}) { refresh_interval_ = refresh_interval; refresh_timestamp_ = get_time_since_epoch_ms(); - cached_snapshot_ = Snapshot {}; + cached_snapshot_ = zero_snapshot_; hdr_init(1LL, HIGHEST_TRACKABLE_VALUE, 3, &histogram_); uv_mutex_init(&mutex_); @@ -286,7 +287,7 @@ class Metrics : public Allocated { uv_mutex_destroy(&mutex_); } - void record_value(int64_t value) const { + void record_value(int64_t value) { histograms_[thread_state_->current_thread_id()].record_value(value); } @@ -303,7 +304,7 @@ class Metrics : public Allocated { if (histogram_->total_count == 0) { // There is no data; default to 0 for the stats. - copy_snapshot(Snapshot {}, snapshot); + copy_snapshot(zero_snapshot_, snapshot); } else { copy_snapshot(build_new_snapshot(histogram_), snapshot); } @@ -439,19 +440,6 @@ class Metrics : public Allocated { hdr_reset(from); } - // Same as add() above but without the actual addition. The contract here (and - // in the other methods of this class) is that whenever the phase is flipped the - // now-unused histogram has to be cleared. That provides the guarantee that whatever - // histogram we move to here will be starting out as empty. Without such a guarantee - // we'd have to explicitly clear the histogram that will be pointed to by active_index_ - // which we can't really do (in an atomic way) given the current concurrency regime. - void clear() { - int inactive_index = active_index_.exchange(!active_index_.load()); - hdr_histogram* from = histograms_[inactive_index]; - phaser_.flip_phase(); - hdr_reset(from); - } - private: hdr_histogram* histograms_[2]; mutable Atomic active_index_; @@ -466,6 +454,7 @@ class Metrics : public Allocated { unsigned refresh_interval_; mutable uint64_t refresh_timestamp_; mutable Snapshot cached_snapshot_; + const Snapshot zero_snapshot_; private: DISALLOW_COPY_AND_ASSIGN(Histogram); From 09148f25cf7a9dae16b4bf4174b8b3ed94e69beb Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Tue, 10 Oct 2023 12:44:46 -0500 Subject: [PATCH 08/11] Optimize build-then-send case per comments from Joao in code review --- src/metrics.hpp | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index 938a8bb2f..72b008511 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -306,7 +306,7 @@ class Metrics : public Allocated { // There is no data; default to 0 for the stats. copy_snapshot(zero_snapshot_, snapshot); } else { - copy_snapshot(build_new_snapshot(histogram_), snapshot); + build_and_copy_snapshot(histogram_, snapshot); } return; } @@ -333,16 +333,16 @@ class Metrics : public Allocated { private: void copy_snapshot(Snapshot from, Snapshot* to) const { - to->min = from.min; - to->max = from.max; - to->mean = from.mean; - to->stddev = from.stddev; - to->median = from.median; - to->percentile_75th = from.percentile_75th; - to->percentile_95th = from.percentile_95th; - to->percentile_98th = from.percentile_98th; - to->percentile_99th = from.percentile_99th; - to->percentile_999th = from.percentile_999th; + to->min = from.min; + to->max = from.max; + to->mean = from.mean; + to->stddev = from.stddev; + to->median = from.median; + to->percentile_75th = from.percentile_75th; + to->percentile_95th = from.percentile_95th; + to->percentile_98th = from.percentile_98th; + to->percentile_99th = from.percentile_99th; + to->percentile_999th = from.percentile_999th; } Snapshot build_new_snapshot(hdr_histogram* h) const { @@ -360,6 +360,22 @@ class Metrics : public Allocated { }; } + // Optimized version of chained build_new_snapshot -> copy_snapshot calls + // to avoid creation of an unnecessary intermediate struct + void build_and_copy_snapshot(hdr_histogram* h, Snapshot* to) const { + to->min = hdr_min(h); + to->max = hdr_max(h); + to->mean = static_cast(hdr_mean(h)); + to->stddev = static_cast(hdr_stddev(h)); + to->median = hdr_value_at_percentile(h, 50.0); + to->percentile_75th = hdr_value_at_percentile(h, 75.0); + to->percentile_95th = hdr_value_at_percentile(h, 95.0); + to->percentile_98th = hdr_value_at_percentile(h, 98.0); + to->percentile_99th = hdr_value_at_percentile(h, 99.0); + to->percentile_999th = hdr_value_at_percentile(h, 99.9); + } + + class WriterReaderPhaser { public: WriterReaderPhaser() From f3c2f200429c2fce4af329aeb8a33fa50f93cf99 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 12 Oct 2023 05:14:53 -0500 Subject: [PATCH 09/11] Updated function name to avoid the suggestion that we're building anything --- src/metrics.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index 72b008511..228cf460f 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -306,7 +306,7 @@ class Metrics : public Allocated { // There is no data; default to 0 for the stats. copy_snapshot(zero_snapshot_, snapshot); } else { - build_and_copy_snapshot(histogram_, snapshot); + histogram_to_snapshot(histogram_, snapshot); } return; } @@ -362,7 +362,7 @@ class Metrics : public Allocated { // Optimized version of chained build_new_snapshot -> copy_snapshot calls // to avoid creation of an unnecessary intermediate struct - void build_and_copy_snapshot(hdr_histogram* h, Snapshot* to) const { + void histogram_to_snapshot(hdr_histogram* h, Snapshot* to) const { to->min = hdr_min(h); to->max = hdr_max(h); to->mean = static_cast(hdr_mean(h)); From ba19f711ee4f34a9655cb45d60ae4f249b97c6a2 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Thu, 12 Oct 2023 06:25:12 -0500 Subject: [PATCH 10/11] Fixing the INT64_MAX issue discussed in code review --- src/metrics.hpp | 6 +++++- tests/src/unit/tests/test_metrics.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index 228cf460f..f07a9ba40 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -323,7 +323,11 @@ class Metrics : public Allocated { histograms_[i].add(histogram_); } - cached_snapshot_ = build_new_snapshot(histogram_); + if (histogram_->total_count == 0) { + copy_snapshot(zero_snapshot_, &cached_snapshot_); + } else { + cached_snapshot_ = build_new_snapshot(histogram_); + } refresh_timestamp_ = now; } diff --git a/tests/src/unit/tests/test_metrics.cpp b/tests/src/unit/tests/test_metrics.cpp index b62b5db63..28d3e5260 100644 --- a/tests/src/unit/tests/test_metrics.cpp +++ b/tests/src/unit/tests/test_metrics.cpp @@ -204,6 +204,32 @@ TEST(MetricsUnitTest, HistogramWithRefreshInterval) { EXPECT_EQ(snapshot.stddev, 28); } +// Variant of the case above. If we have no requests for the entirety +// of the refresh interval make sure the stats return zero +TEST(MetricsUnitTest, HistogramWithRefreshIntervalNoActivity) { + unsigned refresh_interval = 1000; + Metrics::ThreadState thread_state(1); + Metrics::Histogram histogram(&thread_state, refresh_interval); + + Metrics::Histogram::Snapshot snapshot; + + // Initial refresh interval (where we always return zero) + another interval of + // no activity + test::Utils::msleep(2.2 * refresh_interval); + + histogram.get_snapshot(&snapshot); + EXPECT_EQ(snapshot.min, 0); + EXPECT_EQ(snapshot.max, 0); + EXPECT_EQ(snapshot.median, 0); + EXPECT_EQ(snapshot.percentile_75th, 0); + EXPECT_EQ(snapshot.percentile_95th, 0); + EXPECT_EQ(snapshot.percentile_98th, 0); + EXPECT_EQ(snapshot.percentile_99th, 0); + EXPECT_EQ(snapshot.percentile_999th, 0); + EXPECT_EQ(snapshot.mean, 0); + EXPECT_EQ(snapshot.stddev, 0); +} + TEST(MetricsUnitTest, HistogramWithThreads) { HistogramThreadArgs args[NUM_THREADS]; From 528cbcd07b4b0ff1c7589ade1db8338908f6d9db Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 18 Oct 2023 16:31:52 -0500 Subject: [PATCH 11/11] Incorporate feedback on additional code simplification from code review --- src/metrics.hpp | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/metrics.hpp b/src/metrics.hpp index f07a9ba40..6cf028f7b 100644 --- a/src/metrics.hpp +++ b/src/metrics.hpp @@ -326,7 +326,7 @@ class Metrics : public Allocated { if (histogram_->total_count == 0) { copy_snapshot(zero_snapshot_, &cached_snapshot_); } else { - cached_snapshot_ = build_new_snapshot(histogram_); + histogram_to_snapshot(histogram_, &cached_snapshot_); } refresh_timestamp_ = now; } @@ -349,23 +349,6 @@ class Metrics : public Allocated { to->percentile_999th = from.percentile_999th; } - Snapshot build_new_snapshot(hdr_histogram* h) const { - return Snapshot { - hdr_min(h), - hdr_max(h), - static_cast(hdr_mean(h)), - static_cast(hdr_stddev(h)), - hdr_value_at_percentile(h, 50.0), - hdr_value_at_percentile(h, 75.0), - hdr_value_at_percentile(h, 95.0), - hdr_value_at_percentile(h, 98.0), - hdr_value_at_percentile(h, 99.0), - hdr_value_at_percentile(h, 99.9) - }; - } - - // Optimized version of chained build_new_snapshot -> copy_snapshot calls - // to avoid creation of an unnecessary intermediate struct void histogram_to_snapshot(hdr_histogram* h, Snapshot* to) const { to->min = hdr_min(h); to->max = hdr_max(h);