Monitoring enhancements for mixgraph #9672

mdcallag · 2022-03-07T22:45:26Z

Today the db_bench summary line for mixgraph has details on the number of gets done and gets that return a key. It would help to have the same for seeks. The current output is below:

mixgraph     :      52.606 micros/op 19009 ops/sec;    1.3 MB/s ( Gets:56797338 Puts:9585190 Seek:2051471 of 31235719 in 58848809 found)

This part of the output is only for Get() operations:

31235719 in 58848809 found

This is an ugly proof-of-concept change that extends the summary line with the equivalent found count for seeks, plus the average seek length:

@@ -6307,12 +6325,14 @@ class Benchmark {
     char msg[256];
     snprintf(msg, sizeof(msg),
              "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 " of %" PRIu64
-             " in %" PRIu64 " found)\n",
-             gets, puts, seek, found, read);
+             " in %" PRIu64 " get_found and %" PRIu64 " in %" PRIu64
+            " seek_found %.1f avg_seek_length)\n",
+             gets, puts, seek, found, gets, seek_found, seek,
+            (seek_length * 1.0) / seek);

     thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);

Another piece of information that I want is the average lengths for the following, because both are likely to use a complex distribution today:

* of a scan (how many calls to Next)
* of a key

And this ugly proof-of-concept adds both:

diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc
index 9aebf7dd7..fa6ee06aa 100644
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@@ -6127,6 +6127,7 @@ class Benchmark {
     int64_t found = 0;
     int64_t seek = 0;
     int64_t seek_found = 0;
+    int64_t seek_length = 0;
     int64_t bytes = 0;
     const int64_t default_value_max = 1 * 1024 * 1024;
     int64_t value_max = default_value_max;
@@ -6139,6 +6140,11 @@ class Benchmark {
     std::vector<double> ratio{FLAGS_mix_get_ratio, FLAGS_mix_put_ratio,
                               FLAGS_mix_seek_ratio};
     char value_buffer[default_value_max];
+    double total_val_size = 0;
+    int64_t total_vals = 0;
+
+    fprintf(stdout, "ZZ ratios are: %f %f %f\n", FLAGS_mix_get_ratio, FLAGS_mix_put_ratio, FLAGS_mix_seek_ratio);
+
     QueryDecider query;
     RandomGenerator gen;
     Status s;
@@ -6160,6 +6166,14 @@ class Benchmark {
           NewGenericRateLimiter(static_cast<int64_t>(write_rate)));
     }

+    if (use_random_modeling) {
+      fprintf(stdout, "ZZ use_random_modeling\n");
+    } else if (use_prefix_modeling) {
+      fprintf(stdout, "ZZ use_prefix_modeling\n");
+    } else {
+      fprintf(stdout, "ZZ cdf seed modeling\n");
+    }
+
     // Decide if user wants to use prefix based key generation
     if (FLAGS_keyrange_dist_a != 0.0 || FLAGS_keyrange_dist_b != 0.0 ||
         FLAGS_keyrange_dist_c != 0.0 || FLAGS_keyrange_dist_d != 0.0) {
@@ -6261,6 +6275,9 @@ class Benchmark {
         } else if (val_size > value_max) {
           val_size = val_size % value_max;
         }
+       total_val_size += val_size;
+       total_vals++;
+
         s = db_with_cfh->db->Put(
             write_options_, key,
             gen.Generate(static_cast<unsigned int>(val_size)));
@@ -6290,6 +6307,7 @@ class Benchmark {
                 ParetoCdfInversion(u, FLAGS_iter_theta, FLAGS_iter_k,
                                    FLAGS_iter_sigma) %
                 scan_len_max;
+           seek_length += scan_length;
             for (int64_t j = 0; j < scan_length && single_iter->Valid(); j++) {
               Slice value = single_iter->value();
               memcpy(value_buffer, value.data(),
@@ -6307,12 +6325,14 @@ class Benchmark {
     char msg[256];
     snprintf(msg, sizeof(msg),
              "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 " of %" PRIu64
-             " in %" PRIu64 " found)\n",
-             gets, puts, seek, found, read);
+             " in %" PRIu64 " get_found and %" PRIu64 " in %" PRIu64
+            " seek_found %.1f avg_seek_length)\n",
+             gets, puts, seek, found, gets, seek_found, seek,
+            (seek_length * 1.0) / seek);

     thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
-
+    fprintf(stderr, "ZZ total_vals %ld total_val_size(MB) %f avg_val_size %.1f\n", total_vals, total_val_size / (1024*1024), total_val_size / total_vals);
     if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
       thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
                                get_perf_context()->ToString());

The text was updated successfully, but these errors were encountered:

Summary:

Changes:
* improves monitoring by displaying the average size of a Put value and the average scan length
* forces the minimum value size to be 10. Before this it was 0 if you didn't set the distribution parameters.
* uses reasonable defaults for the distribution parameters that determine value size and scan length
* includes seeks in the "reads ... found" message; before this they were missing

This is for #9672

Pull Request resolved: #9711

Test Plan:

Before this change:

./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236
fillseq : 4.289 micros/op 233138 ops/sec; 25.8 MB/s
mixgraph : 18.461 micros/op 54166 ops/sec; 755.0 MB/s ( Gets:50164 Puts:24919 Seek:24917 of 50164 in 75081 found)

After this change:

./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236
fillseq : 3.974 micros/op 251553 ops/sec; 27.8 MB/s
mixgraph : 16.722 micros/op 59795 ops/sec; 833.5 MB/s ( Gets:50164 Puts:24919 Seek:24917, reads 75081 in 75081 found, avg size: 36.0 value, 504.9 scan)

Reviewed By: jay-zhuang

Differential Revision: D35030190

Pulled By: mdcallag

fbshipit-source-id: d8f555f28d869f752ddb674a524108884511b151

mdcallag added the enhancement label Mar 7, 2022

mdcallag self-assigned this Mar 7, 2022

mdcallag mentioned this issue Mar 17, 2022

Make mixgraph easier to use #9711

Closed

mdcallag closed this as completed Mar 22, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Monitoring enhancements for mixgraph #9672

Monitoring enhancements for mixgraph #9672

mdcallag commented Mar 7, 2022

Monitoring enhancements for mixgraph #9672

Monitoring enhancements for mixgraph #9672

Comments

mdcallag commented Mar 7, 2022