memory benchmark: support auto-detect CPU L3 cache

When writing to a small memory block, the CPU just writes to the L3 cache, so using a memory block size larger than the L3 cache size gives a more realistic result. Allow specifying --memory-block-size=0, in which case sysbench auto-detects the CPU L3 cache size and rounds it up to a power of 2 for the test. For example: Originally, running this command on my PC gave a result of 47634.81 MiB/sec: # sysbench memory --memory-scope=local --threads=12 run In fact, the real performance is about 15G/s, so the test result deviates by about 300%. A test case should be added to test_memory.t, but the GitHub CI failed. I tried to reproduce the failure on an ARM server, but the test still worked well there. Maybe this test case could be added in the future. $ sysbench memory --memory-scope=local --memory-oper=write --memory-total-size=1G --memory-block-size=0 --events=1 --time=0 --threads=2 run sysbench *.* * (glob) Running the test with following options: Number of threads: 2 Initializing random number generator from current time Running memory speed test with the following options: block size: * (glob) total size: 1024MiB operation: write scope: local Initializing worker threads... Threads started! Total operations: * (* per second) (glob) 1024.00 MiB transferred (* MiB/sec) (glob) Throughput: events/s (eps): * (glob) time elapsed: *s (glob) total number of events: * (glob) Latency (ms): min: *.* (glob) avg: *.* (glob) max: *.* (glob) 95th percentile: *.* (glob) sum: *.* (glob) Threads fairness: events (avg/stddev): */* (glob) execution time (avg/stddev): */* (glob) $ sysbench $args cleanup sysbench *.* * (glob) Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
akopytov · Jun 10, 2021 · fdd710a · fdd710a
1 parent ead2689
commit fdd710a
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 21 deletions.
diff --git a/src/tests/memory/sb_memory.c b/src/tests/memory/sb_memory.c
@@ -31,14 +31,28 @@
 # include <sys/shm.h>
 #endif
 
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+# include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h>
+#endif
+
 #include <inttypes.h>
 
 #define LARGE_PAGE_SIZE (4UL * 1024 * 1024)
 
 /* Memory test arguments */
 static sb_arg_t memory_args[] =
 {
-  SB_OPT("memory-block-size", "size of memory block for test", "1K", SIZE),
+  /* Default exceeds a typical modern CPU's L3 cache size, e.g. Intel(R) Xeon(R) Platinum 8260 has 36608K */
+  SB_OPT("memory-block-size", "size of memory block for test. If 0, auto-detect CPU L3 cache and apply", "65536K", SIZE),
   SB_OPT("memory-total-size", "total size of data to transfer", "100G", SIZE),
   SB_OPT("memory-scope", "memory access scope {global,local}", "global",
          STRING),
@@ -107,6 +121,35 @@ int register_test_memory(sb_list_t *tests)
   return 0;
 }
 
+/*
+  Detect the L3 cache size of CPU 0 from sysfs and round it up to a power
+  of 2.
+
+  The sysfs attribute is parsed as a decimal number of KiB; any trailing
+  characters after the digits (such as a unit suffix or a newline) are
+  ignored.
+
+  Returns the rounded-up size in bytes, or (size_t) -1 if the attribute
+  cannot be opened, cannot be read, or does not hold a usable positive
+  value.
+*/
+static size_t memory_detect_l3_size(void)
+{
+  static const char l3cache_path[] =
+    "/sys/devices/system/cpu/cpu0/cache/index3/size";
+  /*
+    Upper bound on the KiB count: keeps both the power-of-2 round-up and the
+    final multiplication by 1024 below half of the size_t range, so neither
+    can overflow and the result stays positive if stored in a signed type of
+    the same width.
+  */
+  const size_t max_sizekb = (((size_t) -1) >> 2) / 1024;
+  char buf[16] = {0};
+  char *end;
+  ssize_t nread;
+  unsigned long sizekb;
+  size_t alignkb = 1;
+  int file;
+
+  file = open(l3cache_path, O_RDONLY, 0);
+  if (file < 0)
+    return (size_t) -1;
+
+  /* Reserve one byte so the buffer is always NUL-terminated */
+  nread = read(file, buf, sizeof(buf) - 1);
+  close(file);
+  if (nread <= 0)
+    return (size_t) -1;
+  buf[nread] = '\0';
+
+  /*
+    strtoul() stops at the first non-digit, so no explicit stripping of the
+    trailing text is needed. A negative input wraps to a huge value and is
+    rejected by the range check below.
+  */
+  sizekb = strtoul(buf, &end, 10);
+  if (end == buf || sizekb == 0 || sizekb > max_sizekb)
+    return (size_t) -1;
+
+  /* to make sure memory block size is not smaller than L3 cache size */
+  while (alignkb < sizekb)
+    alignkb <<= 1;
+
+  return alignkb * 1024;
+}
 
 int memory_init(void)
 {
@@ -115,12 +158,29 @@ int memory_init(void)
   size_t       *buffer;
 
   memory_block_size = sb_get_value_size("memory-block-size");
-  if (memory_block_size < SIZEOF_SIZE_T ||
-      /* Must be a power of 2 */
-      (memory_block_size & (memory_block_size - 1)) != 0)
+  if (memory_block_size && (memory_block_size < SIZEOF_SIZE_T))
+  {
+    log_text(LOG_FATAL, "Invalid value for memory-block-size: %s, "
+             "should not less than %d, or specify 0 to auto-detect CPU L3 cache size",
+             sb_get_value_string("memory-block-size"), SIZEOF_SIZE_T);
+    return 1;
+  }
+
+  if (!memory_block_size)
+  {
+    /* auto detect L3 cache size */
+    memory_block_size = memory_detect_l3_size();
+    if (memory_block_size < 0)
+    {
+      log_text(LOG_FATAL, "Auto-detect memory-block-size failed");
+      return 1;
+    }
+  }
+
+  /* Must be a power of 2 */
+  if ((memory_block_size & (memory_block_size - 1)) != 0)
   {
-    log_text(LOG_FATAL, "Invalid value for memory-block-size: %s",
-             sb_get_value_string("memory-block-size"));
+    log_text(LOG_FATAL, "Invalid value for memory-block-size: %ld, should be a power of 2", memory_block_size);
     return 1;
   }
 

diff --git a/tests/t/test_memory.t b/tests/t/test_memory.t
@@ -19,7 +19,7 @@ help' only on Linux.
   sysbench * (glob)
 
   memory options:
-    --memory-block-size=SIZE    size of memory block for test [1K]
+    --memory-block-size=SIZE    size of memory block for test. If 0, auto-detect CPU L3 cache and apply [65536K]
     --memory-total-size=SIZE    total size of data to transfer [100G]
     --memory-scope=STRING       memory access scope {global,local} [global]
     --memory-oper=STRING        type of memory operations {read, write, none} [write]
@@ -31,28 +31,16 @@ help' only on Linux.
   'memory' test does not implement the 'prepare' command.
   [1]
 
-  $ sysbench $args --memory-block-size=-1 run
-  sysbench * (glob)
-
-  FATAL: Invalid value for memory-block-size: -1
-  [1]
-
-  $ sysbench $args --memory-block-size=0 run
-  sysbench * (glob)
-
-  FATAL: Invalid value for memory-block-size: 0
-  [1]
-
   $ sysbench $args --memory-block-size=3 run
   sysbench * (glob)
 
-  FATAL: Invalid value for memory-block-size: 3
+  FATAL: Invalid value for memory-block-size: 3, should not less than 8, or specify 0 to auto-detect CPU L3 cache size
   [1]
 
   $ sysbench $args --memory-block-size=9 run
   sysbench * (glob)
 
-  FATAL: Invalid value for memory-block-size: 9
+  FATAL: Invalid value for memory-block-size: 9, should be a power of 2
   [1]
 
 ########################################################################