Skip to content

Commit

Permalink
memory benchmark: support auto-detect CPU L3 cache
Browse files Browse the repository at this point in the history
When writing to a small memory block, the CPU only writes to the L3
cache, so using a memory block size larger than the L3 cache size gives
a more realistic result. Allow specifying --memory-block-size=0, in which
case sysbench auto-detects the CPU L3 cache size and rounds it up to a
power of 2 to use as the block size.

For example:
Originally, running this command on my PC gave a result of 47634.81 MiB/sec:
 # sysbench memory --memory-scope=local --threads=12 run

In fact, the real performance is about 15G/s, so the test result
deviates by about 300%.

A test case should be added in test_memory.t, but the GitHub CI failed.
I tried to reproduce the failure on an ARM server, but the test worked
fine there. Maybe this test case can be added in the future.

  $ sysbench memory --memory-scope=local --memory-oper=write --memory-total-size=1G --memory-block-size=0 --events=1 --time=0 --threads=2 run
  sysbench *.* * (glob)

  Running the test with following options:
  Number of threads: 2
  Initializing random number generator from current time

  Running memory speed test with the following options:
    block size: * (glob)
    total size: 1024MiB
    operation: write
    scope: local

  Initializing worker threads...

  Threads started!

  Total operations: * (* per second) (glob)

  1024.00 MiB transferred (* MiB/sec) (glob)

  Throughput:
      events/s (eps): * (glob)
      time elapsed:                        *s (glob)
      total number of events:              * (glob)

  Latency (ms):
           min:                              *.* (glob)
           avg:                              *.* (glob)
           max:                              *.* (glob)
           95th percentile:         *.* (glob)
           sum: *.* (glob)

  Threads fairness:
      events (avg/stddev):           */* (glob)
      execution time (avg/stddev):   */* (glob)

  $ sysbench $args cleanup
  sysbench *.* * (glob)

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
  • Loading branch information
pizhenwei committed Jun 10, 2021
1 parent ead2689 commit fdd710a
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 21 deletions.
72 changes: 66 additions & 6 deletions src/tests/memory/sb_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,28 @@
# include <sys/shm.h>
#endif

#ifdef HAVE_UNISTD_H
# include <unistd.h>
# include <sys/types.h>
#endif

#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

#include <inttypes.h>

#define LARGE_PAGE_SIZE (4UL * 1024 * 1024)

/* Memory test arguments */
static sb_arg_t memory_args[] =
{
SB_OPT("memory-block-size", "size of memory block for test", "1K", SIZE),
/* A typical L3 cache size of a modern CPU, e.g. Intel(R) Xeon(R) Platinum 8260 has 36608K */
SB_OPT("memory-block-size", "size of memory block for test. If 0, auto-detect CPU L3 cache and apply", "65536K", SIZE),
SB_OPT("memory-total-size", "total size of data to transfer", "100G", SIZE),
SB_OPT("memory-scope", "memory access scope {global,local}", "global",
STRING),
Expand Down Expand Up @@ -107,6 +121,35 @@ int register_test_memory(sb_list_t *tests)
return 0;
}

/*
  Detect the L3 cache size of CPU 0 and turn it into a memory block size.

  Reads /sys/devices/system/cpu/cpu0/cache/index3/size, parses the leading
  decimal number as a size in KiB (the file is expected to look like
  "36608K\n"; any trailing unit suffix or newline is ignored), rounds that
  number up to the next power of two and returns the result in bytes.

  Returns (size_t) -1 if the file cannot be opened or read, if it does not
  start with a positive decimal number, or if the value is too large to be
  converted to bytes without overflow.
*/
static size_t memory_detect_l3_size(void)
{
  const char *l3cache_path = "/sys/devices/system/cpu/cpu0/cache/index3/size";
  /*
    Upper bound for the parsed KiB value. Rounding up to a power of two can
    almost double the value and the conversion to bytes multiplies by 1024;
    the extra bit keeps the byte count positive should the caller store it
    in a signed type of the same width.
  */
  const size_t max_kb = SIZE_MAX >> 12;
  char buf[16] = {0};
  ssize_t nread;
  size_t ndigits = 0;
  size_t sizekb = 0;
  size_t alignkb = 1;
  int file;

  file = open(l3cache_path, O_RDONLY, 0);
  if (file < 0)
    return (size_t) -1;

  /* Leave room for the terminating NUL so buf is always a valid string */
  nread = read(file, buf, sizeof(buf) - 1);
  close(file);
  if (nread <= 0)
    return (size_t) -1;
  buf[nread] = '\0';

  /* Parse the leading digits by hand so overflow can be rejected */
  while (buf[ndigits] >= '0' && buf[ndigits] <= '9')
  {
    const size_t digit = (size_t) (buf[ndigits] - '0');

    if (sizekb > (max_kb - digit) / 10)
      return (size_t) -1;
    sizekb = sizekb * 10 + digit;
    ndigits++;
  }

  /* No number at all, or a zero-sized cache: treat as detection failure */
  if (ndigits == 0 || sizekb == 0)
    return (size_t) -1;

  /*
    Round up to a power of two so the block size is at least as large as
    the L3 cache; sizekb <= max_kb guarantees this loop terminates.
  */
  while (alignkb < sizekb)
    alignkb <<= 1;

  return alignkb * 1024;
}

int memory_init(void)
{
Expand All @@ -115,12 +158,29 @@ int memory_init(void)
size_t *buffer;

memory_block_size = sb_get_value_size("memory-block-size");
if (memory_block_size < SIZEOF_SIZE_T ||
/* Must be a power of 2 */
(memory_block_size & (memory_block_size - 1)) != 0)
if (memory_block_size && (memory_block_size < SIZEOF_SIZE_T))
{
log_text(LOG_FATAL, "Invalid value for memory-block-size: %s, "
"should not less than %d, or specify 0 to auto-detect CPU L3 cache size",
sb_get_value_string("memory-block-size"), SIZEOF_SIZE_T);
return 1;
}

if (!memory_block_size)
{
/* auto detect L3 cache size */
memory_block_size = memory_detect_l3_size();
if (memory_block_size < 0)
{
log_text(LOG_FATAL, "Auto-detect memory-block-size failed");
return 1;
}
}

/* Must be a power of 2 */
if ((memory_block_size & (memory_block_size - 1)) != 0)
{
log_text(LOG_FATAL, "Invalid value for memory-block-size: %s",
sb_get_value_string("memory-block-size"));
log_text(LOG_FATAL, "Invalid value for memory-block-size: %ld, should be a power of 2", memory_block_size);
return 1;
}

Expand Down
18 changes: 3 additions & 15 deletions tests/t/test_memory.t
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ help' only on Linux.
sysbench * (glob)

memory options:
--memory-block-size=SIZE size of memory block for test [1K]
--memory-block-size=SIZE size of memory block for test. If 0, auto-detect CPU L3 cache and apply [65536K]
--memory-total-size=SIZE total size of data to transfer [100G]
--memory-scope=STRING memory access scope {global,local} [global]
--memory-oper=STRING type of memory operations {read, write, none} [write]
Expand All @@ -31,28 +31,16 @@ help' only on Linux.
'memory' test does not implement the 'prepare' command.
[1]

$ sysbench $args --memory-block-size=-1 run
sysbench * (glob)

FATAL: Invalid value for memory-block-size: -1
[1]

$ sysbench $args --memory-block-size=0 run
sysbench * (glob)

FATAL: Invalid value for memory-block-size: 0
[1]

$ sysbench $args --memory-block-size=3 run
sysbench * (glob)

FATAL: Invalid value for memory-block-size: 3
FATAL: Invalid value for memory-block-size: 3, should not less than 8, or specify 0 to auto-detect CPU L3 cache size
[1]

$ sysbench $args --memory-block-size=9 run
sysbench * (glob)

FATAL: Invalid value for memory-block-size: 9
FATAL: Invalid value for memory-block-size: 9, should be a power of 2
[1]

########################################################################
Expand Down

0 comments on commit fdd710a

Please sign in to comment.