Skip to content

Commit

Permalink
os/bluestore: introduce new io_uring IO engine
Browse files Browse the repository at this point in the history
This implements a low-level IO engine that uses the brand-new
io_uring IO interface: https://lwn.net/Articles/776428/

The following basic config was used for fio_ceph_objectstore:

  rw=randwrite
  iodepth=16
  nr_files=1
  numjobs=1
  size=256m

  bluestore_min_alloc_size = 4096
  bluestore_max_blob_size  = 65536

  bluestore_block_path     = /dev/ram0
  bluestore_block_db_path  = /dev/ram1
  bluestore_block_wal_path = /dev/ram2

bluestore_ioring=false

   4k  IOPS=25.5k, BW=99.8MiB/s, Lat=0.374ms
   8k  IOPS=21.5k, BW=168MiB/s,  Lat=0.441ms
  16k  IOPS=17.2k, BW=268MiB/s,  Lat=0.544ms
  32k  IOPS=12.3k, BW=383MiB/s,  Lat=0.753ms
  64k  IOPS=8358,  BW=522MiB/s,  Lat=1.083ms
 128k  IOPS=4724,  BW=591MiB/s,  Lat=1.906ms

bluestore_ioring=true

   4k  IOPS=29.2k, BW=114MiB/s,  Lat=0.331ms
   8k  IOPS=30.7k, BW=240MiB/s,  Lat=0.319ms
  16k  IOPS=27.4k, BW=428MiB/s,  Lat=0.368ms
  32k  IOPS=22.7k, BW=709MiB/s,  Lat=0.475ms
  64k  IOPS=15.6k, BW=978MiB/s,  Lat=0.754ms
 128k  IOPS=9572,  BW=1197MiB/s, Lat=1.223ms

The overall IOPS increase is as follows:

   4k  +14%
   8k  +42%
  16k  +59%
  32k  +89%
  64k  +85%
 128k  +102%

By default libaio is used.  If bluestore_ioring=true is set but the kernel
does not support io_uring or the architecture is not x86-64, libaio will be
used instead.

Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
  • Loading branch information
rouming committed May 8, 2019
1 parent 35c68bb commit 43d028d
Show file tree
Hide file tree
Showing 7 changed files with 629 additions and 2 deletions.
3 changes: 3 additions & 0 deletions src/common/options.cc
Expand Up @@ -4787,6 +4787,9 @@ std::vector<Option> get_global_options() {
.set_description("Enforces specific hw profile settings")
.set_long_description("'hdd' enforces settings intended for BlueStore above a rotational drive. 'ssd' enforces settings intended for BlueStore above a solid drive. 'default' - using settings for the actual hardware."),

Option("bluestore_ioring", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description("Enables Linux io_uring API instead of libaio"),

// -----------------------------------------
// kstore
Expand Down
3 changes: 2 additions & 1 deletion src/os/CMakeLists.txt
Expand Up @@ -38,7 +38,8 @@ endif(WITH_BLUESTORE)
if(HAVE_LIBAIO OR HAVE_POSIXAIO)
list(APPEND libos_srcs
bluestore/KernelDevice.cc
bluestore/aio.cc)
bluestore/aio.cc
bluestore/io_uring.cc)
endif()

if(WITH_FUSE)
Expand Down
14 changes: 13 additions & 1 deletion src/os/bluestore/KernelDevice.cc
Expand Up @@ -54,8 +54,20 @@ KernelDevice::KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, ai
fd_directs.resize(WRITE_LIFE_MAX, -1);
fd_buffereds.resize(WRITE_LIFE_MAX, -1);

bool use_ioring = g_ceph_context->_conf.get_val<bool>("bluestore_ioring");
unsigned int iodepth = cct->_conf->bdev_aio_max_queue_depth;
io_queue = std::unique_ptr<io_queue_t>(new aio_queue_t(iodepth));

if (use_ioring && ioring_queue_t::supported()) {
io_queue = std::unique_ptr<io_queue_t>(new ioring_queue_t(iodepth));
} else {
static bool once;
if (use_ioring && !once) {
derr << "WARNING: io_uring API is not supported! Fallback to libaio!"
<< dendl;
once = true;
}
io_queue = std::unique_ptr<io_queue_t>(new aio_queue_t(iodepth));
}
}

int KernelDevice::_lock()
Expand Down
1 change: 1 addition & 0 deletions src/os/bluestore/KernelDevice.h
Expand Up @@ -23,6 +23,7 @@
#include "include/utime.h"

#include "ceph_aio.h"
#include "ceph_io_uring.h"
#include "BlockDevice.h"

#ifndef RW_IO_MAX
Expand Down
74 changes: 74 additions & 0 deletions src/os/bluestore/ceph_io_uring.h
@@ -0,0 +1,74 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include "acconfig.h"

#include <list>
#include <map>
#include <vector>

#include "include/types.h"
#include "ceph_aio.h"

// Bundle of pointers describing a submission-queue (SQ) ring.
// This struct holds pointers only; it does not own what they point at.
// NOTE(review): presumably they are aimed at the kernel-shared ring mapping
// by the code in io_uring.cc -- confirm there.
struct io_sq_ring {
  unsigned int *head;          // -> head index of the ring
  unsigned int *tail;          // -> tail index of the ring
  unsigned int *ring_mask;     // -> mask value for the ring
  unsigned int *ring_entries;  // -> entry count of the ring
  unsigned int *flags;         // -> ring flags word
  unsigned int *array;         // -> array of unsigned values belonging to the ring
};

// Bundle of pointers describing a completion-queue (CQ) ring.
// This struct holds pointers only; it does not own what they point at.
// NOTE(review): presumably they are aimed at the kernel-shared ring mapping
// by the code in io_uring.cc -- confirm there.
struct io_cq_ring {
  unsigned int *head;          // -> head index of the ring
  unsigned int *tail;          // -> tail index of the ring
  unsigned int *ring_mask;     // -> mask value for the ring
  unsigned int *ring_entries;  // -> entry count of the ring
  // The elaborated "struct" keeps this header usable without a prior
  // declaration of io_uring_cqe; only a pointer to it is stored.
  struct io_uring_cqe *cqes;   // -> completion queue entries
};

// One memory region, recorded as a (start address, length) pair.
// NOTE(review): the name suggests these are mmap()ed regions kept so they can
// be unmapped later -- confirm against io_uring.cc.
struct ioring_mmap {
  void* ptr;   // start address of the region
  size_t len;  // length of the region
};

// All state kept for one io_uring instance; embedded by value in
// ioring_queue_t, which value-initializes it and then sets iodepth.
// Field meanings below are read off the names and types only.
// NOTE(review): confirm the details against io_uring.cc.
struct ioring_data {
  int ring_fd;                    // file descriptor of the ring

  // Submission side.
  struct io_sq_ring sq_ring;      // pointers describing the SQ ring
  struct io_uring_sqe *sqes;      // submission queue entries
  unsigned int sq_ring_mask;      // SQ mask held by value

  // Completion side.
  struct io_cq_ring cq_ring;      // pointers describing the CQ ring
  unsigned int cq_ring_mask;      // CQ mask held by value

  unsigned int queued;            // presumably a count of queued requests -- TODO confirm
  int cq_ring_off;
  unsigned int iodepth;           // queue depth handed to the ioring_queue_t constructor

  // Counters. NOTE(review): what counts as a hit or miss is not visible here.
  uint64_t cachehit;
  uint64_t cachemiss;

  struct ioring_mmap mmap[3];     // three recorded memory regions

  // int -> int lookup. NOTE(review): presumably maps a real fd to its
  // fixed/registered index -- confirm.
  std::map<int, int> fixed_fds_map;
};

struct ioring_queue_t : public io_queue_t {
struct ioring_data _ioring;

typedef list<aio_t>::iterator aio_iter;

// Returns true if arch is x86-64 and kernel supports io_uring
static bool supported();

explicit ioring_queue_t(unsigned iodepth) :
_ioring() {
_ioring.iodepth = iodepth;
}

int init(std::vector<int> &fds) override;
void shutdown() override;

int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size,
void *priv, int *retries) override;
int get_next_completed(int timeout_ms, aio_t **paio, int max) override;
};

0 comments on commit 43d028d

Please sign in to comment.