Skip to content

Commit

Permalink
block: ublk: enable zoned storage support
Browse files Browse the repository at this point in the history
Add zoned storage support to ublk: report_zones and operations:
 - REQ_OP_ZONE_OPEN
 - REQ_OP_ZONE_CLOSE
 - REQ_OP_ZONE_FINISH
 - REQ_OP_ZONE_RESET

This allows implementation of zoned storage devices in user space. An
example user space implementation based on ubdsrv is available [1].

[1] metaspace/ubdsrv@2c60b9f

Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
  • Loading branch information
metaspace authored and intel-lab-lkp committed Mar 16, 2023
1 parent eeac8ed commit 723d5c2
Show file tree
Hide file tree
Showing 6 changed files with 308 additions and 58 deletions.
4 changes: 4 additions & 0 deletions drivers/block/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ config BLK_DEV_UBLK
can handle batches more effectively, but task_work_add() isn't exported
to modules, so ublk has to be built into the kernel.

config BLK_DEV_UBLK_ZONED
def_bool y
depends on BLK_DEV_UBLK && BLK_DEV_ZONED

source "drivers/block/rnbd/Kconfig"

endif # BLK_DEV
1 change: 1 addition & 0 deletions drivers/block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/

obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
obj-$(CONFIG_BLK_DEV_UBLK_ZONED) += ublk_drv-zoned.o

swim_mod-y := swim.o swim_asm.o
142 changes: 142 additions & 0 deletions drivers/block/ublk_drv-zoned.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2023 Andreas Hindborg <a.hindborg@samsung.com>
*/
#include <linux/blkzoned.h>
#include <linux/ublk_cmd.h>
#include "ublk_drv.h"

/*
 * Derive the number of zones of the disk from the basic device parameters.
 *
 * Does nothing unless the device was created with UBLK_F_ZONED and a
 * non-zero zone size (chunk_sectors) has been configured.
 */
void ublk_set_nr_zones(struct ublk_device *ub)
{
	const struct ublk_param_basic *p = &ub->params.basic;

	if (!(ub->dev_info.flags & UBLK_F_ZONED) || !p->chunk_sectors)
		return;

	/*
	 * Shift rather than divide: dev_sectors is a 64-bit value in the
	 * UAPI parameter struct, and an open-coded 64-bit division does not
	 * link on 32-bit architectures. The zone size is treated as a power
	 * of two here, exactly as the report zones code in this file does
	 * when it converts sectors to zone numbers with ilog2().
	 */
	ub->ub_disk->nr_zones = p->dev_sectors >> ilog2(p->chunk_sectors);
}

/*
 * Push the zoned parameters supplied by user space (limits on active and
 * open zones) down to the gendisk. Devices without UBLK_F_ZONED are left
 * untouched.
 */
void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
	const struct ublk_param_zoned *zoned = &ub->params.zoned;
	struct gendisk *disk;

	if (!(ub->dev_info.flags & UBLK_F_ZONED))
		return;

	disk = ub->ub_disk;
	disk_set_max_active_zones(disk, zoned->max_active_zones);
	disk_set_max_open_zones(disk, zoned->max_open_zones);
}

/*
 * Revalidate the zone layout of @disk through the block layer.
 *
 * No driver callback is passed (second argument is NULL). Returns whatever
 * blk_revalidate_disk_zones() returns.
 */
int ublk_revalidate_disk_zones(struct gendisk *disk)
{
	return blk_revalidate_disk_zones(disk, NULL);
}

/*
 * Allocate a buffer to receive zone descriptors from the ublk server.
 * Based on virtblk_alloc_report_buffer().
 *
 * @ublk:         device the report is issued against
 * @nr_zones:     number of zones the caller would like to report
 * @zone_sectors: zone size in sectors; must be a non-zero power of two
 * @buflen:       set to the size of the returned buffer in bytes, or to 0
 *                when the allocation fails
 *
 * The buffer is sized for @nr_zones descriptors, clamped to the number of
 * zones that fit in the disk capacity and to what a single request on the
 * queue can carry. If the allocation fails the size is halved and retried
 * until not even one descriptor fits.
 *
 * Returns the buffer (release it with kvfree()) or NULL on failure.
 */
static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
				      unsigned int nr_zones,
				      unsigned int zone_sectors, size_t *buflen)
{
	struct request_queue *q = ublk->ub_disk->queue;
	size_t bufsize;
	void *buf;

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(ublk->ub_disk) >> ilog2(zone_sectors));

	bufsize = nr_zones * sizeof(struct blk_zone);
	/* Widen before shifting so the byte counts cannot wrap in 32 bits. */
	bufsize = min_t(size_t, bufsize,
			(size_t)queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize,
			(size_t)queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= sizeof(struct blk_zone)) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}

	/* Give the caller a defined length on failure as well. */
	*buflen = 0;
	return NULL;
}

/*
 * report_zones handler: fetch zone descriptors from the ublk server.
 *
 * @disk:     disk to report zones for
 * @sector:   first sector of the first zone to report
 * @nr_zones: maximum number of zones to report
 * @cb:       callback invoked once for every zone reported
 * @data:     opaque pointer passed through to @cb
 *
 * The zones are fetched with one or more REQ_OP_DRV_IN requests carrying
 * UBLK_IO_OP_REPORT_ZONES in the request pdu. Each request asks for as many
 * zones as fit in the bounce buffer. A zone with zero length in the reply
 * marks the end of the report: it is not handed to @cb and no further
 * requests are issued.
 *
 * Returns the number of zones reported, or a negative errno:
 * -EOPNOTSUPP if the device is not zoned, -EINVAL if no zone size is
 * configured, -ENOMEM if no buffer could be allocated, or the error of the
 * failing request or of @cb.
 */
int ublk_report_zones(struct gendisk *disk, sector_t sector,
		      unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct ublk_device *ub = disk->private_data;
	unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
	unsigned int max_zones_per_request;
	unsigned int done_zones = 0;
	unsigned int first_zone;
	struct blk_zone *buffer;
	size_t buffer_length;
	int ret;

	if (!(ub->dev_info.flags & UBLK_F_ZONED))
		return -EOPNOTSUPP;

	/* ilog2(0) is undefined, so refuse to work without a zone size. */
	if (!zone_size_sectors)
		return -EINVAL;

	first_zone = sector >> ilog2(zone_size_sectors);
	nr_zones = min_t(unsigned int, ub->ub_disk->nr_zones - first_zone,
			 nr_zones);

	buffer = ublk_alloc_report_buffer(ub, nr_zones, zone_size_sectors,
					  &buffer_length);
	if (!buffer)
		return -ENOMEM;

	max_zones_per_request = buffer_length / sizeof(struct blk_zone);

	while (done_zones < nr_zones) {
		unsigned int remaining_zones = nr_zones - done_zones;
		unsigned int zones_in_request = min_t(
			unsigned int, remaining_zones, max_zones_per_request);
		struct ublk_rq_data *pdu;
		struct request *req;
		blk_status_t status;

		memset(buffer, 0, buffer_length);

		req = blk_mq_alloc_request(disk->queue, REQ_OP_DRV_IN, 0);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			goto out;
		}

		pdu = blk_mq_rq_to_pdu(req);
		pdu->operation = UBLK_IO_OP_REPORT_ZONES;
		pdu->sector = sector;
		/* Never ask for more zones than the buffer can hold. */
		pdu->nr_sectors = zones_in_request * zone_size_sectors;

		ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length,
				      GFP_KERNEL);
		if (ret) {
			blk_mq_free_request(req);
			goto out;
		}

		status = blk_execute_rq(req, 0);
		ret = blk_status_to_errno(status);
		blk_mq_free_request(req);
		if (ret)
			goto out;

		for (unsigned int i = 0; i < zones_in_request; i++) {
			struct blk_zone *zone = buffer + i;

			/* A zero length zone means don't ask for more zones */
			if (!zone->len)
				goto done;

			/*
			 * The callback expects the index of the zone within
			 * the whole report, not within the current request.
			 */
			ret = cb(zone, done_zones, data);
			if (ret)
				goto out;

			done_zones++;
			sector += zone_size_sectors;
		}
	}

done:
	ret = done_zones;
out:
	kvfree(buffer);
	return ret;
}
113 changes: 61 additions & 52 deletions drivers/block/ublk_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <linux/task_work.h>
#include <linux/namei.h>
#include <uapi/linux/ublk_cmd.h>
#include "ublk_drv.h"

#define UBLK_MINORS (1U << MINORBITS)

Expand All @@ -53,16 +54,13 @@
| UBLK_F_NEED_GET_DATA \
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
| UBLK_F_UNPRIVILEGED_DEV)
| UBLK_F_UNPRIVILEGED_DEV \
| UBLK_F_ZONED)

/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT)

struct ublk_rq_data {
struct llist_node node;
struct callback_head work;
};
#define UBLK_PARAM_TYPE_ALL \
(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)

struct ublk_uring_cmd_pdu {
struct ublk_queue *ubq;
Expand Down Expand Up @@ -135,45 +133,6 @@ struct ublk_queue {

#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)

struct ublk_device {
struct gendisk *ub_disk;

char *__queues;

unsigned int queue_size;
struct ublksrv_ctrl_dev_info dev_info;

struct blk_mq_tag_set tag_set;

struct cdev cdev;
struct device cdev_dev;

#define UB_STATE_OPEN 0
#define UB_STATE_USED 1
#define UB_STATE_DELETED 2
unsigned long state;
int ub_number;

struct mutex mutex;

spinlock_t mm_lock;
struct mm_struct *mm;

struct ublk_params params;

struct completion completion;
unsigned int nr_queues_ready;
unsigned int nr_privileged_daemon;

/*
* Our ubq->daemon may be killed without any notification, so
* monitor each queue's daemon periodically
*/
struct delayed_work monitor_work;
struct work_struct quiesce_work;
struct work_struct stop_work;
};

/* header of ublk_params */
struct ublk_params_header {
__u32 len;
Expand Down Expand Up @@ -225,6 +184,9 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
set_disk_ro(ub->ub_disk, true);

set_capacity(ub->ub_disk, p->dev_sectors);

if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
ublk_set_nr_zones(ub);
}

static void ublk_dev_param_discard_apply(struct ublk_device *ub)
Expand Down Expand Up @@ -285,6 +247,9 @@ static int ublk_apply_params(struct ublk_device *ub)
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
ublk_dev_param_discard_apply(ub);

if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (ub->params.types & UBLK_PARAM_TYPE_ZONED))
ublk_dev_param_zoned_apply(ub);

return 0;
}

Expand Down Expand Up @@ -420,9 +385,10 @@ static int ublk_open(struct block_device *bdev, fmode_t mode)
}

static const struct block_device_operations ub_fops = {
.owner = THIS_MODULE,
.open = ublk_open,
.free_disk = ublk_free_disk,
.owner = THIS_MODULE,
.open = ublk_open,
.free_disk = ublk_free_disk,
.report_zones = ublk_report_zones,
};

#define UBLK_MAX_PIN_PAGES 32
Expand Down Expand Up @@ -558,7 +524,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
{
const unsigned int rq_bytes = blk_rq_bytes(req);

if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
if ((req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN) && ublk_rq_has_data(req)) {
struct ublk_map_data data = {
.ubq = ubq,
.rq = req,
Expand Down Expand Up @@ -607,6 +573,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
{
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
struct ublk_io *io = &ubq->ios[req->tag];
struct ublk_rq_data *pdu = blk_mq_rq_to_pdu(req);
u32 ublk_op;

switch (req_op(req)) {
Expand All @@ -625,6 +592,35 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
case REQ_OP_WRITE_ZEROES:
ublk_op = UBLK_IO_OP_WRITE_ZEROES;
break;
case REQ_OP_ZONE_OPEN:
ublk_op = UBLK_IO_OP_ZONE_OPEN;
break;
case REQ_OP_ZONE_CLOSE:
ublk_op = UBLK_IO_OP_ZONE_CLOSE;
break;
case REQ_OP_ZONE_FINISH:
ublk_op = UBLK_IO_OP_ZONE_FINISH;
break;
case REQ_OP_ZONE_RESET:
ublk_op = UBLK_IO_OP_ZONE_RESET;
break;
case REQ_OP_DRV_IN:
case REQ_OP_DRV_OUT:
ublk_op = pdu->operation;
switch (ublk_op) {
case UBLK_IO_OP_REPORT_ZONES:
iod->op_flags = ublk_op | ublk_req_build_flags(req);
iod->nr_sectors = pdu->nr_sectors;
iod->start_sector = pdu->sector;
iod->addr = io->addr;
return BLK_STS_OK;
default:
return BLK_STS_IOERR;
}
case REQ_OP_ZONE_APPEND:
case REQ_OP_ZONE_RESET_ALL:
/* We do not support zone append or reset_all yet */
fallthrough;
default:
return BLK_STS_IOERR;
}
Expand Down Expand Up @@ -671,7 +667,8 @@ static void ublk_complete_rq(struct request *req)
*
* Both the two needn't unmap.
*/
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE &&
req_op(req) != REQ_OP_DRV_IN) {
blk_mq_end_request(req, BLK_STS_OK);
return;
}
Expand Down Expand Up @@ -1601,6 +1598,15 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (ub->nr_privileged_daemon != ub->nr_queues_ready)
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);

if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ub->dev_info.flags & UBLK_F_ZONED) {
disk_set_zoned(disk, BLK_ZONED_HM);
blk_queue_required_elevator_features(disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE);
ret = ublk_revalidate_disk_zones(disk);
if (ret)
goto out_put_disk;
}

get_device(&ub->cdev_dev);
ret = add_disk(disk);
if (ret) {
Expand Down Expand Up @@ -1746,6 +1752,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;

if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
ub->dev_info.flags &= ~UBLK_F_ZONED;

/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;

Expand Down
Loading

0 comments on commit 723d5c2

Please sign in to comment.