nvme: add copy offload support
For devices supporting native copy, the nvme driver receives read and
write requests with the REQ_COPY operation flag set.
For a read request, the nvme driver populates the payload with the
source information.
For a write request, the driver converts it into an nvme copy command
using the source information in the payload and submits it to the
device. The current design only supports a single source range.
This design is courtesy of Mikulas Patocka's token-based copy approach.

Also add trace event support for nvme_cmd_copy, and expose the device
copy limits (mssrl/mcl/msrc) as queue limits.
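
For reference, a minimal sketch of the submitter-side flow under this
token scheme (illustrative assumptions throughout: the helper below is
hypothetical, and the REQ_COPY/bio plumbing is defined by the companion
block-layer patches, not this commit):

	/* Hypothetical caller-side sketch; not part of this patch. */
	static int issue_one_copy(struct block_device *bdev, sector_t src,
				  sector_t dst, sector_t nr_sects)
	{
		struct page *token = alloc_page(GFP_KERNEL);
		struct bio *bio;
		int ret;

		if (!token)
			return -ENOMEM;

		/* Phase 1: REQ_COPY read; the driver fills the token */
		bio = bio_alloc(bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE,
				GFP_KERNEL);
		bio->bi_iter.bi_sector = src;
		__bio_add_page(bio, token, PAGE_SIZE, 0);
		bio->bi_iter.bi_size = nr_sects << 9; /* length lives in bi_size */
		ret = submit_bio_wait(bio);
		bio_put(bio);
		if (ret)
			goto out;

		/* Phase 2: REQ_COPY write; the driver emits nvme_cmd_copy */
		bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE,
				GFP_KERNEL);
		bio->bi_iter.bi_sector = dst;
		__bio_add_page(bio, token, PAGE_SIZE, 0);
		bio->bi_iter.bi_size = nr_sects << 9;
		ret = submit_bio_wait(bio);
		bio_put(bio);
	out:
		__free_page(token);
		return ret;
	}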

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Javier González <javier.gonz@samsung.com>
Signed-off-by: Arnav Dawn <arnav.dawn@samsung.com>
nj-shetty authored and intel-lab-lkp committed Apr 26, 2022
1 parent c406c51 commit e029014
Showing 8 changed files with 229 additions and 5 deletions.
116 changes: 114 additions & 2 deletions drivers/nvme/host/core.c
@@ -724,6 +724,87 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
}

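/*
 * A REQ_COPY read never reaches the device: it only records the copy
 * source in the token carried by the bio payload, for the paired
 * REQ_COPY write to consume in nvme_setup_copy_write().
 */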
static inline blk_status_t nvme_setup_copy_read(struct nvme_ns *ns, struct request *req)
{
struct bio *bio = req->bio;
struct nvme_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);

memcpy(token->subsys, "nvme", 4);
token->ns = ns;
token->src_sector = bio->bi_iter.bi_sector;
token->sectors = bio->bi_iter.bi_size >> 9;
kunmap_local(token);

return BLK_STS_OK;
}

static inline blk_status_t nvme_setup_copy_write(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
{
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_copy_range *range = NULL;
struct bio *bio = req->bio;
struct nvme_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
sector_t src_sector, dst_sector, n_sectors;
u64 src_lba, dst_lba, n_lba;
unsigned short nr_range = 1;
u16 control = 0;
u16 dspec = 0;

if (unlikely(memcmp(token->subsys, "nvme", 4)) ||
unlikely(token->ns != ns)) {
kunmap_local(token);
return BLK_STS_NOTSUPP;
}

src_sector = token->src_sector;
dst_sector = bio->bi_iter.bi_sector;
n_sectors = token->sectors;
kunmap_local(token);
if (WARN_ON(n_sectors != bio->bi_iter.bi_size >> 9))
return BLK_STS_NOTSUPP;

src_lba = nvme_sect_to_lba(ns, src_sector);
dst_lba = nvme_sect_to_lba(ns, dst_sector);
n_lba = nvme_sect_to_lba(ns, n_sectors);

if (unlikely(nvme_lba_to_sect(ns, src_lba) != src_sector) ||
unlikely(nvme_lba_to_sect(ns, dst_lba) != dst_sector) ||
unlikely(nvme_lba_to_sect(ns, n_lba) != n_sectors))
return BLK_STS_NOTSUPP;

if (WARN_ON(!n_lba))
return BLK_STS_NOTSUPP;

if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;

if (req->cmd_flags & REQ_FAILFAST_DEV)
control |= NVME_RW_LR;

memset(cmnd, 0, sizeof(*cmnd));
cmnd->copy.opcode = nvme_cmd_copy;
cmnd->copy.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->copy.sdlba = cpu_to_le64(dst_lba);

range = kmalloc_array(nr_range, sizeof(*range),
GFP_ATOMIC | __GFP_NOWARN);
if (!range)
return BLK_STS_RESOURCE;

range[0].slba = cpu_to_le64(src_lba);
range[0].nlb = cpu_to_le16(n_lba - 1);

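/* the NR field is 0's based: 0 requests a single source range */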
cmnd->copy.nr_range = 0;

req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
req->special_vec.bv_len = sizeof(*range) * nr_range;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;

cmnd->copy.control = cpu_to_le16(control);
cmnd->copy.dspec = cpu_to_le16(dspec);

return BLK_STS_OK;
}

static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
@@ -947,10 +1028,16 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
ret = nvme_setup_discard(ns, req, cmd);
break;
case REQ_OP_READ:
if (unlikely(req->cmd_flags & REQ_COPY))
ret = nvme_setup_copy_read(ns, req);
else
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
break;
case REQ_OP_WRITE:
if (unlikely(req->cmd_flags & REQ_COPY))
ret = nvme_setup_copy_write(ns, req, cmd);
else
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
break;
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
@@ -1642,6 +1729,29 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
}

static void nvme_config_copy(struct gendisk *disk, struct nvme_ns *ns,
struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
struct request_queue *q = disk->queue;

if (!(ctrl->oncs & NVME_CTRL_ONCS_COPY)) {
blk_queue_max_copy_sectors(q, 0);
blk_queue_max_copy_range_sectors(q, 0);
blk_queue_max_copy_nr_ranges(q, 0);
blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
return;
}

/* set the copy limits only once per queue */
if (blk_queue_flag_test_and_set(QUEUE_FLAG_COPY, q))
return;

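/*
 * mcl: max copy length across all ranges; mssrl: max length of a
 * single source range; msrc: 0's based count of source ranges.
 */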
blk_queue_max_copy_sectors(q, nvme_lba_to_sect(ns, le32_to_cpu(id->mcl)));
blk_queue_max_copy_range_sectors(q, nvme_lba_to_sect(ns, le16_to_cpu(id->mssrl)));
blk_queue_max_copy_nr_ranges(q, id->msrc + 1);
}

static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
{
return uuid_equal(&a->uuid, &b->uuid) &&
@@ -1841,6 +1951,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
set_capacity_and_notify(disk, capacity);

nvme_config_discard(disk, ns);
nvme_config_copy(disk, ns, id);
blk_queue_max_write_zeroes_sectors(disk->queue,
ns->ctrl->max_zeroes_sectors);
}
@@ -4833,6 +4944,7 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_copy_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
4 changes: 4 additions & 0 deletions drivers/nvme/host/fc.c
@@ -2788,6 +2788,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
return ret;

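/* REQ_COPY read: token-only, complete it here without a wire command */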
if (unlikely((rq->cmd_flags & REQ_COPY) && (req_op(rq) == REQ_OP_READ))) {
blk_mq_end_request(rq, BLK_STS_OK);
return BLK_STS_OK;
}
/*
* nvme core doesn't quite treat the rq opaquely. Commands such
* as WRITE ZEROES will return a non-zero rq payload_bytes yet
7 changes: 7 additions & 0 deletions drivers/nvme/host/nvme.h
@@ -482,6 +482,13 @@ struct nvme_ns {

};

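/*
 * In-memory token handed from the REQ_COPY read to the REQ_COPY write
 * through the bio payload; subsys/ns let the write side reject tokens
 * filled in by a different driver or namespace.
 */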
struct nvme_copy_token {
char subsys[4];
struct nvme_ns *ns;
u64 src_sector;
u64 sectors;
};

/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
25 changes: 25 additions & 0 deletions drivers/nvme/host/pci.c
@@ -511,6 +511,14 @@ static inline void nvme_sq_copy_cmd(struct nvme_queue *nvmeq,
nvmeq->sq_tail = 0;
}

static void nvme_commit_sqdb(struct nvme_queue *nvmeq)
{
spin_lock(&nvmeq->sq_lock);
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
nvme_write_sq_db(nvmeq, true);
spin_unlock(&nvmeq->sq_lock);
}

static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
struct nvme_queue *nvmeq = hctx->driver_data;
@@ -918,6 +926,11 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
if (ret)
return ret;

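/* REQ_COPY read needs no DMA mapping or SQE; queue_rq completes it */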
if (unlikely((req->cmd_flags & REQ_COPY) && (req_op(req) == REQ_OP_READ))) {
blk_mq_start_request(req);
return BLK_STS_OK;
}

if (blk_rq_nr_phys_segments(req)) {
ret = nvme_map_data(dev, req, &iod->cmd);
if (ret)
@@ -931,6 +944,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
}

blk_mq_start_request(req);

return BLK_STS_OK;
out_unmap_data:
nvme_unmap_data(dev, req);
@@ -964,6 +978,17 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = nvme_prep_rq(dev, req);
if (unlikely(ret))
return ret;
if (unlikely((req->cmd_flags & REQ_COPY) && (req_op(req) == REQ_OP_READ))) {
blk_mq_set_request_complete(req);
blk_mq_end_request(req, BLK_STS_OK);
/* Commit the sq if copy read was the last req in the list,
* as copy read doesn't update the sq db
*/
if (bd->last)
nvme_commit_sqdb(nvmeq);
return ret;
}

spin_lock(&nvmeq->sq_lock);
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
nvme_write_sq_db(nvmeq, bd->last);
6 changes: 6 additions & 0 deletions drivers/nvme/host/rdma.c
@@ -2087,6 +2087,12 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
goto unmap_qe;

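/* REQ_COPY read: nothing to send on the wire; complete immediately */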
if (unlikely((rq->cmd_flags & REQ_COPY) && (req_op(rq) == REQ_OP_READ))) {
blk_mq_end_request(rq, BLK_STS_OK);
ret = BLK_STS_OK;
goto unmap_qe;
}

blk_mq_start_request(rq);

if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
14 changes: 14 additions & 0 deletions drivers/nvme/host/tcp.c
@@ -2394,6 +2394,11 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
if (ret)
return ret;

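/* REQ_COPY read: no PDU to build; queue_rq completes the request */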
if (unlikely((rq->cmd_flags & REQ_COPY) && (req_op(rq) == REQ_OP_READ))) {
blk_mq_start_request(rq);
return BLK_STS_OK;
}

req->state = NVME_TCP_SEND_CMD_PDU;
req->status = cpu_to_le16(NVME_SC_SUCCESS);
req->offset = 0;
@@ -2462,6 +2467,15 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,

blk_mq_start_request(rq);

if (unlikely((rq->cmd_flags & REQ_COPY) && (req_op(rq) == REQ_OP_READ))) {
blk_mq_set_request_complete(rq);
blk_mq_end_request(rq, BLK_STS_OK);
/* if copy read is the last req, kick off the queued tcp reqs */
if (bd->last && nvme_tcp_queue_more(queue))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
return ret;
}

nvme_tcp_queue_request(req, true, bd->last);

return BLK_STS_OK;
19 changes: 19 additions & 0 deletions drivers/nvme/host/trace.c
@@ -150,6 +150,23 @@ static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
return ret;
}

static const char *nvme_trace_copy(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
u8 nr_range = cdw10[8];
u16 control = get_unaligned_le16(cdw10 + 10);
u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
u32 reftag = get_unaligned_le32(cdw10 + 16);

trace_seq_printf(p,
"slba=%llu, nr_range=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
slba, nr_range, control, dsmgmt, reftag);
trace_seq_putc(p, 0);

return ret;
}
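
/*
 * Illustrative rendering (assumed, not captured output): a one-range
 * copy traces as "slba=<src lba>, nr_range=0, ctrl=0x0, dsmgmt=0,
 * reftag=0"; nr_range is the 0's based NR field, so 0 means one range.
 */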

static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -243,6 +260,8 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
return nvme_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvme_trace_zone_mgmt_recv(p, cdw10);
case nvme_cmd_copy:
return nvme_trace_copy(p, cdw10);
default:
return nvme_trace_common(p, cdw10);
}
43 changes: 40 additions & 3 deletions include/linux/nvme.h
@@ -316,7 +316,7 @@ struct nvme_id_ctrl {
__u8 nvscc;
__u8 nwpc;
__le16 acwu;
__le16 ocfs;
__le32 sgls;
__le32 mnan;
__u8 rsvd544[224];
@@ -344,6 +344,7 @@ enum {
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_ONCS_RESERVATIONS = 1 << 5,
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
NVME_CTRL_ONCS_COPY = 1 << 8,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
@@ -393,7 +394,10 @@ struct nvme_id_ns {
__le16 npdg;
__le16 npda;
__le16 nows;
__le16 mssrl;
__le32 mcl;
__u8 msrc;
__u8 rsvd91[11];
__le32 anagrpid;
__u8 rsvd96[3];
__u8 nsattr;
@@ -750,6 +754,7 @@ enum nvme_opcode {
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
nvme_cmd_resv_release = 0x15,
nvme_cmd_copy = 0x19,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
@@ -771,7 +776,8 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
nvme_opcode_name(nvme_cmd_zone_append), \
nvme_opcode_name(nvme_cmd_copy))



@@ -945,6 +951,36 @@ struct nvme_dsm_range {
__le64 slba;
};

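/*
 * Copy command layout per the NVMe spec: sdlba = cdw10/11, nr_range =
 * the 0's based NR field in cdw12[7:0], control = cdw12[31:16],
 * dspec = cdw13[31:16], ilbrt = cdw14, lbat/lbatm = cdw15; dptr points
 * to the array of source-range descriptors.
 */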
struct nvme_copy_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
union nvme_data_ptr dptr;
__le64 sdlba;
__u8 nr_range;
__u8 rsvd12;
__le16 control;
__le16 rsvd13;
__le16 dspec;
__le32 ilbrt;
__le16 lbat;
__le16 lbatm;
};

struct nvme_copy_range {
__le64 rsvd0;
__le64 slba;
__le16 nlb;
__le16 rsvd18;
__le32 rsvd20;
__le32 eilbrt;
__le16 elbat;
__le16 elbatm;
};

struct nvme_write_zeroes_cmd {
__u8 opcode;
__u8 flags;
@@ -1499,6 +1535,7 @@ struct nvme_command {
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
struct nvme_copy_command copy;
struct nvme_write_zeroes_cmd write_zeroes;
struct nvme_zone_mgmt_send_cmd zms;
struct nvme_zone_mgmt_recv_cmd zmr;
