Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
hw/block/nvme: add support for the asynchronous event request command
Add support for the Asynchronous Event Request command. Required for
compliance with NVMe revision 1.3d. See NVM Express 1.3d, Section 5.2
("Asynchronous Event Request command").

Mostly imported from Keith's qemu-nvme tree. Modified with a max number
of queued events (controllable with the aer_max_queued device
parameter). The spec states that the controller *should* retain
events, so we do best effort here.

Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
  • Loading branch information
birkelund committed Jun 29, 2020
1 parent a6949f4 commit 928a6ea
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 9 deletions.
180 changes: 174 additions & 6 deletions hw/block/nvme.c
Expand Up @@ -342,6 +342,85 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}

static void nvme_process_aers(void *opaque)
{
NvmeCtrl *n = opaque;
NvmeAsyncEvent *event, *next;

trace_pci_nvme_process_aers(n->aer_queued);

QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
NvmeRequest *req;
NvmeAerResult *result;

/* can't post cqe if there is nothing to complete */
if (!n->outstanding_aers) {
trace_pci_nvme_no_outstanding_aers();
break;
}

/* ignore if masked (cqe posted, but event not cleared) */
if (n->aer_mask & (1 << event->result.event_type)) {
trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
continue;
}

QTAILQ_REMOVE(&n->aer_queue, event, entry);
n->aer_queued--;

n->aer_mask |= 1 << event->result.event_type;
n->outstanding_aers--;

req = n->aer_reqs[n->outstanding_aers];

result = (NvmeAerResult *) &req->cqe.result;
result->event_type = event->result.event_type;
result->event_info = event->result.event_info;
result->log_page = event->result.log_page;
g_free(event);

req->status = NVME_SUCCESS;

trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
result->log_page);

nvme_enqueue_req_completion(&n->admin_cq, req);
}
}

static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
uint8_t event_info, uint8_t log_page)
{
NvmeAsyncEvent *event;

trace_pci_nvme_enqueue_event(event_type, event_info, log_page);

if (n->aer_queued == n->params.aer_max_queued) {
trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
return;
}

event = g_new(NvmeAsyncEvent, 1);
event->result = (NvmeAerResult) {
.event_type = event_type,
.event_info = event_info,
.log_page = log_page,
};

QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
n->aer_queued++;

nvme_process_aers(n);
}

static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
{
n->aer_mask &= ~(1 << event_type);
if (!QTAILQ_EMPTY(&n->aer_queue)) {
nvme_process_aers(n);
}
}

static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
Expand Down Expand Up @@ -592,8 +671,9 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
return NVME_SUCCESS;
}

static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
uint32_t buf_len, uint64_t off,
NvmeRequest *req)
{
uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
Expand Down Expand Up @@ -642,6 +722,10 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
smart.power_on_hours[0] =
cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);

if (!rae) {
nvme_clear_events(n, NVME_AER_TYPE_SMART);
}

return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
prp2);
}
Expand All @@ -668,14 +752,19 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
prp2);
}

static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
uint32_t buf_len, uint64_t off,
NvmeRequest *req)
{
uint32_t trans_len;
uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
NvmeErrorLog errlog;

if (!rae) {
nvme_clear_events(n, NVME_AER_TYPE_ERROR);
}

if (off > sizeof(errlog)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
Expand Down Expand Up @@ -716,9 +805,9 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)

switch (lid) {
case NVME_LOG_ERROR_INFO:
return nvme_error_info(n, cmd, len, off, req);
return nvme_error_info(n, cmd, rae, len, off, req);
case NVME_LOG_SMART_INFO:
return nvme_smart_info(n, cmd, len, off, req);
return nvme_smart_info(n, cmd, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, cmd, len, off, req);
default:
Expand Down Expand Up @@ -1000,6 +1089,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
((n->params.max_ioqpairs - 1) << 16));
trace_pci_nvme_getfeat_numq(result);
break;
case NVME_ASYNCHRONOUS_EVENT_CONF:
result = cpu_to_le32(n->features.async_config);
break;
case NVME_TIMESTAMP:
return nvme_get_feature_timestamp(n, cmd);
default:
Expand Down Expand Up @@ -1051,6 +1143,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}

if (((n->temperature >= n->features.temp_thresh_hi) ||
(n->temperature <= n->features.temp_thresh_low)) &&
NVME_AEC_SMART(n->features.async_config) & NVME_SMART_TEMPERATURE) {
nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
NVME_AER_INFO_SMART_TEMP_THRESH,
NVME_LOG_SMART_INFO);
}

break;
case NVME_VOLATILE_WRITE_CACHE:
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
Expand All @@ -1063,6 +1163,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
((n->params.max_ioqpairs - 1) << 16));
break;
case NVME_ASYNCHRONOUS_EVENT_CONF:
n->features.async_config = dw11;
break;
case NVME_TIMESTAMP:
return nvme_set_feature_timestamp(n, cmd);
default:
Expand All @@ -1072,6 +1175,25 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return NVME_SUCCESS;
}

static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
trace_pci_nvme_aer(nvme_cid(req));

if (n->outstanding_aers > n->params.aerl) {
trace_pci_nvme_aer_aerl_exceeded();
return NVME_AER_LIMIT_EXCEEDED;
}

n->aer_reqs[n->outstanding_aers] = req;
n->outstanding_aers++;

if (!QTAILQ_EMPTY(&n->aer_queue)) {
nvme_process_aers(n);
}

return NVME_NO_COMPLETE;
}

static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), cmd->opcode);
Expand All @@ -1095,6 +1217,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
case NVME_ADM_CMD_ASYNC_EV_REQ:
return nvme_aer(n, cmd, req);
default:
trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
Expand Down Expand Up @@ -1149,6 +1273,15 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
}
}

while (!QTAILQ_EMPTY(&n->aer_queue)) {
NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
QTAILQ_REMOVE(&n->aer_queue, event, entry);
g_free(event);
}

n->aer_queued = 0;
n->outstanding_aers = 0;

blk_flush(n->conf.blk);
n->bar.cc = 0;
}
Expand Down Expand Up @@ -1245,6 +1378,8 @@ static int nvme_start_ctrl(NvmeCtrl *n)

nvme_set_timestamp(n, 0ULL);

QTAILQ_INIT(&n->aer_queue);

return 0;
}

Expand Down Expand Up @@ -1466,6 +1601,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"completion queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);

if (n->outstanding_aers) {
nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
NVME_LOG_ERROR_INFO);
}

return;
}

Expand All @@ -1476,6 +1618,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_head=%"PRIu16", ignoring",
qid, new_head);

if (n->outstanding_aers) {
nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
NVME_AER_INFO_ERR_INVALID_DB_VALUE,
NVME_LOG_ERROR_INFO);
}

return;
}

Expand Down Expand Up @@ -1506,6 +1655,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"submission queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);

if (n->outstanding_aers) {
nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
NVME_LOG_ERROR_INFO);
}

return;
}

Expand All @@ -1516,6 +1672,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_tail=%"PRIu16", ignoring",
qid, new_tail);

if (n->outstanding_aers) {
nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
NVME_AER_INFO_ERR_INVALID_DB_VALUE,
NVME_LOG_ERROR_INFO);
}

return;
}

Expand Down Expand Up @@ -1637,6 +1800,7 @@ static void nvme_init_state(NvmeCtrl *n)
n->temperature = NVME_TEMPERATURE;
n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
}

static void nvme_init_blk(NvmeCtrl *n, Error **errp)
Expand Down Expand Up @@ -1792,6 +1956,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
* inconsequential.
*/
id->acl = 3;
id->aerl = n->params.aerl;
id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
id->lpa = NVME_LPA_EXTENDED;

Expand Down Expand Up @@ -1866,6 +2031,7 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->namespaces);
g_free(n->cq);
g_free(n->sq);
g_free(n->aer_reqs);

if (n->params.cmb_size_mb) {
g_free(n->cmbuf);
Expand All @@ -1886,6 +2052,8 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
DEFINE_PROP_END_OF_LIST(),
};

Expand Down
10 changes: 9 additions & 1 deletion hw/block/nvme.h
Expand Up @@ -9,10 +9,12 @@ typedef struct NvmeParams {
uint32_t max_ioqpairs;
uint16_t msix_qsize;
uint32_t cmb_size_mb;
uint8_t aerl;
uint32_t aer_max_queued;
} NvmeParams;

typedef struct NvmeAsyncEvent {
QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
QTAILQ_ENTRY(NvmeAsyncEvent) entry;
NvmeAerResult result;
} NvmeAsyncEvent;

Expand Down Expand Up @@ -94,6 +96,7 @@ typedef struct NvmeCtrl {
uint32_t num_namespaces;
uint32_t max_q_ents;
uint64_t ns_size;
uint8_t outstanding_aers;
uint8_t *cmbuf;
uint32_t irq_status;
uint64_t host_timestamp; /* Timestamp sent by the host */
Expand All @@ -103,6 +106,11 @@ typedef struct NvmeCtrl {

HostMemoryBackend *pmrdev;

uint8_t aer_mask;
NvmeRequest **aer_reqs;
QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
int aer_queued;

NvmeNamespace *namespaces;
NvmeSQueue **sq;
NvmeCQueue **cq;
Expand Down
9 changes: 9 additions & 0 deletions hw/block/trace-events
Expand Up @@ -51,6 +51,15 @@ pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
pci_nvme_process_aers(int queued) "queued %d"
pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""
Expand Down
8 changes: 6 additions & 2 deletions include/block/nvme.h
Expand Up @@ -597,8 +597,8 @@ enum NvmeAsyncEventRequest {
NVME_AER_TYPE_SMART = 1,
NVME_AER_TYPE_IO_SPECIFIC = 6,
NVME_AER_TYPE_VENDOR_SPECIFIC = 7,
NVME_AER_INFO_ERR_INVALID_SQ = 0,
NVME_AER_INFO_ERR_INVALID_DB = 1,
NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0,
NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1,
NVME_AER_INFO_ERR_DIAG_FAIL = 2,
NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4,
Expand Down Expand Up @@ -902,6 +902,10 @@ typedef struct NvmeFeatureVal {

#define NVME_TEMP_TMPTH(temp) ((temp >> 0) & 0xffff)

#define NVME_AEC_SMART(aec) (aec & 0xff)
#define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1)
#define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)

enum NvmeFeatureIds {
NVME_ARBITRATION = 0x1,
NVME_POWER_MANAGEMENT = 0x2,
Expand Down

0 comments on commit 928a6ea

Please sign in to comment.