Skip to content

Commit a0bf1ca

Browse files
Hiral PatelJames Bottomley
authored andcommitted
[SCSI] fnic: fnic driver may hit BUG_ON on device reset
The issue was observed when LUN Reset is issued through IOCTL or sg_reset utility. fnic driver issues LUN RESET to firmware. On successful completion of device reset, driver cleans up all the pending IOs that were issued prior to device reset. These pending IOs are expected to be in ABTS_PENDING state. This works fine, when the device reset operation resulted from midlayer, but not when device reset was triggered from IOCTL path as the pending IOs were not in ABTS_PENDING state. execution path hits panic if the pending IO is not in ABTS_PENDING state. Changes: The fix replaces BUG_ON check in fnic_clean_pending_aborts() with marking pending IOs as ABTS_PENDING if they were not in ABTS_PENDING state and skips if they were already in ABTS_PENDING state. An extra check is added to validate the abort status of the commands after a delay of 2 * E_D_TOV using a helper function. The helper function returns 1 if it finds any pending IO in ABTS_PENDING state, belong to the LUN on which device reset was issued else 0. With this, device reset operation returns success only if the helper funciton returns 0, otherwise it returns failure. Other changes: - Removed code in fnic_clean_pending_aborts() that returns failure if it finds io_req NULL, instead of returning failure added code to continue with next io - Added device reset flags for debugging in fnic_terminate_rport_io, fnic_rport_exch_reset, and fnic_clean_pending_aborts Signed-off-by: Narsimhulu Musini <nmusini@cisco.com> Signed-off-by: Hiral Patel <hiralpat@cisco.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
1 parent cfe16d5 commit a0bf1ca

File tree

2 files changed

+116
-7
lines changed

2 files changed

+116
-7
lines changed

drivers/scsi/fnic/fnic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ const char *fnic_state_to_str(unsigned int state);
303303
void fnic_log_q_error(struct fnic *fnic);
304304
void fnic_handle_link_event(struct fnic *fnic);
305305

306+
int fnic_is_abts_pending(struct fnic *, struct scsi_cmnd *);
307+
306308
static inline int
307309
fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags)
308310
{

drivers/scsi/fnic/fnic_scsi.c

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,8 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
12711271
spin_unlock_irqrestore(io_lock, flags);
12721272
} else {
12731273
spin_lock_irqsave(io_lock, flags);
1274-
CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1274+
if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1275+
CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
12751276
spin_unlock_irqrestore(io_lock, flags);
12761277
}
12771278
}
@@ -1379,7 +1380,8 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
13791380
spin_unlock_irqrestore(io_lock, flags);
13801381
} else {
13811382
spin_lock_irqsave(io_lock, flags);
1382-
CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1383+
if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1384+
CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
13831385
spin_unlock_irqrestore(io_lock, flags);
13841386
}
13851387
}
@@ -1592,7 +1594,7 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
15921594
static int fnic_clean_pending_aborts(struct fnic *fnic,
15931595
struct scsi_cmnd *lr_sc)
15941596
{
1595-
int tag;
1597+
int tag, abt_tag;
15961598
struct fnic_io_req *io_req;
15971599
spinlock_t *io_lock;
15981600
unsigned long flags;
@@ -1601,6 +1603,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
16011603
struct scsi_lun fc_lun;
16021604
struct scsi_device *lun_dev = lr_sc->device;
16031605
DECLARE_COMPLETION_ONSTACK(tm_done);
1606+
enum fnic_ioreq_state old_ioreq_state;
16041607

16051608
for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
16061609
sc = scsi_host_find_tag(fnic->lport->host, tag);
@@ -1629,7 +1632,41 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
16291632
"Found IO in %s on lun\n",
16301633
fnic_ioreq_state_to_str(CMD_STATE(sc)));
16311634

1632-
BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING);
1635+
if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1636+
spin_unlock_irqrestore(io_lock, flags);
1637+
continue;
1638+
}
1639+
if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
1640+
(!(CMD_FLAGS(sc) & FNIC_DEV_RST_PENDING))) {
1641+
FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
1642+
"%s dev rst not pending sc 0x%p\n", __func__,
1643+
sc);
1644+
spin_unlock_irqrestore(io_lock, flags);
1645+
continue;
1646+
}
1647+
old_ioreq_state = CMD_STATE(sc);
1648+
/*
1649+
* Any pending IO issued prior to reset is expected to be
1650+
* in abts pending state, if not we need to set
1651+
* FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
1652+
* When IO is completed, the IO will be handed over and
1653+
* handled in this function.
1654+
*/
1655+
CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1656+
1657+
if (io_req->abts_done)
1658+
shost_printk(KERN_ERR, fnic->lport->host,
1659+
"%s: io_req->abts_done is set state is %s\n",
1660+
__func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
1661+
1662+
BUG_ON(io_req->abts_done);
1663+
1664+
abt_tag = tag;
1665+
if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
1666+
abt_tag |= FNIC_TAG_DEV_RST;
1667+
FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
1668+
"%s: dev rst sc 0x%p\n", __func__, sc);
1669+
}
16331670

16341671
CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
16351672
io_req->abts_done = &tm_done;
@@ -1638,16 +1675,23 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
16381675
/* Now queue the abort command to firmware */
16391676
int_to_scsilun(sc->device->lun, &fc_lun);
16401677

1641-
if (fnic_queue_abort_io_req(fnic, tag,
1678+
if (fnic_queue_abort_io_req(fnic, abt_tag,
16421679
FCPIO_ITMF_ABT_TASK_TERM,
16431680
fc_lun.scsi_lun, io_req)) {
16441681
spin_lock_irqsave(io_lock, flags);
16451682
io_req = (struct fnic_io_req *)CMD_SP(sc);
16461683
if (io_req)
16471684
io_req->abts_done = NULL;
1685+
if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
1686+
CMD_STATE(sc) = old_ioreq_state;
16481687
spin_unlock_irqrestore(io_lock, flags);
16491688
ret = 1;
16501689
goto clean_pending_aborts_end;
1690+
} else {
1691+
spin_lock_irqsave(io_lock, flags);
1692+
if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1693+
CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1694+
spin_unlock_irqrestore(io_lock, flags);
16511695
}
16521696

16531697
wait_for_completion_timeout(&tm_done,
@@ -1659,8 +1703,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
16591703
io_req = (struct fnic_io_req *)CMD_SP(sc);
16601704
if (!io_req) {
16611705
spin_unlock_irqrestore(io_lock, flags);
1662-
ret = 1;
1663-
goto clean_pending_aborts_end;
1706+
continue;
16641707
}
16651708

16661709
io_req->abts_done = NULL;
@@ -1678,6 +1721,12 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
16781721
mempool_free(io_req, fnic->io_req_pool);
16791722
}
16801723

1724+
schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
1725+
1726+
/* walk again to check, if IOs are still pending in fw */
1727+
if (fnic_is_abts_pending(fnic, lr_sc))
1728+
ret = FAILED;
1729+
16811730
clean_pending_aborts_end:
16821731
return ret;
16831732
}
@@ -2142,3 +2191,61 @@ void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
21422191
fc_exch_mgr_reset(lp, sid, did);
21432192

21442193
}
2194+
2195+
/*
2196+
* fnic_is_abts_pending() is a helper function that
2197+
* walks through tag map to check if there is any IOs pending,if there is one,
2198+
* then it returns 1 (true), otherwise 0 (false)
2199+
* if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
2200+
* otherwise, it checks for all IOs.
2201+
*/
2202+
int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
2203+
{
2204+
int tag;
2205+
struct fnic_io_req *io_req;
2206+
spinlock_t *io_lock;
2207+
unsigned long flags;
2208+
int ret = 0;
2209+
struct scsi_cmnd *sc;
2210+
struct scsi_device *lun_dev = NULL;
2211+
2212+
if (lr_sc)
2213+
lun_dev = lr_sc->device;
2214+
2215+
/* walk again to check, if IOs are still pending in fw */
2216+
for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
2217+
sc = scsi_host_find_tag(fnic->lport->host, tag);
2218+
/*
2219+
* ignore this lun reset cmd or cmds that do not belong to
2220+
* this lun
2221+
*/
2222+
if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
2223+
continue;
2224+
2225+
io_lock = fnic_io_lock_hash(fnic, sc);
2226+
spin_lock_irqsave(io_lock, flags);
2227+
2228+
io_req = (struct fnic_io_req *)CMD_SP(sc);
2229+
2230+
if (!io_req || sc->device != lun_dev) {
2231+
spin_unlock_irqrestore(io_lock, flags);
2232+
continue;
2233+
}
2234+
2235+
/*
2236+
* Found IO that is still pending with firmware and
2237+
* belongs to the LUN that we are resetting
2238+
*/
2239+
FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
2240+
"Found IO in %s on lun\n",
2241+
fnic_ioreq_state_to_str(CMD_STATE(sc)));
2242+
2243+
if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
2244+
spin_unlock_irqrestore(io_lock, flags);
2245+
ret = 1;
2246+
continue;
2247+
}
2248+
}
2249+
2250+
return ret;
2251+
}

0 commit comments

Comments
 (0)