Skip to content
This repository has been archived by the owner on Jan 6, 2020. It is now read-only.

Commit

Permalink
scsi: lpfc: Fix list corruption on the completion queue.
Browse files Browse the repository at this point in the history
Enabling list_debug showed the drivers txcmplq was suffering list
corruption. The systems will eventually crash because the iocb free list
gets crossed linked with the prings txcmplq.  Most systems will run for a
while after the corruption, but will eventually crash when a scsi eh reset
occurs and the txcmplq is attempted to be flushed. The flush gets stuck in
an endless loop.

The problem is the abort handler does not hold the sli4 ring lock while
validating the IO so the IO could complete while the driver is still
preping the abort.  The erroneously generated abort, when it completes, has
pointers to the original IO that has already completed, and the IO
manipulation (for the second time) corrupts the list.

Correct by taking the ring lock early in the abort handler so the erroneous
abort won't be sent if the io has/is completing.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
jsmart-gh authored and martinkpetersen committed Aug 2, 2018
1 parent b615a20 commit 8931c73
Showing 1 changed file with 36 additions and 8 deletions.
44 changes: 36 additions & 8 deletions drivers/scsi/lpfc/lpfc_scsi.c
Original file line number Diff line number Diff line change
Expand Up @@ -4727,7 +4727,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
struct lpfc_scsi_buf *lpfc_cmd;
IOCB_t *cmd, *icmd;
int ret = SUCCESS, status = 0;
struct lpfc_sli_ring *pring_s4;
struct lpfc_sli_ring *pring_s4 = NULL;
int ret_val;
unsigned long flags;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
Expand Down Expand Up @@ -4757,8 +4757,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
}

iocb = &lpfc_cmd->cur_iocbq;
if (phba->sli_rev == LPFC_SLI_REV4) {
if (!(phba->cfg_fof) ||
(!(iocb->iocb_flag & LPFC_IO_FOF))) {
pring_s4 =
phba->sli4_hba.fcp_wq[iocb->hba_wqidx]->pring;
} else {
iocb->hba_wqidx = 0;
pring_s4 = phba->sli4_hba.oas_wq->pring;
}
if (!pring_s4) {
ret = FAILED;
goto out_unlock;
}
spin_lock(&pring_s4->ring_lock);
}
/* the command is in process of being cancelled */
if (!(iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
if (phba->sli_rev == LPFC_SLI_REV4)
spin_unlock(&pring_s4->ring_lock);
spin_unlock_irqrestore(&phba->hbalock, flags);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
"3169 SCSI Layer abort requested I/O has been "
Expand All @@ -4772,6 +4789,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
* see the completion before the eh fired. Just return SUCCESS.
*/
if (lpfc_cmd->pCmd != cmnd) {
if (phba->sli_rev == LPFC_SLI_REV4)
spin_unlock(&pring_s4->ring_lock);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
"3170 SCSI Layer abort requested I/O has been "
"completed by LLD.\n");
Expand All @@ -4784,13 +4803,17 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
"3389 SCSI Layer I/O Abort Request is pending\n");
if (phba->sli_rev == LPFC_SLI_REV4)
spin_unlock(&pring_s4->ring_lock);
spin_unlock_irqrestore(&phba->hbalock, flags);
goto wait_for_cmpl;
}

abtsiocb = __lpfc_sli_get_iocbq(phba);
if (abtsiocb == NULL) {
ret = FAILED;
if (phba->sli_rev == LPFC_SLI_REV4)
spin_unlock(&pring_s4->ring_lock);
goto out_unlock;
}

Expand Down Expand Up @@ -4828,14 +4851,9 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)

abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
abtsiocb->vport = vport;
lpfc_cmd->waitq = &waitq;
if (phba->sli_rev == LPFC_SLI_REV4) {
pring_s4 = lpfc_sli4_calc_ring(phba, abtsiocb);
if (pring_s4 == NULL) {
ret = FAILED;
goto out_unlock;
}
/* Note: both hbalock and ring_lock must be set here */
spin_lock(&pring_s4->ring_lock);
ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
abtsiocb, 0);
spin_unlock(&pring_s4->ring_lock);
Expand All @@ -4848,6 +4866,17 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)


if (ret_val == IOCB_ERROR) {
if (phba->sli_rev == LPFC_SLI_REV4)
spin_lock_irqsave(&pring_s4->ring_lock, flags);
else
spin_lock_irqsave(&phba->hbalock, flags);
/* Indicate the IO is not being aborted by the driver. */
iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED;
lpfc_cmd->waitq = NULL;
if (phba->sli_rev == LPFC_SLI_REV4)
spin_unlock_irqrestore(&pring_s4->ring_lock, flags);
else
spin_unlock_irqrestore(&phba->hbalock, flags);
lpfc_sli_release_iocbq(phba, abtsiocb);
ret = FAILED;
goto out;
Expand All @@ -4858,7 +4887,6 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
&phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ);

wait_for_cmpl:
lpfc_cmd->waitq = &waitq;
/* Wait for abort to complete */
wait_event_timeout(waitq,
(lpfc_cmd->pCmd != cmnd),
Expand Down

0 comments on commit 8931c73

Please sign in to comment.