Skip to content

Commit faae19b

Browse files
Chad Dupuis and martinkpetersen
authored and committed
scsi: bnx2fc: Fix hung task messages when a cleanup response is not received during abort
If a cleanup task is not responded to while we are in bnx2fc_abts_cleanup, it will hang the SCSI error handler since we use wait_for_completion instead of wait_for_completion_timeout. So, use wait_for_completion_timeout so that we don't hang the SCSI error handler thread forever.

Fixes the call trace:

[183373.131468] INFO: task scsi_eh_16:110146 blocked for more than 120 seconds.
[183373.131469] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[183373.131470] scsi_eh_16 D ffff88103f2fca14 0 110146 2 0x00000080
[183373.131472] ffff880855e77cb0 0000000000000046 ffff881050654e70 ffff880855e77fd8
[183373.131474] ffff880855e77fd8 ffff880855e77fd8 ffff881050654e70 ffff88103f2fcb48
[183373.131475] ffff88103f2fcb50 7fffffffffffffff ffff881050654e70 ffff88103f2fca14
[183373.131477] Call Trace:
[183373.131479] [<ffffffff8168b579>] schedule+0x29/0x70
[183373.131481] [<ffffffff81688fc9>] schedule_timeout+0x239/0x2d0
[183373.131486] [<ffffffff8142821e>] ? __dev_printk+0x3e/0x90
[183373.131487] [<ffffffff814282cd>] ? dev_printk+0x5d/0x80
[183373.131490] [<ffffffff8168b956>] wait_for_completion+0x116/0x170
[183373.131492] [<ffffffff810c4ec0>] ? wake_up_state+0x20/0x20
[183373.131494] [<ffffffffa048c234>] bnx2fc_abts_cleanup+0x3d/0x62 [bnx2fc]
[183373.131497] [<ffffffffa0483a80>] bnx2fc_eh_abort+0x470/0x580 [bnx2fc]
[183373.131500] [<ffffffff814570af>] scsi_error_handler+0x59f/0x8b0
[183373.131501] [<ffffffff81456b10>] ? scsi_eh_get_sense+0x250/0x250
[183373.131503] [<ffffffff810b052f>] kthread+0xcf/0xe0
[183373.131505] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140
[183373.131507] [<ffffffff81696418>] ret_from_fork+0x58/0x90
[183373.131509] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140

Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent 6363b3f commit faae19b

File tree

1 file changed

+32
-8
lines changed

1 file changed

+32
-8
lines changed

drivers/scsi/bnx2fc/bnx2fc_io.c

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1084,24 +1084,35 @@ static int bnx2fc_abts_cleanup(struct bnx2fc_cmd *io_req)
10841084
{
10851085
struct bnx2fc_rport *tgt = io_req->tgt;
10861086
int rc = SUCCESS;
1087+
unsigned int time_left;
10871088

10881089
io_req->wait_for_comp = 1;
10891090
bnx2fc_initiate_cleanup(io_req);
10901091

10911092
spin_unlock_bh(&tgt->tgt_lock);
10921093

1093-
wait_for_completion(&io_req->tm_done);
1094-
1094+
/*
1095+
* Can't wait forever on cleanup response lest we let the SCSI error
1096+
* handler wait forever
1097+
*/
1098+
time_left = wait_for_completion_timeout(&io_req->tm_done,
1099+
BNX2FC_FW_TIMEOUT);
10951100
io_req->wait_for_comp = 0;
1101+
if (!time_left)
1102+
BNX2FC_IO_DBG(io_req, "%s(): Wait for cleanup timed out.\n",
1103+
__func__);
1104+
10961105
/*
1097-
* release the reference taken in eh_abort to allow the
1098-
* target to re-login after flushing IOs
1106+
* Release the reference held by the SCSI command, since the cleanup completion
1107+
* hits the BNX2FC_CLEANUP case in bnx2fc_process_cq_compl() and
1108+
* thus the SCSI command is not returned by bnx2fc_scsi_done().
10991109
*/
11001110
kref_put(&io_req->refcount, bnx2fc_cmd_release);
11011111

11021112
spin_lock_bh(&tgt->tgt_lock);
11031113
return rc;
11041114
}
1115+
11051116
/**
11061117
* bnx2fc_eh_abort - eh_abort_handler api to abort an outstanding
11071118
* SCSI command
@@ -1118,6 +1129,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
11181129
struct fc_lport *lport;
11191130
struct bnx2fc_rport *tgt;
11201131
int rc;
1132+
unsigned int time_left;
11211133

11221134
rc = fc_block_scsi_eh(sc_cmd);
11231135
if (rc)
@@ -1194,14 +1206,19 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
11941206
if (cancel_delayed_work(&io_req->timeout_work))
11951207
kref_put(&io_req->refcount,
11961208
bnx2fc_cmd_release); /* drop timer hold */
1209+
/*
1210+
* We don't want to hold off the upper layer timer so simply
1211+
* cleanup the command and return that I/O was successfully
1212+
* aborted.
1213+
*/
11971214
rc = bnx2fc_abts_cleanup(io_req);
11981215
/* This only occurs when a task abort was requested while ABTS
11991216
is in progress. Setting the IO_CLEANUP flag will skip the
12001217
RRQ process in the case when the fw generated SCSI_CMD cmpl
12011218
was a result from the ABTS request rather than the CLEANUP
12021219
request */
12031220
set_bit(BNX2FC_FLAG_IO_CLEANUP, &io_req->req_flags);
1204-
goto out;
1221+
goto done;
12051222
}
12061223

12071224
/* Cancel the current timer running on this io_req */
@@ -1221,7 +1238,11 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
12211238
}
12221239
spin_unlock_bh(&tgt->tgt_lock);
12231240

1224-
wait_for_completion(&io_req->tm_done);
1241+
/* Wait 2 * RA_TOV + 1 to be sure timeout function hasn't fired */
1242+
time_left = wait_for_completion_timeout(&io_req->tm_done,
1243+
(2 * rp->r_a_tov + 1) * HZ);
1244+
if (time_left)
1245+
BNX2FC_IO_DBG(io_req, "Timed out in eh_abort waiting for tm_done");
12251246

12261247
spin_lock_bh(&tgt->tgt_lock);
12271248
io_req->wait_for_comp = 0;
@@ -1233,8 +1254,12 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
12331254
/* Let the scsi-ml try to recover this command */
12341255
printk(KERN_ERR PFX "abort failed, xid = 0x%x\n",
12351256
io_req->xid);
1257+
/*
1258+
* Cleanup firmware residuals before returning control back
1259+
* to SCSI ML.
1260+
*/
12361261
rc = bnx2fc_abts_cleanup(io_req);
1237-
goto out;
1262+
goto done;
12381263
} else {
12391264
/*
12401265
* We come here even when there was a race condition
@@ -1249,7 +1274,6 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
12491274
done:
12501275
/* release the reference taken in eh_abort */
12511276
kref_put(&io_req->refcount, bnx2fc_cmd_release);
1252-
out:
12531277
spin_unlock_bh(&tgt->tgt_lock);
12541278
return rc;
12551279
}

0 commit comments

Comments
 (0)