Skip to content
/ linux Public

Commit ce22aae

Browse files
damien-lemoalgregkh
authored andcommitted
ata: libata-scsi: avoid Non-NCQ command starvation
[ Upstream commit 0ea8408 ] When a non-NCQ command is issued while NCQ commands are being executed, ata_scsi_qc_issue() indicates to the SCSI layer that the command issuing should be deferred by returning SCSI_MLQUEUE_XXX_BUSY. This command deferring is correct and as mandated by the ACS specifications since NCQ and non-NCQ commands cannot be mixed. However, in the case of a host adapter using multiple submission queues, when the target device is under a constant load of NCQ commands, there are no guarantees that requeueing the non-NCQ command will be executed later and it may be deferred again repeatedly as other submission queues can constantly issue NCQ commands from different CPUs ahead of the non-NCQ command. This can lead to very long delays for the execution of non-NCQ commands, and even complete starvation for these commands in the worst case scenario. Since the block layer and the SCSI layer do not distinguish between queueable (NCQ) and non queueable (non-NCQ) commands, libata-scsi SAT implementation must ensure forward progress for non-NCQ commands in the presence of NCQ command traffic. This is similar to what SAS HBAs with a hardware/firmware based SAT implementation do. Implement such forward progress guarantee by limiting requeueing of non-NCQ commands from ata_scsi_qc_issue(): when a non-NCQ command is received and NCQ commands are in-flight, do not force a requeue of the non-NCQ command by returning SCSI_MLQUEUE_XXX_BUSY and instead return 0 to indicate that the command was accepted but hold on to the qc using the new deferred_qc field of struct ata_port. This deferred qc will be issued using the work item deferred_qc_work running the function ata_scsi_deferred_qc_work() once all in-flight commands complete, which is checked with the port qc_defer() callback return value indicating that no further delay is necessary. This check is done using the helper function ata_scsi_schedule_deferred_qc() which is called from ata_scsi_qc_complete(). This thus excludes this mechanism from all internal non-NCQ commands issued by ATA EH. When a port deferred_qc is non NULL, that is, the port has a command waiting for the device queue to drain, the issuing of all incoming commands (both NCQ and non-NCQ) is deferred using the regular busy mechanism. This simplifies the code and also avoids potential denial of service problems if a user issues too many non-NCQ commands. Finally, whenever ata EH is scheduled, regardless of the reason, a deferred qc is always requeued so that it can be retried once EH completes. This is done by calling the function ata_scsi_requeue_deferred_qc() from ata_eh_set_pending(). This avoids the need for any special processing for the deferred qc in case of NCQ error, link or device reset, or device timeout. Reported-by: Xingui Yang <yangxingui@huawei.com> Reported-by: Igor Pylypiv <ipylypiv@google.com> Fixes: bdb0130 ("scsi: Add host and host template flag 'host_tagset'") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Niklas Cassel <cassel@kernel.org> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: John Garry <john.g.garry@oracle.com> Tested-by: Igor Pylypiv <ipylypiv@google.com> Tested-by: Xingui Yang <yangxingui@huawei.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent e44a354 commit ce22aae

File tree

5 files changed

+109
-0
lines changed

5 files changed

+109
-0
lines changed

drivers/ata/libata-core.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5521,6 +5521,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
55215521
mutex_init(&ap->scsi_scan_mutex);
55225522
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
55235523
INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
5524+
INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work);
55245525
INIT_LIST_HEAD(&ap->eh_done_q);
55255526
init_waitqueue_head(&ap->eh_wait_q);
55265527
init_completion(&ap->park_req_pending);
@@ -6131,6 +6132,10 @@ static void ata_port_detach(struct ata_port *ap)
61316132
}
61326133
}
61336134

6135+
/* Make sure the deferred qc work finished. */
6136+
cancel_work_sync(&ap->deferred_qc_work);
6137+
WARN_ON(ap->deferred_qc);
6138+
61346139
/* Tell EH to disable all devices */
61356140
ap->pflags |= ATA_PFLAG_UNLOADING;
61366141
ata_port_schedule_eh(ap);

drivers/ata/libata-eh.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,12 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
920920

921921
ap->pflags |= ATA_PFLAG_EH_PENDING;
922922

923+
/*
924+
* If we have a deferred qc, requeue it so that it is retried once EH
925+
* completes.
926+
*/
927+
ata_scsi_requeue_deferred_qc(ap);
928+
923929
if (!fastdrain)
924930
return;
925931

drivers/ata/libata-scsi.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,8 +1671,77 @@ static void ata_qc_done(struct ata_queued_cmd *qc)
16711671
done(cmd);
16721672
}
16731673

1674+
void ata_scsi_deferred_qc_work(struct work_struct *work)
1675+
{
1676+
struct ata_port *ap =
1677+
container_of(work, struct ata_port, deferred_qc_work);
1678+
struct ata_queued_cmd *qc;
1679+
unsigned long flags;
1680+
1681+
spin_lock_irqsave(ap->lock, flags);
1682+
1683+
/*
1684+
* If we still have a deferred qc and we are not in EH, issue it. In
1685+
* such case, we should not need any more deferring the qc, so warn if
1686+
* qc_defer() says otherwise.
1687+
*/
1688+
qc = ap->deferred_qc;
1689+
if (qc && !ata_port_eh_scheduled(ap)) {
1690+
WARN_ON_ONCE(ap->ops->qc_defer(qc));
1691+
ap->deferred_qc = NULL;
1692+
ata_qc_issue(qc);
1693+
}
1694+
1695+
spin_unlock_irqrestore(ap->lock, flags);
1696+
}
1697+
1698+
void ata_scsi_requeue_deferred_qc(struct ata_port *ap)
1699+
{
1700+
struct ata_queued_cmd *qc = ap->deferred_qc;
1701+
struct scsi_cmnd *scmd;
1702+
1703+
lockdep_assert_held(ap->lock);
1704+
1705+
/*
1706+
* If we have a deferred qc when a reset occurs or NCQ commands fail,
1707+
* do not try to be smart about what to do with this deferred command
1708+
* and simply retry it by completing it with DID_SOFT_ERROR.
1709+
*/
1710+
if (!qc)
1711+
return;
1712+
1713+
scmd = qc->scsicmd;
1714+
ap->deferred_qc = NULL;
1715+
ata_qc_free(qc);
1716+
scmd->result = (DID_SOFT_ERROR << 16);
1717+
scsi_done(scmd);
1718+
}
1719+
1720+
static void ata_scsi_schedule_deferred_qc(struct ata_port *ap)
1721+
{
1722+
struct ata_queued_cmd *qc = ap->deferred_qc;
1723+
1724+
lockdep_assert_held(ap->lock);
1725+
1726+
/*
1727+
* If we have a deferred qc, then qc_defer() is defined and we can use
1728+
* this callback to determine if this qc is good to go, unless EH has
1729+
* been scheduled.
1730+
*/
1731+
if (!qc)
1732+
return;
1733+
1734+
if (ata_port_eh_scheduled(ap)) {
1735+
ata_scsi_requeue_deferred_qc(ap);
1736+
return;
1737+
}
1738+
if (!ap->ops->qc_defer(qc))
1739+
queue_work(system_highpri_wq, &ap->deferred_qc_work);
1740+
}
1741+
16741742
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
16751743
{
1744+
struct ata_port *ap = qc->ap;
16761745
struct scsi_cmnd *cmd = qc->scsicmd;
16771746
u8 *cdb = cmd->cmnd;
16781747
bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID;
@@ -1700,6 +1769,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
17001769
}
17011770

17021771
ata_qc_done(qc);
1772+
1773+
ata_scsi_schedule_deferred_qc(ap);
17031774
}
17041775

17051776
static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
@@ -1709,6 +1780,16 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
17091780
if (!ap->ops->qc_defer)
17101781
goto issue;
17111782

1783+
/*
1784+
* If we already have a deferred qc, then rely on the SCSI layer to
1785+
* requeue and defer all incoming commands until the deferred qc is
1786+
* processed, once all on-going commands complete.
1787+
*/
1788+
if (ap->deferred_qc) {
1789+
ata_qc_free(qc);
1790+
return SCSI_MLQUEUE_DEVICE_BUSY;
1791+
}
1792+
17121793
/* Check if the command needs to be deferred. */
17131794
ret = ap->ops->qc_defer(qc);
17141795
switch (ret) {
@@ -1727,6 +1808,18 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
17271808
}
17281809

17291810
if (ret) {
1811+
/*
1812+
* We must defer this qc: if this is not an NCQ command, keep
1813+
* this qc as a deferred one and report to the SCSI layer that
1814+
* we issued it so that it is not requeued. The deferred qc will
1815+
* be issued with the port deferred_qc_work once all on-going
1816+
* commands complete.
1817+
*/
1818+
if (!ata_is_ncq(qc->tf.protocol)) {
1819+
ap->deferred_qc = qc;
1820+
return 0;
1821+
}
1822+
17301823
/* Force a requeue of the command to defer its execution. */
17311824
ata_qc_free(qc);
17321825
return ret;

drivers/ata/libata.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ void ata_scsi_sdev_config(struct scsi_device *sdev);
166166
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
167167
struct ata_device *dev);
168168
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
169+
void ata_scsi_deferred_qc_work(struct work_struct *work);
170+
void ata_scsi_requeue_deferred_qc(struct ata_port *ap);
169171

170172
/* libata-eh.c */
171173
extern unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);

include/linux/libata.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,9 @@ struct ata_port {
897897
u64 qc_active;
898898
int nr_active_links; /* #links with active qcs */
899899

900+
struct work_struct deferred_qc_work;
901+
struct ata_queued_cmd *deferred_qc;
902+
900903
struct ata_link link; /* host default link */
901904
struct ata_link *slave_link; /* see ata_slave_link_init() */
902905

0 commit comments

Comments
 (0)