Skip to content

Commit 6b08fe7

Browse files
Merge patch series "target: Remove atomics from main IO path"
Mike Christie <michael.christie@oracle.com> says: The following patches, made over Linus's tree, remove the atomic use from the main IO path. There were a handful of atomic_longs used just for stats and a couple of atomics used for handling ordered commands. These patches move the stats to per cpu, and move the ordered tracking to a per cpu counter. With the patches, 8K IOPS increases by up to 33% when running fio with numjobs >= 4 and using the vhost-scsi target with virtio-scsi and virtio num_queues >= 4 (jobs and queues match, and virtqueue_size and cmd_per_lun are increased to match the total iodepth of all jobs). Link: https://lore.kernel.org/r/20250424032741.16216-1-michael.christie@oracle.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2 parents a0d1cf5 + 268975a commit 6b08fe7

File tree

4 files changed

+204
-97
lines changed

4 files changed

+204
-97
lines changed

drivers/target/target_core_device.c

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd)
5555
rcu_read_lock();
5656
deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun);
5757
if (deve) {
58-
atomic_long_inc(&deve->total_cmds);
58+
this_cpu_inc(deve->stats->total_cmds);
5959

6060
if (se_cmd->data_direction == DMA_TO_DEVICE)
61-
atomic_long_add(se_cmd->data_length,
62-
&deve->write_bytes);
61+
this_cpu_add(deve->stats->write_bytes,
62+
se_cmd->data_length);
6363
else if (se_cmd->data_direction == DMA_FROM_DEVICE)
64-
atomic_long_add(se_cmd->data_length,
65-
&deve->read_bytes);
64+
this_cpu_add(deve->stats->read_bytes,
65+
se_cmd->data_length);
6666

6767
if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
6868
deve->lun_access_ro) {
@@ -126,14 +126,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd)
126126
* target_core_fabric_configfs.c:target_fabric_port_release
127127
*/
128128
se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
129-
atomic_long_inc(&se_cmd->se_dev->num_cmds);
129+
this_cpu_inc(se_cmd->se_dev->stats->total_cmds);
130130

131131
if (se_cmd->data_direction == DMA_TO_DEVICE)
132-
atomic_long_add(se_cmd->data_length,
133-
&se_cmd->se_dev->write_bytes);
132+
this_cpu_add(se_cmd->se_dev->stats->write_bytes,
133+
se_cmd->data_length);
134134
else if (se_cmd->data_direction == DMA_FROM_DEVICE)
135-
atomic_long_add(se_cmd->data_length,
136-
&se_cmd->se_dev->read_bytes);
135+
this_cpu_add(se_cmd->se_dev->stats->read_bytes,
136+
se_cmd->data_length);
137137

138138
return ret;
139139
}
@@ -322,13 +322,20 @@ int core_enable_device_list_for_node(
322322
struct se_portal_group *tpg)
323323
{
324324
struct se_dev_entry *orig, *new;
325+
int ret = 0;
325326

326327
new = kzalloc(sizeof(*new), GFP_KERNEL);
327328
if (!new) {
328329
pr_err("Unable to allocate se_dev_entry memory\n");
329330
return -ENOMEM;
330331
}
331332

333+
new->stats = alloc_percpu(struct se_dev_entry_io_stats);
334+
if (!new->stats) {
335+
ret = -ENOMEM;
336+
goto free_deve;
337+
}
338+
332339
spin_lock_init(&new->ua_lock);
333340
INIT_LIST_HEAD(&new->ua_list);
334341
INIT_LIST_HEAD(&new->lun_link);
@@ -351,17 +358,17 @@ int core_enable_device_list_for_node(
351358
" for dynamic -> explicit NodeACL conversion:"
352359
" %s\n", nacl->initiatorname);
353360
mutex_unlock(&nacl->lun_entry_mutex);
354-
kfree(new);
355-
return -EINVAL;
361+
ret = -EINVAL;
362+
goto free_stats;
356363
}
357364
if (orig->se_lun_acl != NULL) {
358365
pr_warn_ratelimited("Detected existing explicit"
359366
" se_lun_acl->se_lun_group reference for %s"
360367
" mapped_lun: %llu, failing\n",
361368
nacl->initiatorname, mapped_lun);
362369
mutex_unlock(&nacl->lun_entry_mutex);
363-
kfree(new);
364-
return -EINVAL;
370+
ret = -EINVAL;
371+
goto free_stats;
365372
}
366373

367374
new->se_lun = lun;
@@ -394,6 +401,20 @@ int core_enable_device_list_for_node(
394401

395402
target_luns_data_has_changed(nacl, new, true);
396403
return 0;
404+
405+
free_stats:
406+
free_percpu(new->stats);
407+
free_deve:
408+
kfree(new);
409+
return ret;
410+
}
411+
412+
static void target_free_dev_entry(struct rcu_head *head)
413+
{
414+
struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
415+
rcu_head);
416+
free_percpu(deve->stats);
417+
kfree(deve);
397418
}
398419

399420
void core_disable_device_list_for_node(
@@ -443,7 +464,7 @@ void core_disable_device_list_for_node(
443464
kref_put(&orig->pr_kref, target_pr_kref_release);
444465
wait_for_completion(&orig->pr_comp);
445466

446-
kfree_rcu(orig, rcu_head);
467+
call_rcu(&orig->rcu_head, target_free_dev_entry);
447468

448469
core_scsi3_free_pr_reg_from_nacl(dev, nacl);
449470
target_luns_data_has_changed(nacl, NULL, false);
@@ -679,6 +700,18 @@ static void scsi_dump_inquiry(struct se_device *dev)
679700
pr_debug(" Type: %s ", scsi_device_type(device_type));
680701
}
681702

703+
static void target_non_ordered_release(struct percpu_ref *ref)
704+
{
705+
struct se_device *dev = container_of(ref, struct se_device,
706+
non_ordered);
707+
unsigned long flags;
708+
709+
spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
710+
if (!list_empty(&dev->delayed_cmd_list))
711+
schedule_work(&dev->delayed_cmd_work);
712+
spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
713+
}
714+
682715
struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
683716
{
684717
struct se_device *dev;
@@ -689,11 +722,13 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
689722
if (!dev)
690723
return NULL;
691724

725+
dev->stats = alloc_percpu(struct se_dev_io_stats);
726+
if (!dev->stats)
727+
goto free_device;
728+
692729
dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL);
693-
if (!dev->queues) {
694-
hba->backend->ops->free_device(dev);
695-
return NULL;
696-
}
730+
if (!dev->queues)
731+
goto free_stats;
697732

698733
dev->queue_cnt = nr_cpu_ids;
699734
for (i = 0; i < dev->queue_cnt; i++) {
@@ -707,6 +742,10 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
707742
INIT_WORK(&q->sq.work, target_queued_submit_work);
708743
}
709744

745+
if (percpu_ref_init(&dev->non_ordered, target_non_ordered_release,
746+
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
747+
goto free_queues;
748+
710749
dev->se_hba = hba;
711750
dev->transport = hba->backend->ops;
712751
dev->transport_flags = dev->transport->transport_flags_default;
@@ -791,6 +830,14 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
791830
sizeof(dev->t10_wwn.revision));
792831

793832
return dev;
833+
834+
free_queues:
835+
kfree(dev->queues);
836+
free_stats:
837+
free_percpu(dev->stats);
838+
free_device:
839+
hba->backend->ops->free_device(dev);
840+
return NULL;
794841
}
795842

796843
/*
@@ -980,6 +1027,9 @@ void target_free_device(struct se_device *dev)
9801027

9811028
WARN_ON(!list_empty(&dev->dev_sep_list));
9821029

1030+
percpu_ref_exit(&dev->non_ordered);
1031+
cancel_work_sync(&dev->delayed_cmd_work);
1032+
9831033
if (target_dev_configured(dev)) {
9841034
dev->transport->destroy_device(dev);
9851035

@@ -1001,6 +1051,7 @@ void target_free_device(struct se_device *dev)
10011051
dev->transport->free_prot(dev);
10021052

10031053
kfree(dev->queues);
1054+
free_percpu(dev->stats);
10041055
dev->transport->free_device(dev);
10051056
}
10061057

drivers/target/target_core_stat.c

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -280,30 +280,51 @@ static ssize_t target_stat_lu_num_cmds_show(struct config_item *item,
280280
char *page)
281281
{
282282
struct se_device *dev = to_stat_lu_dev(item);
283+
struct se_dev_io_stats *stats;
284+
unsigned int cpu;
285+
u32 cmds = 0;
286+
287+
for_each_possible_cpu(cpu) {
288+
stats = per_cpu_ptr(dev->stats, cpu);
289+
cmds += stats->total_cmds;
290+
}
283291

284292
/* scsiLuNumCommands */
285-
return snprintf(page, PAGE_SIZE, "%lu\n",
286-
atomic_long_read(&dev->num_cmds));
293+
return snprintf(page, PAGE_SIZE, "%u\n", cmds);
287294
}
288295

289296
static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item,
290297
char *page)
291298
{
292299
struct se_device *dev = to_stat_lu_dev(item);
300+
struct se_dev_io_stats *stats;
301+
unsigned int cpu;
302+
u32 bytes = 0;
303+
304+
for_each_possible_cpu(cpu) {
305+
stats = per_cpu_ptr(dev->stats, cpu);
306+
bytes += stats->read_bytes;
307+
}
293308

294309
/* scsiLuReadMegaBytes */
295-
return snprintf(page, PAGE_SIZE, "%lu\n",
296-
atomic_long_read(&dev->read_bytes) >> 20);
310+
return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
297311
}
298312

299313
static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item,
300314
char *page)
301315
{
302316
struct se_device *dev = to_stat_lu_dev(item);
317+
struct se_dev_io_stats *stats;
318+
unsigned int cpu;
319+
u32 bytes = 0;
320+
321+
for_each_possible_cpu(cpu) {
322+
stats = per_cpu_ptr(dev->stats, cpu);
323+
bytes += stats->write_bytes;
324+
}
303325

304326
/* scsiLuWrittenMegaBytes */
305-
return snprintf(page, PAGE_SIZE, "%lu\n",
306-
atomic_long_read(&dev->write_bytes) >> 20);
327+
return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
307328
}
308329

309330
static ssize_t target_stat_lu_resets_show(struct config_item *item, char *page)
@@ -1019,18 +1040,26 @@ static ssize_t target_stat_auth_num_cmds_show(struct config_item *item,
10191040
{
10201041
struct se_lun_acl *lacl = auth_to_lacl(item);
10211042
struct se_node_acl *nacl = lacl->se_lun_nacl;
1043+
struct se_dev_entry_io_stats *stats;
10221044
struct se_dev_entry *deve;
1045+
unsigned int cpu;
10231046
ssize_t ret;
1047+
u32 cmds = 0;
10241048

10251049
rcu_read_lock();
10261050
deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
10271051
if (!deve) {
10281052
rcu_read_unlock();
10291053
return -ENODEV;
10301054
}
1055+
1056+
for_each_possible_cpu(cpu) {
1057+
stats = per_cpu_ptr(deve->stats, cpu);
1058+
cmds += stats->total_cmds;
1059+
}
1060+
10311061
/* scsiAuthIntrOutCommands */
1032-
ret = snprintf(page, PAGE_SIZE, "%lu\n",
1033-
atomic_long_read(&deve->total_cmds));
1062+
ret = snprintf(page, PAGE_SIZE, "%u\n", cmds);
10341063
rcu_read_unlock();
10351064
return ret;
10361065
}
@@ -1040,18 +1069,26 @@ static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item,
10401069
{
10411070
struct se_lun_acl *lacl = auth_to_lacl(item);
10421071
struct se_node_acl *nacl = lacl->se_lun_nacl;
1072+
struct se_dev_entry_io_stats *stats;
10431073
struct se_dev_entry *deve;
1074+
unsigned int cpu;
10441075
ssize_t ret;
1076+
u32 bytes = 0;
10451077

10461078
rcu_read_lock();
10471079
deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
10481080
if (!deve) {
10491081
rcu_read_unlock();
10501082
return -ENODEV;
10511083
}
1084+
1085+
for_each_possible_cpu(cpu) {
1086+
stats = per_cpu_ptr(deve->stats, cpu);
1087+
bytes += stats->read_bytes;
1088+
}
1089+
10521090
/* scsiAuthIntrReadMegaBytes */
1053-
ret = snprintf(page, PAGE_SIZE, "%u\n",
1054-
(u32)(atomic_long_read(&deve->read_bytes) >> 20));
1091+
ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
10551092
rcu_read_unlock();
10561093
return ret;
10571094
}
@@ -1061,18 +1098,26 @@ static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item,
10611098
{
10621099
struct se_lun_acl *lacl = auth_to_lacl(item);
10631100
struct se_node_acl *nacl = lacl->se_lun_nacl;
1101+
struct se_dev_entry_io_stats *stats;
10641102
struct se_dev_entry *deve;
1103+
unsigned int cpu;
10651104
ssize_t ret;
1105+
u32 bytes = 0;
10661106

10671107
rcu_read_lock();
10681108
deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
10691109
if (!deve) {
10701110
rcu_read_unlock();
10711111
return -ENODEV;
10721112
}
1113+
1114+
for_each_possible_cpu(cpu) {
1115+
stats = per_cpu_ptr(deve->stats, cpu);
1116+
bytes += stats->write_bytes;
1117+
}
1118+
10731119
/* scsiAuthIntrWrittenMegaBytes */
1074-
ret = snprintf(page, PAGE_SIZE, "%u\n",
1075-
(u32)(atomic_long_read(&deve->write_bytes) >> 20));
1120+
ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
10761121
rcu_read_unlock();
10771122
return ret;
10781123
}

0 commit comments

Comments
 (0)