Skip to content

Commit 91f3623

Browse files
BernardMetzler authored and rleon committed
RDMA/siw: Fix tx thread initialization.
Immediately removing the siw module after insertion may crash in siw_stop_tx_thread() if the corresponding thread has not yet had a chance to initialize its wait queue and siw_stop_tx_thread() tries to wake up that thread. Initializing the thread's state before spawning it fixes this.

Reported-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20230728114418.124328-1-bmt@zurich.ibm.com
Tested-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent a45e5f1 commit 91f3623

File tree

3 files changed

+43
-44
lines changed

3 files changed

+43
-44
lines changed

drivers/infiniband/sw/siw/siw.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk);
531531
void siw_qp_llp_write_space(struct sock *sk);
532532

533533
/* QP TX path functions */
534+
int siw_create_tx_threads(void);
535+
void siw_stop_tx_threads(void);
534536
int siw_run_sq(void *arg);
535537
int siw_qp_sq_process(struct siw_qp *qp);
536538
int siw_sq_start(struct siw_qp *qp);
537539
int siw_activate_tx(struct siw_qp *qp);
538-
void siw_stop_tx_thread(int nr_cpu);
539540
int siw_get_tx_cpu(struct siw_device *sdev);
540541
void siw_put_tx_cpu(int cpu);
541542

drivers/infiniband/sw/siw/siw_main.c

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -87,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev)
8787
xa_destroy(&sdev->mem_xa);
8888
}
8989

90-
static int siw_create_tx_threads(void)
91-
{
92-
int cpu, assigned = 0;
93-
94-
for_each_online_cpu(cpu) {
95-
/* Skip HT cores */
96-
if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
97-
continue;
98-
99-
siw_tx_thread[cpu] =
100-
kthread_run_on_cpu(siw_run_sq,
101-
(unsigned long *)(long)cpu,
102-
cpu, "siw_tx/%u");
103-
if (IS_ERR(siw_tx_thread[cpu])) {
104-
siw_tx_thread[cpu] = NULL;
105-
continue;
106-
}
107-
108-
assigned++;
109-
}
110-
return assigned;
111-
}
112-
11390
static int siw_dev_qualified(struct net_device *netdev)
11491
{
11592
/*
@@ -529,7 +506,6 @@ static struct rdma_link_ops siw_link_ops = {
529506
static __init int siw_init_module(void)
530507
{
531508
int rv;
532-
int nr_cpu;
533509

534510
if (SENDPAGE_THRESH < SIW_MAX_INLINE) {
535511
pr_info("siw: sendpage threshold too small: %u\n",
@@ -574,12 +550,8 @@ static __init int siw_init_module(void)
574550
return 0;
575551

576552
out_error:
577-
for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) {
578-
if (siw_tx_thread[nr_cpu]) {
579-
siw_stop_tx_thread(nr_cpu);
580-
siw_tx_thread[nr_cpu] = NULL;
581-
}
582-
}
553+
siw_stop_tx_threads();
554+
583555
if (siw_crypto_shash)
584556
crypto_free_shash(siw_crypto_shash);
585557

@@ -593,14 +565,8 @@ static __init int siw_init_module(void)
593565

594566
static void __exit siw_exit_module(void)
595567
{
596-
int cpu;
568+
siw_stop_tx_threads();
597569

598-
for_each_possible_cpu(cpu) {
599-
if (siw_tx_thread[cpu]) {
600-
siw_stop_tx_thread(cpu);
601-
siw_tx_thread[cpu] = NULL;
602-
}
603-
}
604570
unregister_netdevice_notifier(&siw_netdev_nb);
605571
rdma_link_unregister(&siw_link_ops);
606572
ib_unregister_driver(RDMA_DRIVER_SIW);

drivers/infiniband/sw/siw/siw_qp_tx.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,10 +1208,45 @@ struct tx_task_t {
12081208

12091209
static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
12101210

1211-
void siw_stop_tx_thread(int nr_cpu)
1211+
int siw_create_tx_threads(void)
12121212
{
1213-
kthread_stop(siw_tx_thread[nr_cpu]);
1214-
wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
1213+
int cpu, assigned = 0;
1214+
1215+
for_each_online_cpu(cpu) {
1216+
struct tx_task_t *tx_task;
1217+
1218+
/* Skip HT cores */
1219+
if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
1220+
continue;
1221+
1222+
tx_task = &per_cpu(siw_tx_task_g, cpu);
1223+
init_llist_head(&tx_task->active);
1224+
init_waitqueue_head(&tx_task->waiting);
1225+
1226+
siw_tx_thread[cpu] =
1227+
kthread_run_on_cpu(siw_run_sq,
1228+
(unsigned long *)(long)cpu,
1229+
cpu, "siw_tx/%u");
1230+
if (IS_ERR(siw_tx_thread[cpu])) {
1231+
siw_tx_thread[cpu] = NULL;
1232+
continue;
1233+
}
1234+
assigned++;
1235+
}
1236+
return assigned;
1237+
}
1238+
1239+
void siw_stop_tx_threads(void)
1240+
{
1241+
int cpu;
1242+
1243+
for_each_possible_cpu(cpu) {
1244+
if (siw_tx_thread[cpu]) {
1245+
kthread_stop(siw_tx_thread[cpu]);
1246+
wake_up(&per_cpu(siw_tx_task_g, cpu).waiting);
1247+
siw_tx_thread[cpu] = NULL;
1248+
}
1249+
}
12151250
}
12161251

12171252
int siw_run_sq(void *data)
@@ -1221,9 +1256,6 @@ int siw_run_sq(void *data)
12211256
struct siw_qp *qp;
12221257
struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
12231258

1224-
init_llist_head(&tx_task->active);
1225-
init_waitqueue_head(&tx_task->waiting);
1226-
12271259
while (1) {
12281260
struct llist_node *fifo_list = NULL;
12291261

0 commit comments

Comments
 (0)