Skip to content

Commit a8b62fd

Browse files
author
Peter Zijlstra
committed
stop_machine: Add function and caller debug info
Crashes in stop-machine are hard to connect to the calling code, add a little something to help with that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Link: https://lkml.kernel.org/r/20201023102346.116513635@infradead.org
1 parent 23859ae commit a8b62fd

File tree

4 files changed

+32
-3
lines changed

4 files changed

+32
-3
lines changed

include/linux/stop_machine.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
2424
struct cpu_stop_work {
2525
struct list_head list; /* cpu_stopper->works */
2626
cpu_stop_fn_t fn;
27+
unsigned long caller;
2728
void *arg;
2829
struct cpu_stop_done *done;
2930
};
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
3637
void stop_machine_unpark(int cpu);
3738
void stop_machine_yield(const struct cpumask *cpumask);
3839

40+
extern void print_stop_info(const char *log_lvl, struct task_struct *task);
41+
3942
#else /* CONFIG_SMP */
4043

4144
#include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
8083
return false;
8184
}
8285

86+
static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
87+
8388
#endif /* CONFIG_SMP */
8489

8590
/*

kernel/sched/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6447,6 +6447,7 @@ void sched_show_task(struct task_struct *p)
64476447
(unsigned long)task_thread_info(p)->flags);
64486448

64496449
print_worker_info(KERN_INFO, p);
6450+
print_stop_info(KERN_INFO, p);
64506451
show_stack(p, NULL, KERN_INFO);
64516452
put_task_stack(p);
64526453
}

kernel/stop_machine.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,27 @@ struct cpu_stopper {
4242
struct list_head works; /* list of pending works */
4343

4444
struct cpu_stop_work stop_work; /* for stop_cpus */
45+
unsigned long caller;
46+
cpu_stop_fn_t fn;
4547
};
4648

4749
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
4850
static bool stop_machine_initialized = false;
4951

52+
void print_stop_info(const char *log_lvl, struct task_struct *task)
53+
{
54+
/*
55+
* If @task is a stopper task, it cannot migrate and task_cpu() is
56+
* stable.
57+
*/
58+
struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));
59+
60+
if (task != stopper->thread)
61+
return;
62+
63+
printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller);
64+
}
65+
5066
/* static data for stop_cpus */
5167
static DEFINE_MUTEX(stop_cpus_mutex);
5268
static bool stop_cpus_in_progress;
@@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
123139
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
124140
{
125141
struct cpu_stop_done done;
126-
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
142+
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };
127143

128144
cpu_stop_init_done(&done, 1);
129145
if (!cpu_stop_queue_work(cpu, &work))
@@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
331347
work1 = work2 = (struct cpu_stop_work){
332348
.fn = multi_cpu_stop,
333349
.arg = &msdata,
334-
.done = &done
350+
.done = &done,
351+
.caller = _RET_IP_,
335352
};
336353

337354
cpu_stop_init_done(&done, 2);
@@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
367384
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
368385
struct cpu_stop_work *work_buf)
369386
{
370-
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
387+
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
371388
return cpu_stop_queue_work(cpu, work_buf);
372389
}
373390

@@ -487,6 +504,8 @@ static void cpu_stopper_thread(unsigned int cpu)
487504
int ret;
488505

489506
/* cpu stop callbacks must not sleep, make in_atomic() == T */
507+
stopper->caller = work->caller;
508+
stopper->fn = fn;
490509
preempt_count_inc();
491510
ret = fn(arg);
492511
if (done) {
@@ -495,6 +514,8 @@ static void cpu_stopper_thread(unsigned int cpu)
495514
cpu_stop_signal_done(done);
496515
}
497516
preempt_count_dec();
517+
stopper->fn = NULL;
518+
stopper->caller = 0;
498519
WARN_ONCE(preempt_count(),
499520
"cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
500521
goto repeat;

lib/dump_stack.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/atomic.h>
1313
#include <linux/kexec.h>
1414
#include <linux/utsname.h>
15+
#include <linux/stop_machine.h>
1516

1617
static char dump_stack_arch_desc_str[128];
1718

@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
5758
log_lvl, dump_stack_arch_desc_str);
5859

5960
print_worker_info(log_lvl, current);
61+
print_stop_info(log_lvl, current);
6062
}
6163

6264
/**

0 commit comments

Comments
 (0)