Skip to content

Commit

Permalink
pkg/report: preserve trackback from other CPUs
Browse files Browse the repository at this point in the history
For some bug types kernel produces traceback of all CPUs
(which is presumably useful for these crashes).
However, we currently throw it away because it has context
different from the oops context.
Detect such traceback and preserve output.
  • Loading branch information
dvyukov committed May 10, 2019
1 parent e551e0c commit 4e7ecf0
Show file tree
Hide file tree
Showing 4 changed files with 873 additions and 4 deletions.
12 changes: 10 additions & 2 deletions pkg/report/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type linux struct {
consoleOutputRe *regexp.Regexp
questionableRe *regexp.Regexp
taskContext *regexp.Regexp
cpuContext *regexp.Regexp
guiltyFileBlacklist []*regexp.Regexp
reportStartIgnores []*regexp.Regexp
infoMessagesWithStack [][]byte
Expand Down Expand Up @@ -55,6 +56,7 @@ func ctorLinux(target *targets.Target, kernelSrc, kernelObj string, ignores []*r
ctx.consoleOutputRe = regexp.MustCompile(`^(?:\*\* [0-9]+ printk messages dropped \*\* )?(?:.* login: )?(?:\<[0-9]+\>)?\[ *[0-9]+\.[0-9]+\](\[ *(?:C|T)[0-9]+\])? `)
ctx.questionableRe = regexp.MustCompile(`(\[\<[0-9a-f]+\>\])? \? +[a-zA-Z0-9_.]+\+0x[0-9a-f]+/[0-9a-f]+`)
ctx.taskContext = regexp.MustCompile(`\[ *T[0-9]+\]`)
ctx.cpuContext = regexp.MustCompile(`\[ *C[0-9]+\]`)
ctx.eoi = []byte("<EOI>")
ctx.guiltyFileBlacklist = []*regexp.Regexp{
regexp.MustCompile(`.*\.h`),
Expand Down Expand Up @@ -198,7 +200,7 @@ func (ctx *linux) findReport(output []byte, oops *oops, startPos int, context st
}
secondReportPos := 0
textLines := 0
skipText := false
skipText, cpuTraceback := false, false
for pos, next := 0, 0; pos < len(output); pos = next + 1 {
next = bytes.IndexByte(output[pos:], '\n')
if next != -1 {
Expand Down Expand Up @@ -238,13 +240,19 @@ func (ctx *linux) findReport(output []byte, oops *oops, startPos int, context st
}
}
}
if !oopsLine && (context1 != context || questionable) {
if !oopsLine && (questionable ||
context1 != context && (!cpuTraceback || !ctx.cpuContext.MatchString(context1))) {
continue
}
textLines++
skipLine := skipText
if bytes.Contains(line, []byte("Disabling lock debugging due to kernel taint")) {
skipLine = true
} else if bytes.Contains(line, []byte("Sending NMI from CPU")) {
// If we are doing traceback of all CPUs, then we also need to preserve output
// from other CPUs regardless of what is the current context.
// Otherwise we will throw traceback away because it does not match the oops context.
cpuTraceback = true
} else if textLines > 25 &&
(bytes.Contains(line, []byte("Kernel panic - not syncing")) ||
bytes.Contains(line, []byte("WARNING: possible circular locking dependency detected"))) {
Expand Down
203 changes: 202 additions & 1 deletion pkg/report/testdata/linux/report/349
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# TODO: we may want to include traceback from other CPUs into the report.
TITLE: INFO: task hung in jbd2_journal_commit_transaction

[ 1431.920513][ T1054] INFO: task jbd2/sda-8:3563 blocked for more than 140 seconds.
Expand Down Expand Up @@ -216,3 +215,205 @@ TITLE: INFO: task hung in jbd2_journal_commit_transaction
[ 1432.425305][ T1054] Kernel Offset: disabled
[ 1432.425916][ T1054] Rebooting in 86400 seconds..

REPORT:
INFO: task jbd2/sda-8:3563 blocked for more than 140 seconds.
Not tainted 4.20.0-next-20190102+ #5
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
jbd2/sda-8 D22600 3563 2 0x80000000
Call Trace:
__schedule+0x897/0x1e60
schedule+0xfe/0x350
io_schedule+0x1c/0x70
bit_wait_io+0x17/0x90
__wait_on_bit+0xb3/0x130
out_of_line_wait_on_bit+0x1f4/0x3b0
__wait_on_buffer+0x6f/0x90
jbd2_journal_commit_transaction+0x658a/0x8ee4
kjournald2+0x20f/0xb90
kthread+0x357/0x430
ret_from_fork+0x3a/0x50
INFO: task kworker/u12:5:1980 blocked for more than 140 seconds.
Not tainted 4.20.0-next-20190102+ #5
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/u12:5 D21864 1980 2 0x80000000
Workqueue: writeback wb_workfn (flush-8:0)
Call Trace:
__schedule+0x897/0x1e60
schedule+0xfe/0x350
io_schedule+0x1c/0x70
rq_qos_wait+0x2f2/0x600
wbt_wait+0x266/0x430
__rq_qos_throttle+0x58/0xb0
blk_mq_make_request+0x424/0x1e10
generic_make_request+0x902/0x17f0
submit_bio+0xba/0x480
ext4_io_submit+0x197/0x230
ext4_writepages+0x139d/0x4180
do_writepages+0x99/0x1a0
__writeback_single_inode+0x1c5/0x1620
writeback_sb_inodes+0x762/0x1260
__writeback_inodes_wb+0x16d/0x3d0
wb_writeback+0xa2d/0xf70
wb_workfn+0xccc/0x16f0
process_one_work+0xd0c/0x1ce0
worker_thread+0x143/0x14a0
kthread+0x357/0x430
ret_from_fork+0x3a/0x50
INFO: task syz-executor2:6677 blocked for more than 140 seconds.
Not tainted 4.20.0-next-20190102+ #5
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor2 D19624 6677 22450 0x00000004
Call Trace:
__schedule+0x897/0x1e60
schedule+0xfe/0x350
io_schedule+0x1c/0x70
wait_on_page_bit_common+0xad5/0x1370
__filemap_fdatawait_range+0x614/0x890
file_write_and_wait_range+0xd1/0x100
ext4_sync_file+0x2da/0x14d0
vfs_fsync_range+0x144/0x230
ext4_file_write_iter+0x683/0x1400
do_iter_readv_writev+0x902/0xbc0
do_iter_write+0x184/0x610
vfs_iter_write+0x77/0xb0
iter_file_splice_write+0x885/0xfc0
direct_splice_actor+0x126/0x1a0
splice_direct_to_actor+0x3be/0x9d0
do_splice_direct+0x2c7/0x420
do_sendfile+0x61a/0xe60
__x64_sys_sendfile64+0x15a/0x240
do_syscall_64+0x1a3/0x800
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4576a9
Code: 3b 41 10 76 7d 48 81 ec 88 00 00 00 48 89 ac 24 80 00 00 00 48 8d ac 24 80 00 00 00 48 8b 59 20 48 85 db 75 67 48 8b b4 24 a0 <00> 00 00 8b 8c 24 a8 00 00 00 48 89 e7 f3 a4 48 8b 94 24 98 00 00
RSP: 002b:00007f66fbce3c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000028
RAX: ffffffffffffffda RBX: 000000000071bf00 RCX: 00000000004576a9
RDX: 0000000020000040 RSI: 0000000000000005 RDI: 0000000000000005
RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
R10: 00008080fffffffe R11: 0000000000000246 R12: 00007f66fbce46d4
R13: 00000000004abcac R14: 00000000006eb9b8 R15: 00000000ffffffff
INFO: task syz-executor2:6693 blocked for more than 140 seconds.
Not tainted 4.20.0-next-20190102+ #5
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor2 D22200 6693 22450 0x00000004
Call Trace:
__schedule+0x897/0x1e60
schedule+0xfe/0x350
jbd2_log_wait_commit+0x320/0x4b0
jbd2_complete_transaction+0x18c/0x1e0
ext4_sync_file+0x1211/0x14d0
vfs_fsync_range+0x144/0x230
ext4_file_write_iter+0x683/0x1400
do_iter_readv_writev+0x902/0xbc0
do_iter_write+0x184/0x610
vfs_iter_write+0x77/0xb0
iter_file_splice_write+0x885/0xfc0
direct_splice_actor+0x126/0x1a0
splice_direct_to_actor+0x3be/0x9d0
do_splice_direct+0x2c7/0x420
do_sendfile+0x61a/0xe60
__x64_sys_sendfile64+0x15a/0x240
do_syscall_64+0x1a3/0x800
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4576a9
Code: 3b 41 10 76 7d 48 81 ec 88 00 00 00 48 89 ac 24 80 00 00 00 48 8d ac 24 80 00 00 00 48 8b 59 20 48 85 db 75 67 48 8b b4 24 a0 <00> 00 00 8b 8c 24 a8 00 00 00 48 89 e7 f3 a4 48 8b 94 24 98 00 00
RSP: 002b:00007f66fbcc2c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000028
RAX: ffffffffffffffda RBX: 000000000071bfa0 RCX: 00000000004576a9
RDX: 0000000020000380 RSI: 0000000000000005 RDI: 0000000000000005
RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
R10: 0001000000020000 R11: 0000000000000246 R12: 00007f66fbcc36d4
R13: 00000000004abcac R14: 00000000006eb9b8 R15: 00000000ffffffff

Showing all locks held in the system:
1 lock held by khungtaskd/1054:
#0: 00000000c542499d (rcu_read_lock){....}, at: debug_show_all_locks+0xc6/0x41d
1 lock held by rsyslogd/5859:
#0: 00000000567e80cf (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x1b3/0x1f0
2 locks held by getty/5951:
#0: 000000003b0dfac2 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000281ebc43 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5952:
#0: 00000000f0d48c91 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000cff3c1fb (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5953:
#0: 00000000cb353a8f (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000373ded25 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5954:
#0: 000000005c8be537 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000d7aec815 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5955:
#0: 00000000eda8138a (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000f4fd60a6 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5956:
#0: 000000000745b5fb (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 00000000a4c3442a (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
2 locks held by getty/5957:
#0: 000000002cb7c49e (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x33/0x40
#1: 000000009258e26f (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0x30a/0x1eb0
4 locks held by kworker/u12:5/1980:
#0: 000000005e902bc8 ((wq_completion)"writeback"){+.+.}, at: process_one_work+0xbc7/0x1ce0
#1: 0000000056cd520a ((work_completion)(&(&wb->dwork)->work)){+.+.}, at: process_one_work+0xc1d/0x1ce0
#2: 00000000146ba37b (&type->s_umount_key#30){++++}, at: trylock_super+0x22/0x110
#3: 00000000fec9c2c2 (&sbi->s_journal_flag_rwsem){.+.+}, at: do_writepages+0x99/0x1a0
1 lock held by syz-executor2/6677:
#0: 000000007b67e744 (sb_writers#3){.+.+}, at: do_sendfile+0xad7/0xe60
1 lock held by syz-executor2/6693:
#0: 000000007b67e744 (sb_writers#3){.+.+}, at: do_sendfile+0xad7/0xe60

=============================================

NMI backtrace for cpu 3
CPU: 3 PID: 1054 Comm: khungtaskd Not tainted 4.20.0-next-20190102+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
dump_stack+0x1db/0x2d0
nmi_cpu_backtrace.cold+0x63/0xa4
nmi_trigger_cpumask_backtrace+0x1be/0x236
arch_trigger_cpumask_backtrace+0x14/0x20
watchdog+0xbbb/0x1170
kthread+0x357/0x430
ret_from_fork+0x3a/0x50
Sending NMI from CPU 3 to CPUs 0-2:
NMI backtrace for cpu 0 skipped: idling at native_safe_halt+0x2/0x10
NMI backtrace for cpu 2 skipped: idling at native_safe_halt+0x2/0x10
NMI backtrace for cpu 1
CPU: 1 PID: 5857 Comm: rs:main Q:Reg Not tainted 4.20.0-next-20190102+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
RIP: 0010:unwind_next_frame+0x36/0x50
Code: fa 55 48 c1 ea 03 48 89 e5 48 83 ec 08 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 0f 8b 17 31 c0 85 d2 74 05 <e8> 35 f5 ff ff c9 c3 48 89 7d f8 e8 aa 54 8e 00 48 8b 7d f8 eb e2
RSP: 0018:ffff88805f667100 EFLAGS: 00000202
RAX: 0000000000000000 RBX: ffff88805f6671b8 RCX: 000000000000000f
RDX: 0000000000000001 RSI: ffffffff8100c393 RDI: ffff88805f667118
RBP: ffff88805f667108 R08: 0000000000000001 R09: ffff88805f667168
R10: ffff88805f667140 R11: ffff88805f667150 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88805c5d2500 R15: ffff8880664f37c0
FS: 00007fc30a025700(0000) GS:ffff88806c440000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8d9477b190 CR3: 000000006300b001 CR4: 00000000001606e0
Call Trace:
__save_stack_trace+0x7a/0xf0
save_stack_trace+0x1a/0x20
save_stack+0x45/0xd0
kasan_kmalloc+0xcf/0xe0
kasan_slab_alloc+0xf/0x20
kmem_cache_alloc+0x12d/0x710
jbd2__journal_start+0x1e4/0xa80
__ext4_journal_start_sb+0x1a7/0x630
ext4_da_write_begin+0x4ee/0x12d0
generic_perform_write+0x2a0/0x6b0
__generic_file_write_iter+0x25e/0x630
ext4_file_write_iter+0x381/0x1400
__vfs_write+0x764/0xb40
vfs_write+0x20c/0x580
ksys_write+0x105/0x260
__x64_sys_write+0x73/0xb0
do_syscall_64+0x1a3/0x800
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fc30ba8319d
Code: d1 20 00 00 75 10 b8 01 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be fa ff ff 48 89 04 24 b8 01 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 07 fb ff ff 48 89 d0 48 83 c4 08 48 3d 01
RSP: 002b:00007fc30a023f90 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000400 RCX: 00007fc30ba8319d
RDX: 0000000000000400 RSI: 00000000008d3ce0 RDI: 0000000000000002
RBP: 00000000008d3ce0 R08: 00000000008d408d R09: 00007fc30b3ffe07
R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000
R13: 00007fc30a024410 R14: 00007fc304002a50 R15: 00000000008d3ac0
Loading

0 comments on commit 4e7ecf0

Please sign in to comment.