Skip to content

Commit

Permalink
Verifying race exists for mmap on master
Browse files Browse the repository at this point in the history
After doing some tracing on mmap using the O_DIRECT code, I decided to
test out the mmap test case just on master to see if there existed a
race condition that is outside of O_DIRECT. There does exist a race
condition which is shown in the trace below. We are hitting the same
ASSERT that the page should be up to date in mappedread().

The only difference in the test case that is run on the O_DIRECT branch
is it use `direct=1` for fio. However, I changed this to `direct=0` to
make sure we are always going down the buffered path. The reason I did
this my O_DIRECT stack traces were showing we only hit this in the first
write of O_DIRECT while growing the file (that is always sent to the
ARC).

[12039.760554] VERIFY(PageUptodate(pp)) failed
[12039.764745] PANIC at zfs_vnops_os.c:298:mappedread()
[12039.769712] Showing stack for process 1125719
[12039.774071] CPU: 19 PID: 1125719 Comm: fio Kdump: loaded Tainted: P
OE    --------- -  - 4.18.0-408.el8.x86_64 #1
[12039.785193] Hardware name: GIGABYTE R272-Z32-00/MZ32-AR0-00, BIOS R21
10/08/2020
[12039.792577] Call Trace:
[12039.795024]  dump_stack+0x41/0x60
[12039.798343]  spl_panic+0xd0/0xe8 [spl]
[12039.802103]  ? _cond_resched+0x15/0x30
[12039.805854]  ? mutex_lock+0xe/0x30
[12039.809253]  ? arc_cksum_compute+0xcb/0x180 [zfs]
[12039.814129]  ? __raw_spin_unlock+0x5/0x10 [zfs]
[12039.818819]  ? dmu_read_uio_dnode+0xf1/0x130 [zfs]
[12039.823802]  ? kfree+0xd3/0x250
[12039.826948]  ? xas_load+0x8/0x80
[12039.830182]  ? find_get_entry+0xd6/0x1c0
[12039.834106]  ? _cond_resched+0x15/0x30
[12039.837852]  spl_assert+0x17/0x20 [zfs]
[12039.841885]  mappedread+0x136/0x140 [zfs]
[12039.846077]  zfs_read+0x165/0x2e0 [zfs]
[12039.850100]  zpl_iter_read+0xa8/0x110 [zfs]
[12039.854456]  ? __handle_mm_fault+0x44f/0x6c0
[12039.858729]  new_sync_read+0x10f/0x150
[12039.862484]  vfs_read+0x91/0x140
[12039.865716]  ksys_read+0x4f/0xb0
[12039.868946]  do_syscall_64+0x5b/0x1a0
[12039.872614]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12039.877668] RIP: 0033:0x7fa34855cab4
[12039.881246] Code: c3 0f 1f 44 00 00 41 54 49 89 d4 55 48 89 f5 53 89
fb 48 83 ec 10 e8 7b fc ff ff 4c 89 e2 48 89 ee 89 df 41 89 c0 31 c0 0f
05 <48> 3d 00 f0 ff ff 77 38 44 89 c7 48 89 44 24 08 e8 b7 fc ff ff 48
[12039.899990] RSP: 002b:00007ffc7a17caf0 EFLAGS: 00000246 ORIG_RAX:
0000000000000000
[12039.907550] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fa34855cab4
[12039.914680] RDX: 0000000000100000 RSI: 00007fa2ddc99010 RDI:
0000000000000005
[12039.921804] RBP: 00007fa2ddc99010 R08: 0000000000000000 R09:
0000000000000000
[12039.928928] R10: 0000000020f2cfd0 R11: 0000000000000246 R12:
0000000000100000
[12039.936052] R13: 000055f6f3432ec0 R14: 0000000000100000 R15:
000055f6f3432ee8
[12166.129858] INFO: task fio:1125717 blocked for more than 120 seconds.
[12166.136307]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12166.144043] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12166.151870] task:fio             state:D stack:    0 pid:1125717
ppid:1125444 flags:0x00000080
[12166.160478] Call Trace:
[12166.162930]  __schedule+0x2d1/0x830
[12166.166433]  schedule+0x35/0xa0
[12166.169585]  cv_wait_common+0x153/0x240 [spl]
[12166.173954]  ? finish_wait+0x80/0x80
[12166.177539]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12166.183054]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12166.188461]  zfs_write+0x615/0xd70 [zfs]
[12166.192570]  zpl_iter_write+0xe0/0x120 [zfs]
[12166.197032]  ? __handle_mm_fault+0x44f/0x6c0
[12166.201303]  new_sync_write+0x112/0x160
[12166.205146]  vfs_write+0xa5/0x1a0
[12166.208473]  ksys_write+0x4f/0xb0
[12166.211793]  do_syscall_64+0x5b/0x1a0
[12166.215465]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12166.220520] RIP: 0033:0x7fcb60d50a17
[12166.224103] Code: Unable to access opcode bytes at RIP
0x7fcb60d509ed.
[12166.230633] RSP: 002b:00007ffce0bbd250 EFLAGS: 00000293 ORIG_RAX:
0000000000000001
[12166.238199] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fcb60d50a17
[12166.245331] RDX: 0000000000100000 RSI: 00007fcaf648d010 RDI:
0000000000000005
[12166.252463] RBP: 00007fcaf648d010 R08: 0000000000000000 R09:
0000000000000000
[12166.259598] R10: 000055aeb180bf80 R11: 0000000000000293 R12:
0000000000100000
[12166.266729] R13: 000055aeb1803ec0 R14: 0000000000100000 R15:
000055aeb1803ee8
[12166.273873] INFO: task fio:1125719 blocked for more than 120 seconds.
[12166.280317]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12166.288057] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12166.295882] task:fio             state:D stack:    0 pid:1125719
ppid:1125447 flags:0x80004080
[12166.304489] Call Trace:
[12166.306942]  __schedule+0x2d1/0x830
[12166.310435]  schedule+0x35/0xa0
[12166.313580]  spl_panic+0xe6/0xe8 [spl]
[12166.317341]  ? _cond_resched+0x15/0x30
[12166.321094]  ? mutex_lock+0xe/0x30
[12166.324501]  ? arc_cksum_compute+0xcb/0x180 [zfs]
[12166.329379]  ? __raw_spin_unlock+0x5/0x10 [zfs]
[12166.334078]  ? dmu_read_uio_dnode+0xf1/0x130 [zfs]
[12166.339042]  ? kfree+0xd3/0x250
[12166.342187]  ? xas_load+0x8/0x80
[12166.345421]  ? find_get_entry+0xd6/0x1c0
[12166.349346]  ? _cond_resched+0x15/0x30
[12166.353102]  spl_assert+0x17/0x20 [zfs]
[12166.357130]  mappedread+0x136/0x140 [zfs]
[12166.361324]  zfs_read+0x165/0x2e0 [zfs]
[12166.365347]  zpl_iter_read+0xa8/0x110 [zfs]
[12166.369706]  ? __handle_mm_fault+0x44f/0x6c0
[12166.373978]  new_sync_read+0x10f/0x150
[12166.377731]  vfs_read+0x91/0x140
[12166.380964]  ksys_read+0x4f/0xb0
[12166.384196]  do_syscall_64+0x5b/0x1a0
[12166.387863]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12166.392913] RIP: 0033:0x7fa34855cab4
[12166.396494] Code: Unable to access opcode bytes at RIP
0x7fa34855ca8a.
[12166.403018] RSP: 002b:00007ffc7a17caf0 EFLAGS: 00000246 ORIG_RAX:
0000000000000000
[12166.410586] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fa34855cab4
[12166.417717] RDX: 0000000000100000 RSI: 00007fa2ddc99010 RDI:
0000000000000005
[12166.424852] RBP: 00007fa2ddc99010 R08: 0000000000000000 R09:
0000000000000000
[12166.431985] R10: 0000000020f2cfd0 R11: 0000000000000246 R12:
0000000000100000
[12166.439117] R13: 000055f6f3432ec0 R14: 0000000000100000 R15:
000055f6f3432ee8
[12289.008759] INFO: task kworker/u128:1:32743 blocked for more than 120
seconds.
[12289.015983]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12289.023722] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12289.031550] task:kworker/u128:1  state:D stack:    0 pid:32743 ppid:
2 flags:0x80004080
[12289.039905] Workqueue: writeback wb_workfn (flush-zfs-915)
[12289.045397] Call Trace:
[12289.047850]  __schedule+0x2d1/0x830
[12289.051343]  schedule+0x35/0xa0
[12289.054491]  cv_wait_common+0x153/0x240 [spl]
[12289.058859]  ? finish_wait+0x80/0x80
[12289.062435]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12289.067949]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12289.073357]  zfs_putpage+0x13b/0x5b0 [zfs]
[12289.077640]  ? pmdp_collapse_flush+0x10/0x10
[12289.081909]  ? rmap_walk_file+0x116/0x290
[12289.085922]  ? __mod_memcg_lruvec_state+0x5d/0x160
[12289.090715]  zpl_putpage+0x67/0xd0 [zfs]
[12289.094817]  write_cache_pages+0x197/0x420
[12289.098916]  ? zpl_readpage_filler+0x10/0x10 [zfs]
[12289.103890]  zpl_writepages+0x98/0x130 [zfs]
[12289.108335]  do_writepages+0xc2/0x1c0
[12289.112000]  ? __wb_calc_thresh+0x3a/0x120
[12289.116100]  __writeback_single_inode+0x39/0x2f0
[12289.120719]  writeback_sb_inodes+0x1e6/0x450
[12289.124993]  __writeback_inodes_wb+0x5f/0xc0
[12289.129263]  wb_writeback+0x247/0x2e0
[12289.132931]  wb_workfn+0x346/0x4d0
[12289.136337]  ? __switch_to_asm+0x35/0x70
[12289.140262]  ? __switch_to_asm+0x41/0x70
[12289.144189]  ? __switch_to_asm+0x35/0x70
[12289.148116]  ? __switch_to_asm+0x41/0x70
[12289.152042]  ? __switch_to_asm+0x35/0x70
[12289.155964]  ? __switch_to_asm+0x41/0x70
[12289.159893]  ? __switch_to_asm+0x35/0x70
[12289.163819]  ? __switch_to_asm+0x41/0x70
[12289.167748]  process_one_work+0x1a7/0x360
[12289.171766]  ? create_worker+0x1a0/0x1a0
[12289.175690]  worker_thread+0x30/0x390
[12289.179357]  ? create_worker+0x1a0/0x1a0
[12289.183284]  kthread+0x10a/0x120
[12289.186514]  ? set_kthread_struct+0x40/0x40
[12289.190702]  ret_from_fork+0x35/0x40
[12289.194323] INFO: task fio:1125717 blocked for more than 120 seconds.
[12289.200766]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12289.208504] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12289.216331] task:fio             state:D stack:    0 pid:1125717
ppid:1125444 flags:0x00000080
[12289.224935] Call Trace:
[12289.227390]  __schedule+0x2d1/0x830
[12289.230882]  schedule+0x35/0xa0
[12289.234026]  cv_wait_common+0x153/0x240 [spl]
[12289.238396]  ? finish_wait+0x80/0x80
[12289.241975]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12289.247471]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12289.252874]  zfs_write+0x615/0xd70 [zfs]
[12289.256979]  zpl_iter_write+0xe0/0x120 [zfs]
[12289.261429]  ? __handle_mm_fault+0x44f/0x6c0
[12289.265708]  new_sync_write+0x112/0x160
[12289.269553]  vfs_write+0xa5/0x1a0
[12289.272869]  ksys_write+0x4f/0xb0
[12289.276191]  do_syscall_64+0x5b/0x1a0
[12289.279856]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12289.284908] RIP: 0033:0x7fcb60d50a17
[12289.288493] Code: Unable to access opcode bytes at RIP
0x7fcb60d509ed.
[12289.295022] RSP: 002b:00007ffce0bbd250 EFLAGS: 00000293 ORIG_RAX:
0000000000000001
[12289.302590] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fcb60d50a17
[12289.309723] RDX: 0000000000100000 RSI: 00007fcaf648d010 RDI:
0000000000000005
[12289.316853] RBP: 00007fcaf648d010 R08: 0000000000000000 R09:
0000000000000000
[12289.323987] R10: 000055aeb180bf80 R11: 0000000000000293 R12:
0000000000100000
[12289.331121] R13: 000055aeb1803ec0 R14: 0000000000100000 R15:
000055aeb1803ee8
[12289.338253] INFO: task fio:1125719 blocked for more than 120 seconds.
[12289.344693]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12289.352431] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12289.360256] task:fio             state:D stack:    0 pid:1125719
ppid:1125447 flags:0x80004080
[12289.368864] Call Trace:
[12289.371316]  __schedule+0x2d1/0x830
[12289.374808]  schedule+0x35/0xa0
[12289.377953]  spl_panic+0xe6/0xe8 [spl]
[12289.381717]  ? _cond_resched+0x15/0x30
[12289.385467]  ? mutex_lock+0xe/0x30
[12289.388873]  ? arc_cksum_compute+0xcb/0x180 [zfs]
[12289.393756]  ? __raw_spin_unlock+0x5/0x10 [zfs]
[12289.398451]  ? dmu_read_uio_dnode+0xf1/0x130 [zfs]
[12289.403423]  ? kfree+0xd3/0x250
[12289.406571]  ? xas_load+0x8/0x80
[12289.409806]  ? find_get_entry+0xd6/0x1c0
[12289.413732]  ? _cond_resched+0x15/0x30
[12289.417484]  spl_assert+0x17/0x20 [zfs]
[12289.421511]  mappedread+0x136/0x140 [zfs]
[12289.425709]  zfs_read+0x165/0x2e0 [zfs]
[12289.429731]  zpl_iter_read+0xa8/0x110 [zfs]
[12289.434097]  ? __handle_mm_fault+0x44f/0x6c0
[12289.438369]  new_sync_read+0x10f/0x150
[12289.442121]  vfs_read+0x91/0x140
[12289.445353]  ksys_read+0x4f/0xb0
[12289.448588]  do_syscall_64+0x5b/0x1a0
[12289.452254]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12289.457307] RIP: 0033:0x7fa34855cab4
[12289.460885] Code: Unable to access opcode bytes at RIP
0x7fa34855ca8a.
[12289.467411] RSP: 002b:00007ffc7a17caf0 EFLAGS: 00000246 ORIG_RAX:
0000000000000000
[12289.474977] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fa34855cab4
[12289.482110] RDX: 0000000000100000 RSI: 00007fa2ddc99010 RDI:
0000000000000005
[12289.489241] RBP: 00007fa2ddc99010 R08: 0000000000000000 R09:
0000000000000000
[12289.496376] R10: 0000000020f2cfd0 R11: 0000000000000246 R12:
0000000000100000
[12289.503506] R13: 000055f6f3432ec0 R14: 0000000000100000 R15:
000055f6f3432ee8
[12411.887657] INFO: task kworker/u128:1:32743 blocked for more than 120
seconds.
[12411.894885]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12411.902627] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12411.910450] task:kworker/u128:1  state:D stack:    0 pid:32743 ppid:
2 flags:0x80004080
[12411.918798] Workqueue: writeback wb_workfn (flush-zfs-915)
[12411.924292] Call Trace:
[12411.926747]  __schedule+0x2d1/0x830
[12411.930247]  schedule+0x35/0xa0
[12411.933394]  cv_wait_common+0x153/0x240 [spl]
[12411.937758]  ? finish_wait+0x80/0x80
[12411.941339]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12411.946852]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12411.952260]  zfs_putpage+0x13b/0x5b0 [zfs]
[12411.956541]  ? pmdp_collapse_flush+0x10/0x10
[12411.960821]  ? rmap_walk_file+0x116/0x290
[12411.964833]  ? __mod_memcg_lruvec_state+0x5d/0x160
[12411.969627]  zpl_putpage+0x67/0xd0 [zfs]
[12411.973734]  write_cache_pages+0x197/0x420
[12411.977835]  ? zpl_readpage_filler+0x10/0x10 [zfs]
[12411.982802]  zpl_writepages+0x98/0x130 [zfs]
[12411.987246]  do_writepages+0xc2/0x1c0
[12411.990910]  ? __wb_calc_thresh+0x3a/0x120
[12411.995013]  __writeback_single_inode+0x39/0x2f0
[12411.999632]  writeback_sb_inodes+0x1e6/0x450
[12412.003904]  __writeback_inodes_wb+0x5f/0xc0
[12412.008176]  wb_writeback+0x247/0x2e0
[12412.011841]  wb_workfn+0x346/0x4d0
[12412.015248]  ? __switch_to_asm+0x35/0x70
[12412.019175]  ? __switch_to_asm+0x41/0x70
[12412.023098]  ? __switch_to_asm+0x35/0x70
[12412.027027]  ? __switch_to_asm+0x41/0x70
[12412.030952]  ? __switch_to_asm+0x35/0x70
[12412.034879]  ? __switch_to_asm+0x41/0x70
[12412.038805]  ? __switch_to_asm+0x35/0x70
[12412.042731]  ? __switch_to_asm+0x41/0x70
[12412.046656]  process_one_work+0x1a7/0x360
[12412.050671]  ? create_worker+0x1a0/0x1a0
[12412.054594]  worker_thread+0x30/0x390
[12412.058260]  ? create_worker+0x1a0/0x1a0
[12412.062186]  kthread+0x10a/0x120
[12412.065421]  ? set_kthread_struct+0x40/0x40
[12412.069607]  ret_from_fork+0x35/0x40
[12412.073227] INFO: task fio:1125717 blocked for more than 120 seconds.
[12412.079667]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12412.087406] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12412.095233] task:fio             state:D stack:    0 pid:1125717
ppid:1125444 flags:0x00000080
[12412.103838] Call Trace:
[12412.106291]  __schedule+0x2d1/0x830
[12412.109785]  schedule+0x35/0xa0
[12412.112930]  cv_wait_common+0x153/0x240 [spl]
[12412.117299]  ? finish_wait+0x80/0x80
[12412.120877]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12412.126373]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12412.131778]  zfs_write+0x615/0xd70 [zfs]
[12412.135881]  zpl_iter_write+0xe0/0x120 [zfs]
[12412.140336]  ? __handle_mm_fault+0x44f/0x6c0
[12412.144607]  new_sync_write+0x112/0x160
[12412.148446]  vfs_write+0xa5/0x1a0
[12412.151765]  ksys_write+0x4f/0xb0
[12412.155085]  do_syscall_64+0x5b/0x1a0
[12412.158753]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12412.163811] RIP: 0033:0x7fcb60d50a17
[12412.167395] Code: Unable to access opcode bytes at RIP
0x7fcb60d509ed.
[12412.173916] RSP: 002b:00007ffce0bbd250 EFLAGS: 00000293 ORIG_RAX:
0000000000000001
[12412.181481] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fcb60d50a17
[12412.188614] RDX: 0000000000100000 RSI: 00007fcaf648d010 RDI:
0000000000000005
[12412.195749] RBP: 00007fcaf648d010 R08: 0000000000000000 R09:
0000000000000000
[12412.202882] R10: 000055aeb180bf80 R11: 0000000000000293 R12:
0000000000100000
[12412.210014] R13: 000055aeb1803ec0 R14: 0000000000100000 R15:
000055aeb1803ee8
[12412.217146] INFO: task fio:1125719 blocked for more than 120 seconds.
[12412.223587]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12412.231325] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12412.239150] task:fio             state:D stack:    0 pid:1125719
ppid:1125447 flags:0x80004080
[12412.247755] Call Trace:
[12412.250213]  __schedule+0x2d1/0x830
[12412.253713]  schedule+0x35/0xa0
[12412.256867]  spl_panic+0xe6/0xe8 [spl]
[12412.260639]  ? _cond_resched+0x15/0x30
[12412.264398]  ? mutex_lock+0xe/0x30
[12412.267803]  ? arc_cksum_compute+0xcb/0x180 [zfs]
[12412.272704]  ? __raw_spin_unlock+0x5/0x10 [zfs]
[12412.277417]  ? dmu_read_uio_dnode+0xf1/0x130 [zfs]
[12412.282396]  ? kfree+0xd3/0x250
[12412.285543]  ? xas_load+0x8/0x80
[12412.288785]  ? find_get_entry+0xd6/0x1c0
[12412.292711]  ? _cond_resched+0x15/0x30
[12412.296465]  spl_assert+0x17/0x20 [zfs]
[12412.300494]  mappedread+0x136/0x140 [zfs]
[12412.304690]  zfs_read+0x165/0x2e0 [zfs]
[12412.308728]  zpl_iter_read+0xa8/0x110 [zfs]
[12412.313105]  ? __handle_mm_fault+0x44f/0x6c0
[12412.317379]  new_sync_read+0x10f/0x150
[12412.321138]  vfs_read+0x91/0x140
[12412.324372]  ksys_read+0x4f/0xb0
[12412.327613]  do_syscall_64+0x5b/0x1a0
[12412.331279]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12412.336334] RIP: 0033:0x7fa34855cab4
[12412.339912] Code: Unable to access opcode bytes at RIP
0x7fa34855ca8a.
[12412.346437] RSP: 002b:00007ffc7a17caf0 EFLAGS: 00000246 ORIG_RAX:
0000000000000000
[12412.354002] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fa34855cab4
[12412.361135] RDX: 0000000000100000 RSI: 00007fa2ddc99010 RDI:
0000000000000005
[12412.368268] RBP: 00007fa2ddc99010 R08: 0000000000000000 R09:
0000000000000000
[12412.375402] R10: 0000000020f2cfd0 R11: 0000000000000246 R12:
0000000000100000
[12412.382534] R13: 000055f6f3432ec0 R14: 0000000000100000 R15:
000055f6f3432ee8
[12534.766588] INFO: task kworker/u128:1:32743 blocked for more than 120
seconds.
[12534.773814]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12534.781556] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12534.789382] task:kworker/u128:1  state:D stack:    0 pid:32743 ppid:
2 flags:0x80004080
[12534.797737] Workqueue: writeback wb_workfn (flush-zfs-915)
[12534.803228] Call Trace:
[12534.805684]  __schedule+0x2d1/0x830
[12534.809183]  schedule+0x35/0xa0
[12534.812330]  cv_wait_common+0x153/0x240 [spl]
[12534.816698]  ? finish_wait+0x80/0x80
[12534.820287]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12534.825806]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12534.831216]  zfs_putpage+0x13b/0x5b0 [zfs]
[12534.835498]  ? pmdp_collapse_flush+0x10/0x10
[12534.839777]  ? rmap_walk_file+0x116/0x290
[12534.843790]  ? __mod_memcg_lruvec_state+0x5d/0x160
[12534.848581]  zpl_putpage+0x67/0xd0 [zfs]
[12534.852692]  write_cache_pages+0x197/0x420
[12534.856798]  ? zpl_readpage_filler+0x10/0x10 [zfs]
[12534.861764]  zpl_writepages+0x98/0x130 [zfs]
[12534.866210]  do_writepages+0xc2/0x1c0
[12534.869878]  ? __wb_calc_thresh+0x3a/0x120
[12534.873975]  __writeback_single_inode+0x39/0x2f0
[12534.878594]  writeback_sb_inodes+0x1e6/0x450
[12534.882866]  __writeback_inodes_wb+0x5f/0xc0
[12534.887137]  wb_writeback+0x247/0x2e0
[12534.890806]  wb_workfn+0x346/0x4d0
[12534.894213]  ? __switch_to_asm+0x35/0x70
[12534.898136]  ? __switch_to_asm+0x41/0x70
[12534.902064]  ? __switch_to_asm+0x35/0x70
[12534.905990]  ? __switch_to_asm+0x41/0x70
[12534.909915]  ? __switch_to_asm+0x35/0x70
[12534.913840]  ? __switch_to_asm+0x41/0x70
[12534.917768]  ? __switch_to_asm+0x35/0x70
[12534.921696]  ? __switch_to_asm+0x41/0x70
[12534.925620]  process_one_work+0x1a7/0x360
[12534.929635]  ? create_worker+0x1a0/0x1a0
[12534.933559]  worker_thread+0x30/0x390
[12534.937226]  ? create_worker+0x1a0/0x1a0
[12534.941152]  kthread+0x10a/0x120
[12534.944392]  ? set_kthread_struct+0x40/0x40
[12534.948576]  ret_from_fork+0x35/0x40
[12534.952215] INFO: task fio:1125717 blocked for more than 120 seconds.
[12534.958656]       Tainted: P           OE    --------- -  -
4.18.0-408.el8.x86_64 #1
[12534.966395] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[12534.974219] task:fio             state:D stack:    0 pid:1125717
ppid:1125444 flags:0x00000080
[12534.982828] Call Trace:
[12534.985282]  __schedule+0x2d1/0x830
[12534.988770]  schedule+0x35/0xa0
[12534.991920]  cv_wait_common+0x153/0x240 [spl]
[12534.996287]  ? finish_wait+0x80/0x80
[12534.999867]  zfs_rangelock_enter_writer+0x46/0x1b0 [zfs]
[12535.005359]  zfs_rangelock_enter_impl+0x110/0x170 [zfs]
[12535.010757]  zfs_write+0x615/0xd70 [zfs]
[12535.014861]  zpl_iter_write+0xe0/0x120 [zfs]
[12535.019307]  ? __handle_mm_fault+0x44f/0x6c0
[12535.023576]  new_sync_write+0x112/0x160
[12535.027418]  vfs_write+0xa5/0x1a0
[12535.030735]  ksys_write+0x4f/0xb0
[12535.034054]  do_syscall_64+0x5b/0x1a0
[12535.037722]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[12535.042773] RIP: 0033:0x7fcb60d50a17
[12535.046359] Code: Unable to access opcode bytes at RIP
0x7fcb60d509ed.
[12535.052889] RSP: 002b:00007ffce0bbd250 EFLAGS: 00000293 ORIG_RAX:
0000000000000001
[12535.060454] RAX: ffffffffffffffda RBX: 0000000000000005 RCX:
00007fcb60d50a17
[12535.067585] RDX: 0000000000100000 RSI: 00007fcaf648d010 RDI:
0000000000000005
[12535.074718] RBP: 00007fcaf648d010 R08: 0000000000000000 R09:
0000000000000000
[12535.081851] R10: 000055aeb180bf80 R11: 0000000000000293 R12:
0000000000100000
[12535.088987] R13: 000055aeb1803ec0 R14: 0000000000100000 R15:
000055aeb1803ee8

Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
  • Loading branch information
bwatkinson committed Jan 26, 2023
1 parent 9cd71c8 commit 14d24d2
Show file tree
Hide file tree
Showing 7 changed files with 488 additions and 0 deletions.
4 changes: 4 additions & 0 deletions tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,10 @@ pre =
post =
tags = ['functional', 'deadman']

[tests/functional/direct_mmap]
tests = ['dio_mmap']
tags = ['functional', 'direct_mmap']

[tests/functional/delegate]
tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
Expand Down
5 changes: 5 additions & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/delegate/delegate_common.kshlib \
functional/devices/devices.cfg \
functional/devices/devices_common.kshlib \
functional/direct_mmap/dio.cfg \
functional/direct_mmap/dio.kshlib \
functional/events/events.cfg \
functional/events/events_common.kshlib \
functional/fault/fault.cfg \
Expand Down Expand Up @@ -1359,6 +1361,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/devices/devices_002_neg.ksh \
functional/devices/devices_003_pos.ksh \
functional/devices/setup.ksh \
functional/direct_mmap/cleanup.ksh \
functional/direct_mmap/dio_mmap.ksh \
functional/direct_mmap/setup.ksh \
functional/dos_attributes/cleanup.ksh \
functional/dos_attributes/read_dos_attrs_001.ksh \
functional/dos_attributes/setup.ksh \
Expand Down
31 changes: 31 additions & 0 deletions tests/zfs-tests/tests/functional/direct_mmap/cleanup.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

verify_runnable "global"

default_cleanup
26 changes: 26 additions & 0 deletions tests/zfs-tests/tests/functional/direct_mmap/dio.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
#

DIO_VDEV1=$TEST_BASE_DIR/file1
DIO_VDEV2=$TEST_BASE_DIR/file2
DIO_VDEV3=$TEST_BASE_DIR/file3
DIO_VDEVS="$DIO_VDEV1 $DIO_VDEV2 $DIO_VDEV3"

DIO_FILESIZE=4M
DIO_BS=128K
302 changes: 302 additions & 0 deletions tests/zfs-tests/tests/functional/direct_mmap/dio.kshlib
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/direct_mmap/dio.cfg

function dio_cleanup
{
if poolexists $TESTPOOL1; then
destroy_pool $TESTPOOL1
fi

rm -f $DIO_VDEVS
}

#
# Generate an IO workload using fio and then verify the resulting data.
#
function dio_and_verify # mode file-size block-size directory ioengine extra-args
{
typeset mode=$1
typeset size=$2
typeset bs=$3
typeset mntpnt=$4
typeset ioengine=$5
typeset extra_args=$6

# Invoke an fio workload via direct IO and verify with direct IO.
log_must fio --directory=$mntpnt --name=direct-$mode \
--rw=$mode --size=$size --bs=$bs --direct=1 --numjobs=1 \
--verify=sha1 --ioengine=$ioengine --fallocate=none \
--group_reporting --minimal --do_verify=1 $extra_args

# Now just read back the file without direct IO into the ARC as an additional
# verfication step.
log_must fio --directory=$mntpnt --name=direct-$mode \
--rw=read --size=$size --bs=$bs --direct=0 --numjobs=1 \
--ioengine=$ioengine --group_reporting --minimal

log_must rm -f "$mntpnt/direct-*"
}

#
# Get zpool status -d checksum verify failures
#
function get_zpool_status_chksum_verify_failures # pool_name vdev_type
{
typeset pool=$1
typeset vdev_type=$2

if [[ "$vdev_type" == "stripe" ]]; then
val=$(zpool status -dp $pool | \
awk '{s+=$6} END {print s}' )
elif [[ "$vdev_type" == "mirror" || "$vdev_type" == "raidz" ||
"$vdev_type" == "draid" ]]; then
val=$(zpool status -dp $pool | \
awk -v d="$vdev_type" '$0 ~ d {print $6}' )
else
log_fail "Unsupported VDEV type in \
get_zpool_status_chksum_verify_failures(): $vdev_type"
fi
echo "$val"
}

#
# Get ZED dio_verify events
#
function get_zed_dio_verify_events # pool
{
typeset pool=$1

val=$(zpool events $pool | grep -c dio_verify)

echo "$val"
}

#
# Checking for checksum verify write failures with:
# zpool status -d
# zpool events
# After getting that counts will clear the out the ZPool errors and events
#
function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors
{
typeset pool=$1
typeset vdev_type=$2
typeset expect_errors=$3
typeset note_str="expecting none"

if [[ $expect_errors -ne 0 ]]; then
note_str="expecting some"
fi

log_note "Checking for Direct I/O write checksum verify errors \
$note_str on ZPool: $pool"

status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type)
zed_dio_verify_events=$(get_zed_dio_verify_events $pool)

if [[ $expect_errors -ne 0 ]]; then
if [[ $status_failures -eq 0 ||
$zed_dio_verify_events -eq 0 ]]; then
zpool status -dp $pool
zpool events $pool
log_fail "Checksum verifies in zpool status -d \
$status_failures. ZED dio_verify events \
$zed_dio_verify_events. Neither should be 0."
fi
else
if [[ $status_failures -ne 0 ||
$zed_dio_verify_events -ne 0 ]]; then
zpool status -dp $pool
zpool events $pool
log_fail "Checksum verifies in zpool status -d \
$status_failures. ZED dio_verify events \
$zed_dio_verify_events. Both should be zero."
fi
fi

log_must zpool clear $pool
log_must zpool events -c

}

#
# Get the value of a counter from
# Linux: /proc/spl/kstat/zfs/$pool/iostats file.
# FreeBSD: kstat.zfs.$pool.msic.iostats.$stat
#
function get_iostats_stat # pool stat
{
typeset pool=$1
typeset stat=$2

if is_linux; then
iostats_file=/proc/spl/kstat/zfs/$pool/iostats
val=$(grep -m1 "$stat" $iostats_file | awk '{ print $3 }')
else
val=$(sysctl -n kstat.zfs.$pool.misc.iostats.$stat)
fi
if [[ -z "$val" ]]; then
log_fail "Unable to read $stat counter"
fi

echo "$val"
}

#
# Evict any buffered blocks by overwritting them using an O_DIRECT request.
#
function evict_blocks
{
typeset pool=$1
typeset file=$2
typeset size=$3

log_must stride_dd -i /dev/urandom -o $file -b $size -c 1 -D
}

#
# Perform FIO direct IO writes to a file with the given arguments.
# Then verify thae minimum expected number of blocks were written as
# direct IO.
#
function verify_dio_write_count #pool bs size mnpnt
{
typeset pool=$1
typeset bs=$2
typeset size=$3
typeset mntpnt=$4
typeset dio_wr_expected=$(((size / bs) -1))

log_note "Checking for $dio_wr_expected Direct IO writes"

prev_dio_wr=$(get_iostats_stat $pool direct_write_count)
dio_and_verify write $size $bs $mntpnt "sync"
curr_dio_wr=$(get_iostats_stat $pool direct_write_count)
dio_wr_actual=$((curr_dio_wr - prev_dio_wr))

if [[ $dio_wr_actual -lt $dio_wr_expected ]]; then
if is_linux; then
cat /proc/spl/kstat/zfs/$pool/iostats
else
sysctl kstat.zfs.$pool.misc.iostats
fi
log_fail "Direct writes $dio_wr_actual of $dio_wr_expected"
fi
}

#
# Perform a stride_dd write command to the file with the given arguments.
# Then verify the minimum expected number of blocks were written as either
# buffered IO (by the ARC), or direct IO to the application (dd).
#
function check_write # pool file bs count seek flags buf_wr dio_wr
{
typeset pool=$1
typeset file=$2
typeset bs=$3
typeset count=$4
typeset seek=$5
typeset flags=$6
typeset buf_wr_expect=$7
typeset dio_wr_expect=$8

log_note "Checking $count * $bs write(s) at offset $seek, $flags"

prev_buf_wr=$(get_iostats_stat $pool arc_write_count)
prev_dio_wr=$(get_iostats_stat $pool direct_write_count)

log_must stride_dd -i /dev/urandom -o $file -b $bs -c $count \
-k $seek $flags

curr_buf_wr=$(get_iostats_stat $pool arc_write_count)
buf_wr_actual=$((curr_buf_wr - prev_buf_wr))

curr_dio_wr=$(get_iostats_stat $pool direct_write_count)
dio_wr_actual=$((curr_dio_wr - prev_dio_wr))

if [[ $buf_wr_actual -lt $buf_wr_expect ]]; then
if is_linux; then
cat /proc/spl/kstat/zfs/$pool/iostats
else
sysctl kstat.zfs.$pool.misc.iostats
fi
log_fail "Buffered writes $buf_wr_actual of $buf_wr_expect"
fi

if [[ $dio_wr_actual -lt $dio_wr_expect ]]; then
if is_linux; then
cat /proc/spl/kstat/zfs/$pool/iostats
else
sysctl kstat.zfs.$pool.misc.iostats
fi
log_fail "Direct writes $dio_wr_actual of $dio_wr_expect"
fi
}

#
# Perform a stride_dd read command to the file with the given arguments.
# Then verify the minimum expected number of blocks were read as either
# buffered IO (by the ARC), or direct IO to the application (dd).
#
function check_read # pool file bs count skip flags buf_rd dio_rd
{
typeset pool=$1
typeset file=$2
typeset bs=$3
typeset count=$4
typeset skip=$5
typeset flags=$6
typeset buf_rd_expect=$7
typeset dio_rd_expect=$8

log_note "Checking $count * $bs read(s) at offset $skip, $flags"

prev_buf_rd=$(get_iostats_stat $pool arc_read_count)
prev_dio_rd=$(get_iostats_stat $pool direct_read_count)

log_must stride_dd -i $file -o /dev/null -b $bs -c $count \
-p $skip $flags

curr_buf_rd=$(get_iostats_stat $pool arc_read_count)
buf_rd_actual=$((curr_buf_rd - prev_buf_rd))

curr_dio_rd=$(get_iostats_stat $pool direct_read_count)
dio_rd_actual=$((curr_dio_rd - prev_dio_rd))

if [[ $buf_rd_actual -lt $buf_rd_expect ]]; then
if is_linux; then
cat /proc/spl/kstat/zfs/$pool/iostats
else
sysctl kstat.zfs.$pool.misc.iostats
fi
log_fail "Buffered reads $buf_rd_actual of $buf_rd_expect"
fi

if [[ $dio_rd_actual -lt $dio_rd_expect ]]; then
if is_linux; then
cat /proc/spl/kstat/zfs/$pool/iostats
else
sysctl kstat.zfs.$pool.misc.iostats
fi
log_fail "Direct reads $dio_rd_actual of $dio_rd_expect"
fi
}
Loading

0 comments on commit 14d24d2

Please sign in to comment.