Skip to content

Commit 34d9caf

Browse files
Chen Ridong authored and gregkh committed
kernfs: Fix UAF in polling when open file is released
commit 3c9ba27 upstream.

A use-after-free (UAF) vulnerability was identified in the PSI (Pressure Stall Information) monitoring mechanism:

BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140
Read of size 8 at addr ffff3de3d50bd308 by task systemd/1

psi_trigger_poll+0x3c/0x140
cgroup_pressure_poll+0x70/0xa0
cgroup_file_poll+0x8c/0x100
kernfs_fop_poll+0x11c/0x1c0
ep_item_poll.isra.0+0x188/0x2c0

Allocated by task 1:
cgroup_file_open+0x88/0x388
kernfs_fop_open+0x73c/0xaf0
do_dentry_open+0x5fc/0x1200
vfs_open+0xa0/0x3f0
do_open+0x7e8/0xd08
path_openat+0x2fc/0x6b0
do_filp_open+0x174/0x368

Freed by task 8462:
cgroup_file_release+0x130/0x1f8
kernfs_drain_open_files+0x17c/0x440
kernfs_drain+0x2dc/0x360
kernfs_show+0x1b8/0x288
cgroup_file_show+0x150/0x268
cgroup_pressure_write+0x1dc/0x340
cgroup_file_write+0x274/0x548

Reproduction Steps:
1. Open test/cpu.pressure and establish epoll monitoring
2. Disable monitoring: echo 0 > test/cgroup.pressure
3. Re-enable monitoring: echo 1 > test/cgroup.pressure

The race condition occurs because:
1. When cgroup.pressure is disabled (echo 0 > cgroup.pressure), it:
   - Releases PSI triggers via cgroup_file_release()
   - Frees of->priv through kernfs_drain_open_files()
2. While epoll still holds reference to the file and continues polling
3. Re-enabling (echo 1 > cgroup.pressure) accesses freed of->priv

epolling                        disable/enable cgroup.pressure
fd=open(cpu.pressure)
while(1)
...
epoll_wait
kernfs_fop_poll
kernfs_get_active = true        echo 0 > cgroup.pressure
...                             cgroup_file_show
                                kernfs_show
                                // inactive kn
                                kernfs_drain_open_files
                                cft->release(of);
                                kfree(ctx);
...
kernfs_get_active = false
                                echo 1 > cgroup.pressure
                                kernfs_show
                                kernfs_activate_one(kn);
kernfs_fop_poll
kernfs_get_active = true
cgroup_file_poll
psi_trigger_poll
// UAF
...
end: close(fd)

To address this issue, introduce kernfs_get_active_of() for kernfs open files to obtain active references. This function will fail if the open file has been released.
Replace kernfs_get_active() with kernfs_get_active_of() to prevent further operations on released file descriptors.

Fixes: 34f26a1 ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface")
Cc: stable <stable@kernel.org>
Reported-by: Zhang Zhaotian <zhangzhaotian@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com
[ Drop llseek bits ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent f1385ec commit 34d9caf

File tree

1 file changed

+36
-18
lines changed

1 file changed

+36
-18
lines changed

fs/kernfs/file.c

Lines changed: 36 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
7070
!list_empty(&of->list));
7171
}
7272

73+
/* Get active reference to kernfs node for an open file */
74+
static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
75+
{
76+
/* Skip if file was already released */
77+
if (unlikely(of->released))
78+
return NULL;
79+
80+
if (!kernfs_get_active(of->kn))
81+
return NULL;
82+
83+
return of;
84+
}
85+
86+
static void kernfs_put_active_of(struct kernfs_open_file *of)
87+
{
88+
return kernfs_put_active(of->kn);
89+
}
90+
7391
/**
7492
* kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
7593
*
@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
139157

140158
if (ops->seq_stop)
141159
ops->seq_stop(sf, v);
142-
kernfs_put_active(of->kn);
160+
kernfs_put_active_of(of);
143161
}
144162

145163
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
152170
* the ops aren't called concurrently for the same open file.
153171
*/
154172
mutex_lock(&of->mutex);
155-
if (!kernfs_get_active(of->kn))
173+
if (!kernfs_get_active_of(of))
156174
return ERR_PTR(-ENODEV);
157175

158176
ops = kernfs_ops(of->kn);
@@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
238256
* the ops aren't called concurrently for the same open file.
239257
*/
240258
mutex_lock(&of->mutex);
241-
if (!kernfs_get_active(of->kn)) {
259+
if (!kernfs_get_active_of(of)) {
242260
len = -ENODEV;
243261
mutex_unlock(&of->mutex);
244262
goto out_free;
@@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
252270
else
253271
len = -EINVAL;
254272

255-
kernfs_put_active(of->kn);
273+
kernfs_put_active_of(of);
256274
mutex_unlock(&of->mutex);
257275

258276
if (len < 0)
@@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
323341
* the ops aren't called concurrently for the same open file.
324342
*/
325343
mutex_lock(&of->mutex);
326-
if (!kernfs_get_active(of->kn)) {
344+
if (!kernfs_get_active_of(of)) {
327345
mutex_unlock(&of->mutex);
328346
len = -ENODEV;
329347
goto out_free;
@@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
335353
else
336354
len = -EINVAL;
337355

338-
kernfs_put_active(of->kn);
356+
kernfs_put_active_of(of);
339357
mutex_unlock(&of->mutex);
340358

341359
if (len > 0)
@@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
357375
if (!of->vm_ops)
358376
return;
359377

360-
if (!kernfs_get_active(of->kn))
378+
if (!kernfs_get_active_of(of))
361379
return;
362380

363381
if (of->vm_ops->open)
364382
of->vm_ops->open(vma);
365383

366-
kernfs_put_active(of->kn);
384+
kernfs_put_active_of(of);
367385
}
368386

369387
static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
@@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
375393
if (!of->vm_ops)
376394
return VM_FAULT_SIGBUS;
377395

378-
if (!kernfs_get_active(of->kn))
396+
if (!kernfs_get_active_of(of))
379397
return VM_FAULT_SIGBUS;
380398

381399
ret = VM_FAULT_SIGBUS;
382400
if (of->vm_ops->fault)
383401
ret = of->vm_ops->fault(vmf);
384402

385-
kernfs_put_active(of->kn);
403+
kernfs_put_active_of(of);
386404
return ret;
387405
}
388406

@@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
395413
if (!of->vm_ops)
396414
return VM_FAULT_SIGBUS;
397415

398-
if (!kernfs_get_active(of->kn))
416+
if (!kernfs_get_active_of(of))
399417
return VM_FAULT_SIGBUS;
400418

401419
ret = 0;
@@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
404422
else
405423
file_update_time(file);
406424

407-
kernfs_put_active(of->kn);
425+
kernfs_put_active_of(of);
408426
return ret;
409427
}
410428

@@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
418436
if (!of->vm_ops)
419437
return -EINVAL;
420438

421-
if (!kernfs_get_active(of->kn))
439+
if (!kernfs_get_active_of(of))
422440
return -EINVAL;
423441

424442
ret = -EINVAL;
425443
if (of->vm_ops->access)
426444
ret = of->vm_ops->access(vma, addr, buf, len, write);
427445

428-
kernfs_put_active(of->kn);
446+
kernfs_put_active_of(of);
429447
return ret;
430448
}
431449

@@ -504,7 +522,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
504522
mutex_lock(&of->mutex);
505523

506524
rc = -ENODEV;
507-
if (!kernfs_get_active(of->kn))
525+
if (!kernfs_get_active_of(of))
508526
goto out_unlock;
509527

510528
ops = kernfs_ops(of->kn);
@@ -539,7 +557,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
539557
}
540558
vma->vm_ops = &kernfs_vm_ops;
541559
out_put:
542-
kernfs_put_active(of->kn);
560+
kernfs_put_active_of(of);
543561
out_unlock:
544562
mutex_unlock(&of->mutex);
545563

@@ -894,15 +912,15 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
894912
struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
895913
__poll_t ret;
896914

897-
if (!kernfs_get_active(kn))
915+
if (!kernfs_get_active_of(of))
898916
return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
899917

900918
if (kn->attr.ops->poll)
901919
ret = kn->attr.ops->poll(of, wait);
902920
else
903921
ret = kernfs_generic_poll(of, wait);
904922

905-
kernfs_put_active(kn);
923+
kernfs_put_active_of(of);
906924
return ret;
907925
}
908926

0 commit comments

Comments
 (0)