Skip to content

Commit 239cec2

Browse files
puranjaymohan authored and gregkh committed
bpf: fix mm lifecycle in open-coded task_vma iterator
[ Upstream commit d8e27d2 ]

The open-coded task_vma iterator reads task->mm locklessly and acquires mmap_read_trylock() but never calls mmget(). If the task exits concurrently, the mm_struct can be freed as it is not SLAB_TYPESAFE_BY_RCU, resulting in a use-after-free.

Safely read task->mm with a trylock on alloc_lock and acquire an mm reference. Drop the reference via bpf_iter_mmput_async() in _destroy() and error paths. bpf_iter_mmput_async() is a local wrapper around mmput_async() with a fallback to mmput() on !CONFIG_MMU.

Reject irqs-disabled contexts (including NMI) up front. Operations used by _next() and _destroy() (mmap_read_unlock, bpf_iter_mmput_async) take spinlocks with IRQs disabled (pool->lock, pi_lock). Running from NMI or from a tracepoint that fires with those locks held could deadlock.

A trylock on alloc_lock is used instead of the blocking task_lock() (get_task_mm) to avoid a deadlock when a softirq BPF program iterates a task that already holds its alloc_lock on the same CPU.

Fixes: 4ac4546 ("bpf: Introduce task_vma open-coded iterator kfuncs")
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Link: https://lore.kernel.org/r/20260408154539.3832150-2-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 95b5a8c commit 239cec2

1 file changed

Lines changed: 51 additions & 3 deletions

File tree

kernel/bpf/task_iter.c

Lines changed: 51 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include <linux/bpf_mem_alloc.h>
1111
#include <linux/btf_ids.h>
1212
#include <linux/mm_types.h>
13+
#include <linux/sched/mm.h>
1314
#include "mmap_unlock_work.h"
1415

1516
static const char * const iter_task_type_names[] = {
@@ -798,6 +799,15 @@ const struct bpf_func_proto bpf_find_vma_proto = {
798799
.arg5_type = ARG_ANYTHING,
799800
};
800801

802+
static inline void bpf_iter_mmput_async(struct mm_struct *mm) /* local wrapper used by the task_vma iterator to drop its reference on @mm */
803+
{
804+
#ifdef CONFIG_MMU
805+
mmput_async(mm); /* CONFIG_MMU builds: release via mmput_async() */
806+
#else
807+
mmput(mm); /* !CONFIG_MMU fallback: release via plain mmput() instead */
808+
#endif
809+
}
810+
801811
struct bpf_iter_task_vma_kern_data {
802812
struct task_struct *task;
803813
struct mm_struct *mm;
@@ -829,6 +839,24 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
829839
BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
830840
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));
831841

842+
/* bpf_iter_mmput_async() needs mmput_async() which requires CONFIG_MMU */
843+
if (!IS_ENABLED(CONFIG_MMU)) {
844+
kit->data = NULL;
845+
return -EOPNOTSUPP;
846+
}
847+
848+
/*
849+
* Reject irqs-disabled contexts including NMI. Operations used
850+
* by _next() and _destroy() (mmap_read_unlock, bpf_iter_mmput_async)
851+
* can take spinlocks with IRQs disabled (pi_lock, pool->lock).
852+
* Running from NMI or from a tracepoint that fires with those
853+
* locks held could deadlock.
854+
*/
855+
if (irqs_disabled()) {
856+
kit->data = NULL;
857+
return -EBUSY;
858+
}
859+
832860
/* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized
833861
* before, so non-NULL kit->data doesn't point to previously
834862
* bpf_mem_alloc'd bpf_iter_task_vma_kern_data
@@ -838,7 +866,25 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
838866
return -ENOMEM;
839867

840868
kit->data->task = get_task_struct(task);
869+
/*
870+
* Safely read task->mm and acquire an mm reference.
871+
*
872+
* Cannot use get_task_mm() because its task_lock() is a
873+
* blocking spin_lock that would deadlock if the target task
874+
* already holds alloc_lock on this CPU (e.g. a softirq BPF
875+
* program iterating a task interrupted while holding its
876+
* alloc_lock).
877+
*/
878+
if (!spin_trylock(&task->alloc_lock)) {
879+
err = -EBUSY;
880+
goto err_cleanup_iter;
881+
}
841882
kit->data->mm = task->mm;
883+
if (kit->data->mm && !(task->flags & PF_KTHREAD))
884+
mmget(kit->data->mm);
885+
else
886+
kit->data->mm = NULL;
887+
spin_unlock(&task->alloc_lock);
842888
if (!kit->data->mm) {
843889
err = -ENOENT;
844890
goto err_cleanup_iter;
@@ -848,15 +894,16 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
848894
irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
849895
if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
850896
err = -EBUSY;
851-
goto err_cleanup_iter;
897+
goto err_cleanup_mmget;
852898
}
853899

854900
vma_iter_init(&kit->data->vmi, kit->data->mm, addr);
855901
return 0;
856902

903+
err_cleanup_mmget:
904+
bpf_iter_mmput_async(kit->data->mm);
857905
err_cleanup_iter:
858-
if (kit->data->task)
859-
put_task_struct(kit->data->task);
906+
put_task_struct(kit->data->task);
860907
bpf_mem_free(&bpf_global_ma, kit->data);
861908
/* NULL kit->data signals failed bpf_iter_task_vma initialization */
862909
kit->data = NULL;
@@ -879,6 +926,7 @@ __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
879926
if (kit->data) {
880927
bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
881928
put_task_struct(kit->data->task);
929+
bpf_iter_mmput_async(kit->data->mm);
882930
bpf_mem_free(&bpf_global_ma, kit->data);
883931
}
884932
}

0 commit comments

Comments
 (0)