Skip to content

Commit 1d40f4e

Browse files
Dev Jain authored and akpm00 committed
mm: optimize mprotect() for MM_CP_PROT_NUMA by batch-skipping PTEs
For the MM_CP_PROT_NUMA skipping case, observe that, if we skip an iteration due to the underlying folio satisfying any of the skip conditions, then for all subsequent ptes which map the same folio, the iteration will be skipped for them too. Therefore, we can optimize by using folio_pte_batch() to batch skip the iterations. Use prot_numa_skip() introduced in the previous patch to determine whether we need to skip the iteration. Change its signature to have a double pointer to a folio, which will be used by mprotect_folio_pte_batch() to determine the number of iterations we can safely skip. Link: https://lkml.kernel.org/r/20250718090244.21092-3-dev.jain@arm.com Signed-off-by: Dev Jain <dev.jain@arm.com> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> Reviewed-by: Zi Yan <ziy@nvidia.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Barry Song <baohua@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: David Hildenbrand <david@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Joey Gouly <joey.gouly@arm.com> Cc: Kevin Brodsky <kevin.brodsky@arm.com> Cc: Lance Yang <ioworker0@gmail.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Peter Xu <peterx@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Cc: Yang Shi <yang@os.amperecomputing.com> Cc: Yicong Yang <yangyicong@hisilicon.com> Cc: Zhenhua Huang <quic_zhenhuah@quicinc.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent b9bf6c2 commit 1d40f4e

File tree

1 file changed

+42
-13
lines changed

1 file changed

+42
-13
lines changed

mm/mprotect.c

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,44 +83,59 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
8383
return pte_dirty(pte);
8484
}
8585

86+
/*
 * Determine how many consecutive PTEs, starting at @ptep, map pages of
 * @folio, so the caller can handle (here: skip) them as a single batch.
 *
 * Returns 1 when batching is not possible: either there is no underlying
 * folio at all, or the folio is small and therefore spans only one page.
 * Otherwise defers to folio_pte_batch(), capped at @max_nr_ptes.
 */
static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
		pte_t pte, int max_nr_ptes)
{
	/* Batching only pays off for a large folio mapped by several PTEs. */
	if (!folio || !folio_test_large(folio))
		return 1;

	return folio_pte_batch(folio, ptep, pte, max_nr_ptes);
}
98+
8699
/*
 * Decide whether the NUMA-hinting protection change (MM_CP_PROT_NUMA)
 * should skip the PTE at @addr.
 *
 * Returns true when the entry must be left alone; false when it is a
 * valid candidate for PROT_NONE NUMA hinting. On return, *@foliop holds
 * the folio looked up for @oldpte (NULL if the lookup was never reached
 * or failed), so the caller can batch-skip all remaining PTEs that map
 * the same folio via mprotect_folio_pte_batch().
 */
static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
		pte_t oldpte, pte_t *pte, int target_node,
		struct folio **foliop)
{
	struct folio *folio = NULL;
	bool ret = true;
	bool toptier;
	int nid;

	/* Already PROT_NONE: avoid a pointless TLB flush. */
	if (pte_protnone(oldpte))
		goto skip;

	folio = vm_normal_folio(vma, addr, oldpte);
	if (!folio)
		goto skip;

	/* Zero, device and KSM pages must not trap NUMA hinting faults. */
	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
		goto skip;

	/* Also skip shared copy-on-write pages */
	if (is_cow_mapping(vma->vm_flags) &&
	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
		goto skip;

	/*
	 * While migration can move some dirty pages,
	 * it cannot move them all from MIGRATE_ASYNC
	 * context.
	 */
	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
		goto skip;

	/*
	 * Don't mess with PTEs if page is already on the node
	 * a single-threaded process is running on.
	 */
	nid = folio_nid(folio);
	if (target_node == nid)
		goto skip;

	toptier = node_is_toptier(nid);

	/*
	 * Skip scanning top tier node if normal numa
	 * balancing is disabled
	 */
	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
		goto skip;

	/* This PTE is a candidate: record the access time and proceed. */
	ret = false;
	if (folio_use_access_time(folio))
		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));

skip:
	*foliop = folio;
	return ret;
}
138157

139158
static long change_pte_range(struct mmu_gather *tlb,
@@ -147,6 +166,7 @@ static long change_pte_range(struct mmu_gather *tlb,
147166
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
148167
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
149168
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
169+
int nr_ptes;
150170

151171
tlb_change_page_size(tlb, PAGE_SIZE);
152172
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -161,18 +181,27 @@ static long change_pte_range(struct mmu_gather *tlb,
161181
flush_tlb_batched_pending(vma->vm_mm);
162182
arch_enter_lazy_mmu_mode();
163183
do {
184+
nr_ptes = 1;
164185
oldpte = ptep_get(pte);
165186
if (pte_present(oldpte)) {
187+
int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
188+
struct folio *folio;
166189
pte_t ptent;
167190

168191
/*
169192
* Avoid trapping faults against the zero or KSM
170193
* pages. See similar comment in change_huge_pmd.
171194
*/
172195
if (prot_numa) {
173-
if (prot_numa_skip(vma, addr, oldpte, pte,
174-
target_node))
196+
int ret = prot_numa_skip(vma, addr, oldpte, pte,
197+
target_node, &folio);
198+
if (ret) {
199+
200+
/* determine batch to skip */
201+
nr_ptes = mprotect_folio_pte_batch(folio,
202+
pte, oldpte, max_nr_ptes);
175203
continue;
204+
}
176205
}
177206

178207
oldpte = ptep_modify_prot_start(vma, addr, pte);
@@ -289,7 +318,7 @@ static long change_pte_range(struct mmu_gather *tlb,
289318
pages++;
290319
}
291320
}
292-
} while (pte++, addr += PAGE_SIZE, addr != end);
321+
} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
293322
arch_leave_lazy_mmu_mode();
294323
pte_unmap_unlock(pte - 1, ptl);
295324

0 commit comments

Comments
 (0)