Skip to content

Commit a08c719

Browse files
sidkumar99 authored and akpm00 committed
mm/filemap: remove hugetlb special casing in filemap.c
Remove special cased hugetlb handling code within the page cache by
changing the granularity of ->index to the base page size rather than the
huge page size. The motivation of this patch is to reduce complexity
within the filemap code while also increasing performance by removing
branches that are evaluated on every page cache lookup.

To support the change in index, new wrappers for hugetlb page cache
interactions are added. These wrappers perform the conversion to a linear
index which is now expected by the page cache for huge pages.

========================= PERFORMANCE ======================================

Perf was used to check the performance differences after the patch.
Overall the performance is similar to mainline, with a slightly larger
overhead that occurs in __filemap_add_folio() and
hugetlb_add_to_page_cache(). This is because of the larger overhead that
occurs in xa_load() and xa_store() as the xarray is now using more entries
to store hugetlb folios in the page cache.

Timing

aarch64
    2MB Page Size
        6.5-rc3 + this patch:
            [root@sidhakum-ol9-1 hugepages]# time fallocate -l 700GB test.txt
            real    1m49.568s
            user    0m0.000s
            sys     1m49.461s

        6.5-rc3:
            [root]# time fallocate -l 700GB test.txt
            real    1m47.495s
            user    0m0.000s
            sys     1m47.370s

    1GB Page Size
        6.5-rc3 + this patch:
            [root@sidhakum-ol9-1 hugepages1G]# time fallocate -l 700GB test.txt
            real    1m47.024s
            user    0m0.000s
            sys     1m46.921s

        6.5-rc3:
            [root@sidhakum-ol9-1 hugepages1G]# time fallocate -l 700GB test.txt
            real    1m44.551s
            user    0m0.000s
            sys     1m44.438s

x86
    2MB Page Size
        6.5-rc3 + this patch:
            [root@sidhakum-ol9-2 hugepages]# time fallocate -l 100GB test.txt
            real    0m22.383s
            user    0m0.000s
            sys     0m22.255s

        6.5-rc3:
            [opc@sidhakum-ol9-2 hugepages]$ time sudo fallocate -l 100GB /dev/hugepages/test.txt
            real    0m22.735s
            user    0m0.038s
            sys     0m22.567s

    1GB Page Size
        6.5-rc3 + this patch:
            [root@sidhakum-ol9-2 hugepages1GB]# time fallocate -l 100GB test.txt
            real    0m25.786s
            user    0m0.001s
            sys     0m25.589s

        6.5-rc3:
            [root@sidhakum-ol9-2 hugepages1G]# time fallocate -l 100GB test.txt
            real    0m33.454s
            user    0m0.001s
            sys     0m33.193s

aarch64:
    workload - fallocate a 700GB file backed by huge pages

    6.5-rc3 + this patch:
        2MB Page Size:
            --100.00%--__arm64_sys_fallocate
                      ksys_fallocate
                      vfs_fallocate
                      hugetlbfs_fallocate
                      |
                      |--95.04%--__pi_clear_page
                      |
                      |--3.57%--clear_huge_page
                      |          |
                      |          |--2.63%--rcu_all_qs
                      |          |
                      |           --0.91%--__cond_resched
                      |
                       --0.67%--__cond_resched

            0.17%  0.00%   0  fallocate  [kernel.vmlinux]  [k] hugetlb_add_to_page_cache
            0.14%  0.10%  11  fallocate  [kernel.vmlinux]  [k] __filemap_add_folio

    6.5-rc3
        2MB Page Size:
            --100.00%--__arm64_sys_fallocate
                      ksys_fallocate
                      vfs_fallocate
                      hugetlbfs_fallocate
                      |
                      |--94.91%--__pi_clear_page
                      |
                      |--4.11%--clear_huge_page
                      |          |
                      |          |--3.00%--rcu_all_qs
                      |          |
                      |           --1.10%--__cond_resched
                      |
                       --0.59%--__cond_resched

            0.08%  0.01%   1  fallocate  [kernel.kallsyms]  [k] hugetlb_add_to_page_cache
            0.05%  0.03%   3  fallocate  [kernel.kallsyms]  [k] __filemap_add_folio

x86
    workload - fallocate a 100GB file backed by huge pages

    6.5-rc3 + this patch:
        2MB Page Size:
            hugetlbfs_fallocate
            |
             --99.57%--clear_huge_page
                       |
                        --98.47%--clear_page_erms
                                  |
                                   --0.53%--asm_sysvec_apic_timer_interrupt

            0.04%  0.04%   1  fallocate  [kernel.kallsyms]  [k] xa_load
            0.04%  0.00%   0  fallocate  [kernel.kallsyms]  [k] hugetlb_add_to_page_cache
            0.04%  0.00%   0  fallocate  [kernel.kallsyms]  [k] __filemap_add_folio
            0.04%  0.00%   0  fallocate  [kernel.kallsyms]  [k] xas_store

    6.5-rc3
        2MB Page Size:
            --99.93%--__x64_sys_fallocate
                      vfs_fallocate
                      hugetlbfs_fallocate
                      |
                       --99.38%--clear_huge_page
                                 |
                                 |--98.40%--clear_page_erms
                                 |
                                  --0.59%--__cond_resched

            0.03%  0.03%   1  fallocate  [kernel.kallsyms]  [k] __filemap_add_folio

========================= TESTING ======================================

This patch passes libhugetlbfs tests and LTP hugetlb tests

********** TEST SUMMARY
*                      2M
*                      32-bit 64-bit
*     Total testcases:   110    113
*             Skipped:     0      0
*                PASS:   107    113
*                FAIL:     0      0
*    Killed by signal:     3      0
*   Bad configuration:     0      0
*       Expected FAIL:     0      0
*     Unexpected PASS:     0      0
*    Test not present:     0      0
* Strange test result:     0      0
**********

Done executing testcases.
LTP Version: 20220527-178-g2761a81c4

Page migration was also tested using Mike Kravetz's test program.[8]

[dan.carpenter@linaro.org: fix a NULL vs IS_ERR() bug]
  Link: https://lkml.kernel.org/r/1772c296-1417-486f-8eef-171af2192681@moroto.mountain
Link: https://lkml.kernel.org/r/20230926192017.98183-1-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reported-and-tested-by: syzbot+c225dea486da4d5592bd@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c225dea486da4d5592bd
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 0374af1 commit a08c719

File tree

6 files changed

+52
-101
lines changed

6 files changed

+52
-101
lines changed

fs/hugetlbfs/inode.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
334334
ssize_t retval = 0;
335335

336336
while (iov_iter_count(to)) {
337-
struct page *page;
337+
struct folio *folio;
338338
size_t nr, copied, want;
339339

340340
/* nr is the maximum number of bytes to copy from this page */
@@ -352,38 +352,38 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
352352
}
353353
nr = nr - offset;
354354

355-
/* Find the page */
356-
page = find_lock_page(mapping, index);
357-
if (unlikely(page == NULL)) {
355+
/* Find the folio */
356+
folio = filemap_lock_hugetlb_folio(h, mapping, index);
357+
if (IS_ERR(folio)) {
358358
/*
359359
* We have a HOLE, zero out the user-buffer for the
360360
* length of the hole or request.
361361
*/
362362
copied = iov_iter_zero(nr, to);
363363
} else {
364-
unlock_page(page);
364+
folio_unlock(folio);
365365

366-
if (!PageHWPoison(page))
366+
if (!folio_test_has_hwpoisoned(folio))
367367
want = nr;
368368
else {
369369
/*
370370
* Adjust how many bytes safe to read without
371371
* touching the 1st raw HWPOISON subpage after
372372
* offset.
373373
*/
374-
want = adjust_range_hwpoison(page, offset, nr);
374+
want = adjust_range_hwpoison(&folio->page, offset, nr);
375375
if (want == 0) {
376-
put_page(page);
376+
folio_put(folio);
377377
retval = -EIO;
378378
break;
379379
}
380380
}
381381

382382
/*
383-
* We have the page, copy it to user space buffer.
383+
* We have the folio, copy it to user space buffer.
384384
*/
385-
copied = copy_page_to_iter(page, offset, want, to);
386-
put_page(page);
385+
copied = copy_folio_to_iter(folio, offset, want, to);
386+
folio_put(folio);
387387
}
388388
offset += copied;
389389
retval += copied;
@@ -661,21 +661,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
661661
{
662662
struct hstate *h = hstate_inode(inode);
663663
struct address_space *mapping = &inode->i_data;
664-
const pgoff_t start = lstart >> huge_page_shift(h);
665-
const pgoff_t end = lend >> huge_page_shift(h);
664+
const pgoff_t end = lend >> PAGE_SHIFT;
666665
struct folio_batch fbatch;
667666
pgoff_t next, index;
668667
int i, freed = 0;
669668
bool truncate_op = (lend == LLONG_MAX);
670669

671670
folio_batch_init(&fbatch);
672-
next = start;
671+
next = lstart >> PAGE_SHIFT;
673672
while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
674673
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
675674
struct folio *folio = fbatch.folios[i];
676675
u32 hash = 0;
677676

678-
index = folio->index;
677+
index = folio->index >> huge_page_order(h);
679678
hash = hugetlb_fault_mutex_hash(mapping, index);
680679
mutex_lock(&hugetlb_fault_mutex_table[hash]);
681680

@@ -693,7 +692,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
693692
}
694693

695694
if (truncate_op)
696-
(void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
695+
(void)hugetlb_unreserve_pages(inode,
696+
lstart >> huge_page_shift(h),
697+
LONG_MAX, freed);
697698
}
698699

699700
static void hugetlbfs_evict_inode(struct inode *inode)
@@ -741,7 +742,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
741742
pgoff_t idx = start >> huge_page_shift(h);
742743
struct folio *folio;
743744

744-
folio = filemap_lock_folio(mapping, idx);
745+
folio = filemap_lock_hugetlb_folio(h, mapping, idx);
745746
if (IS_ERR(folio))
746747
return;
747748

@@ -886,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
886887
mutex_lock(&hugetlb_fault_mutex_table[hash]);
887888

888889
/* See if already present in mapping to avoid alloc/free */
889-
folio = filemap_get_folio(mapping, index);
890+
folio = filemap_get_folio(mapping, index << huge_page_order(h));
890891
if (!IS_ERR(folio)) {
891892
folio_put(folio);
892893
mutex_unlock(&hugetlb_fault_mutex_table[hash]);

include/linux/hugetlb.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
812812
return huge_page_size(h) / 512;
813813
}
814814

815+
static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
816+
struct address_space *mapping, pgoff_t idx)
817+
{
818+
return filemap_lock_folio(mapping, idx << huge_page_order(h));
819+
}
820+
815821
#include <asm/hugetlb.h>
816822

817823
#ifndef is_hugepage_only_range
@@ -1008,6 +1014,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
10081014
return NULL;
10091015
}
10101016

1017+
static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
1018+
struct address_space *mapping, pgoff_t idx)
1019+
{
1020+
return NULL;
1021+
}
1022+
10111023
static inline int isolate_or_dissolve_huge_page(struct page *page,
10121024
struct list_head *list)
10131025
{

include/linux/pagemap.h

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,6 @@ static inline pgoff_t folio_next_index(struct folio *folio)
789789
*/
790790
static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
791791
{
792-
/* HugeTLBfs indexes the page cache in units of hpage_size */
793-
if (folio_test_hugetlb(folio))
794-
return &folio->page;
795792
return folio_page(folio, index & (folio_nr_pages(folio) - 1));
796793
}
797794

@@ -807,9 +804,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
807804
*/
808805
static inline bool folio_contains(struct folio *folio, pgoff_t index)
809806
{
810-
/* HugeTLBfs indexes the page cache in units of hpage_size */
811-
if (folio_test_hugetlb(folio))
812-
return folio->index == index;
813807
return index - folio_index(folio) < folio_nr_pages(folio);
814808
}
815809

@@ -867,10 +861,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping,
867861
}
868862

869863
/*
870-
* Get index of the page within radix-tree (but not for hugetlb pages).
871-
* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
864+
* Get the offset in PAGE_SIZE (even for hugetlb pages).
872865
*/
873-
static inline pgoff_t page_to_index(struct page *page)
866+
static inline pgoff_t page_to_pgoff(struct page *page)
874867
{
875868
struct page *head;
876869

@@ -885,19 +878,6 @@ static inline pgoff_t page_to_index(struct page *page)
885878
return head->index + page - head;
886879
}
887880

888-
extern pgoff_t hugetlb_basepage_index(struct page *page);
889-
890-
/*
891-
* Get the offset in PAGE_SIZE (even for hugetlb pages).
892-
* (TODO: hugetlb pages should have ->index in PAGE_SIZE)
893-
*/
894-
static inline pgoff_t page_to_pgoff(struct page *page)
895-
{
896-
if (unlikely(PageHuge(page)))
897-
return hugetlb_basepage_index(page);
898-
return page_to_index(page);
899-
}
900-
901881
/*
902882
* Return byte-offset into filesystem object for page.
903883
*/
@@ -934,24 +914,16 @@ static inline loff_t folio_file_pos(struct folio *folio)
934914

935915
/*
936916
* Get the offset in PAGE_SIZE (even for hugetlb folios).
937-
* (TODO: hugetlb folios should have ->index in PAGE_SIZE)
938917
*/
939918
static inline pgoff_t folio_pgoff(struct folio *folio)
940919
{
941-
if (unlikely(folio_test_hugetlb(folio)))
942-
return hugetlb_basepage_index(&folio->page);
943920
return folio->index;
944921
}
945922

946-
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
947-
unsigned long address);
948-
949923
static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
950924
unsigned long address)
951925
{
952926
pgoff_t pgoff;
953-
if (unlikely(is_vm_hugetlb_page(vma)))
954-
return linear_hugepage_index(vma, address);
955927
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
956928
pgoff += vma->vm_pgoff;
957929
return pgoff;

mm/filemap.c

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,8 @@ static void page_cache_delete(struct address_space *mapping,
131131

132132
mapping_set_update(&xas, mapping);
133133

134-
/* hugetlb pages are represented by a single entry in the xarray */
135-
if (!folio_test_hugetlb(folio)) {
136-
xas_set_order(&xas, folio->index, folio_order(folio));
137-
nr = folio_nr_pages(folio);
138-
}
134+
xas_set_order(&xas, folio->index, folio_order(folio));
135+
nr = folio_nr_pages(folio);
139136

140137
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
141138

@@ -234,7 +231,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
234231
if (free_folio)
235232
free_folio(folio);
236233

237-
if (folio_test_large(folio) && !folio_test_hugetlb(folio))
234+
if (folio_test_large(folio))
238235
refs = folio_nr_pages(folio);
239236
folio_put_refs(folio, refs);
240237
}
@@ -855,14 +852,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
855852

856853
if (!huge) {
857854
int error = mem_cgroup_charge(folio, NULL, gfp);
858-
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
859855
if (error)
860856
return error;
861857
charged = true;
862-
xas_set_order(&xas, index, folio_order(folio));
863-
nr = folio_nr_pages(folio);
864858
}
865859

860+
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
861+
xas_set_order(&xas, index, folio_order(folio));
862+
nr = folio_nr_pages(folio);
863+
866864
gfp &= GFP_RECLAIM_MASK;
867865
folio_ref_add(folio, nr);
868866
folio->mapping = mapping;
@@ -2040,7 +2038,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
20402038
int idx = folio_batch_count(fbatch) - 1;
20412039

20422040
folio = fbatch->folios[idx];
2043-
if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
2041+
if (!xa_is_value(folio))
20442042
nr = folio_nr_pages(folio);
20452043
*start = indices[idx] + nr;
20462044
}
@@ -2104,7 +2102,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
21042102
int idx = folio_batch_count(fbatch) - 1;
21052103

21062104
folio = fbatch->folios[idx];
2107-
if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
2105+
if (!xa_is_value(folio))
21082106
nr = folio_nr_pages(folio);
21092107
*start = indices[idx] + nr;
21102108
}
@@ -2145,9 +2143,6 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
21452143
continue;
21462144
if (!folio_batch_add(fbatch, folio)) {
21472145
unsigned long nr = folio_nr_pages(folio);
2148-
2149-
if (folio_test_hugetlb(folio))
2150-
nr = 1;
21512146
*start = folio->index + nr;
21522147
goto out;
21532148
}
@@ -2213,9 +2208,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
22132208

22142209
if (!folio_batch_add(fbatch, folio)) {
22152210
nr = folio_nr_pages(folio);
2216-
2217-
if (folio_test_hugetlb(folio))
2218-
nr = 1;
22192211
*start = folio->index + nr;
22202212
goto out;
22212213
}
@@ -2232,10 +2224,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
22322224

22332225
if (nr) {
22342226
folio = fbatch->folios[nr - 1];
2235-
if (folio_test_hugetlb(folio))
2236-
*start = folio->index + 1;
2237-
else
2238-
*start = folio_next_index(folio);
2227+
*start = folio->index + folio_nr_pages(folio);
22392228
}
22402229
out:
22412230
rcu_read_unlock();
@@ -2273,9 +2262,6 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
22732262
continue;
22742263
if (!folio_batch_add(fbatch, folio)) {
22752264
unsigned long nr = folio_nr_pages(folio);
2276-
2277-
if (folio_test_hugetlb(folio))
2278-
nr = 1;
22792265
*start = folio->index + nr;
22802266
goto out;
22812267
}

0 commit comments

Comments
 (0)