Skip to content

Commit d67e32f

Browse files
mjkravetz authored and akpm00 committed
hugetlb: restructure pool allocations
Allocation of a hugetlb page for the hugetlb pool is done by the routine alloc_pool_huge_page. This routine will allocate contiguous pages from a low level allocator, prep the pages for usage as a hugetlb page and then add the resulting hugetlb page to the pool. In the 'prep' stage, optional vmemmap optimization is done. For performance reasons we want to perform vmemmap optimization on multiple hugetlb pages at once. To do this, restructure the hugetlb pool allocation code such that vmemmap optimization can be isolated and later batched. The code to allocate hugetlb pages from bootmem was also modified to allow batching. No functional changes, only code restructure. Link: https://lkml.kernel.org/r/20231019023113.345257-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Reviewed-by: Muchun Song <songmuchun@bytedance.com> Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: James Houghton <jthoughton@google.com> Cc: Joao Martins <joao.m.martins@oracle.com> Cc: Konrad Dybcio <konradybcio@kernel.org> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Oscar Salvador <osalvador@suse.de> Cc: Usama Arif <usama.arif@bytedance.com> Cc: Xiongchun Duan <duanxiongchun@bytedance.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent d2cf88c commit d67e32f

File tree

1 file changed

+141
-39
lines changed

1 file changed

+141
-39
lines changed

mm/hugetlb.c

Lines changed: 141 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,16 +1996,21 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
19961996
h->nr_huge_pages_node[nid]++;
19971997
}
19981998

1999-
static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
1999+
static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
20002000
{
20012001
folio_set_hugetlb(folio);
2002-
hugetlb_vmemmap_optimize(h, &folio->page);
20032002
INIT_LIST_HEAD(&folio->lru);
20042003
hugetlb_set_folio_subpool(folio, NULL);
20052004
set_hugetlb_cgroup(folio, NULL);
20062005
set_hugetlb_cgroup_rsvd(folio, NULL);
20072006
}
20082007

2008+
static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
2009+
{
2010+
init_new_hugetlb_folio(h, folio);
2011+
hugetlb_vmemmap_optimize(h, &folio->page);
2012+
}
2013+
20092014
static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
20102015
{
20112016
__prep_new_hugetlb_folio(h, folio);
@@ -2202,16 +2207,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
22022207
return page_folio(page);
22032208
}
22042209

2205-
/*
2206-
* Common helper to allocate a fresh hugetlb page. All specific allocators
2207-
* should use this function to get new hugetlb pages
2208-
*
2209-
* Note that returned page is 'frozen': ref count of head page and all tail
2210-
* pages is zero.
2211-
*/
2212-
static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
2213-
gfp_t gfp_mask, int nid, nodemask_t *nmask,
2214-
nodemask_t *node_alloc_noretry)
2210+
static struct folio *__alloc_fresh_hugetlb_folio(struct hstate *h,
2211+
gfp_t gfp_mask, int nid, nodemask_t *nmask,
2212+
nodemask_t *node_alloc_noretry)
22152213
{
22162214
struct folio *folio;
22172215
bool retry = false;
@@ -2224,6 +2222,7 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
22242222
nid, nmask, node_alloc_noretry);
22252223
if (!folio)
22262224
return NULL;
2225+
22272226
if (hstate_is_gigantic(h)) {
22282227
if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
22292228
/*
@@ -2238,32 +2237,81 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
22382237
return NULL;
22392238
}
22402239
}
2241-
prep_new_hugetlb_folio(h, folio, folio_nid(folio));
22422240

22432241
return folio;
22442242
}
22452243

2244+
static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
2245+
gfp_t gfp_mask, int nid, nodemask_t *nmask,
2246+
nodemask_t *node_alloc_noretry)
2247+
{
2248+
struct folio *folio;
2249+
2250+
folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask,
2251+
node_alloc_noretry);
2252+
if (folio)
2253+
init_new_hugetlb_folio(h, folio);
2254+
return folio;
2255+
}
2256+
22462257
/*
2247-
* Allocates a fresh page to the hugetlb allocator pool in the node interleaved
2248-
* manner.
2258+
* Common helper to allocate a fresh hugetlb page. All specific allocators
2259+
* should use this function to get new hugetlb pages
2260+
*
2261+
* Note that returned page is 'frozen': ref count of head page and all tail
2262+
* pages is zero.
22492263
*/
2250-
static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
2251-
nodemask_t *node_alloc_noretry)
2264+
static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
2265+
gfp_t gfp_mask, int nid, nodemask_t *nmask,
2266+
nodemask_t *node_alloc_noretry)
22522267
{
22532268
struct folio *folio;
2254-
int nr_nodes, node;
2269+
2270+
folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask,
2271+
node_alloc_noretry);
2272+
if (!folio)
2273+
return NULL;
2274+
2275+
prep_new_hugetlb_folio(h, folio, folio_nid(folio));
2276+
return folio;
2277+
}
2278+
2279+
static void prep_and_add_allocated_folios(struct hstate *h,
2280+
struct list_head *folio_list)
2281+
{
2282+
unsigned long flags;
2283+
struct folio *folio, *tmp_f;
2284+
2285+
/* Add all new pool pages to free lists in one lock cycle */
2286+
spin_lock_irqsave(&hugetlb_lock, flags);
2287+
list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
2288+
__prep_account_new_huge_page(h, folio_nid(folio));
2289+
enqueue_hugetlb_folio(h, folio);
2290+
}
2291+
spin_unlock_irqrestore(&hugetlb_lock, flags);
2292+
}
2293+
2294+
/*
2295+
* Allocates a fresh hugetlb page in a node interleaved manner. The page
2296+
* will later be added to the appropriate hugetlb pool.
2297+
*/
2298+
static struct folio *alloc_pool_huge_folio(struct hstate *h,
2299+
nodemask_t *nodes_allowed,
2300+
nodemask_t *node_alloc_noretry)
2301+
{
22552302
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
2303+
int nr_nodes, node;
22562304

22572305
for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
2258-
folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
2306+
struct folio *folio;
2307+
2308+
folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, node,
22592309
nodes_allowed, node_alloc_noretry);
2260-
if (folio) {
2261-
free_huge_folio(folio); /* free it into the hugepage allocator */
2262-
return 1;
2263-
}
2310+
if (folio)
2311+
return folio;
22642312
}
22652313

2266-
return 0;
2314+
return NULL;
22672315
}
22682316

22692317
/*
@@ -3302,25 +3350,35 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
33023350
*/
33033351
static void __init gather_bootmem_prealloc(void)
33043352
{
3353+
LIST_HEAD(folio_list);
33053354
struct huge_bootmem_page *m;
3355+
struct hstate *h = NULL, *prev_h = NULL;
33063356

33073357
list_for_each_entry(m, &huge_boot_pages, list) {
33083358
struct page *page = virt_to_page(m);
33093359
struct folio *folio = (void *)page;
3310-
struct hstate *h = m->hstate;
3360+
3361+
h = m->hstate;
3362+
/*
3363+
* It is possible to have multiple huge page sizes (hstates)
3364+
* in this list. If so, process each size separately.
3365+
*/
3366+
if (h != prev_h && prev_h != NULL)
3367+
prep_and_add_allocated_folios(prev_h, &folio_list);
3368+
prev_h = h;
33113369

33123370
VM_BUG_ON(!hstate_is_gigantic(h));
33133371
WARN_ON(folio_ref_count(folio) != 1);
33143372

33153373
hugetlb_folio_init_vmemmap(folio, h,
33163374
HUGETLB_VMEMMAP_RESERVE_PAGES);
3317-
prep_new_hugetlb_folio(h, folio, folio_nid(folio));
3375+
__prep_new_hugetlb_folio(h, folio);
33183376
/* If HVO fails, initialize all tail struct pages */
33193377
if (!HPageVmemmapOptimized(&folio->page))
33203378
hugetlb_folio_init_tail_vmemmap(folio,
33213379
HUGETLB_VMEMMAP_RESERVE_PAGES,
33223380
pages_per_huge_page(h));
3323-
free_huge_folio(folio); /* add to the hugepage allocator */
3381+
list_add(&folio->lru, &folio_list);
33243382

33253383
/*
33263384
* We need to restore the 'stolen' pages to totalram_pages
@@ -3330,6 +3388,8 @@ static void __init gather_bootmem_prealloc(void)
33303388
adjust_managed_page_count(page, pages_per_huge_page(h));
33313389
cond_resched();
33323390
}
3391+
3392+
prep_and_add_allocated_folios(h, &folio_list);
33333393
}
33343394

33353395
static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
@@ -3363,9 +3423,22 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
33633423
h->max_huge_pages_node[nid] = i;
33643424
}
33653425

3426+
/*
3427+
* NOTE: this routine is called in different contexts for gigantic and
3428+
* non-gigantic pages.
3429+
* - For gigantic pages, this is called early in the boot process and
3430+
* pages are allocated from memblock allocated or something similar.
3431+
* Gigantic pages are actually added to pools later with the routine
3432+
* gather_bootmem_prealloc.
3433+
* - For non-gigantic pages, this is called later in the boot process after
3434+
* all of mm is up and functional. Pages are allocated from buddy and
3435+
* then added to hugetlb pools.
3436+
*/
33663437
static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
33673438
{
33683439
unsigned long i;
3440+
struct folio *folio;
3441+
LIST_HEAD(folio_list);
33693442
nodemask_t *node_alloc_noretry;
33703443
bool node_specific_alloc = false;
33713444

@@ -3407,14 +3480,25 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
34073480

34083481
for (i = 0; i < h->max_huge_pages; ++i) {
34093482
if (hstate_is_gigantic(h)) {
3483+
/*
3484+
* gigantic pages not added to list as they are not
3485+
* added to pools now.
3486+
*/
34103487
if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE))
34113488
break;
3412-
} else if (!alloc_pool_huge_page(h,
3413-
&node_states[N_MEMORY],
3414-
node_alloc_noretry))
3415-
break;
3489+
} else {
3490+
folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
3491+
node_alloc_noretry);
3492+
if (!folio)
3493+
break;
3494+
list_add(&folio->lru, &folio_list);
3495+
}
34163496
cond_resched();
34173497
}
3498+
3499+
/* list will be empty if hstate_is_gigantic */
3500+
prep_and_add_allocated_folios(h, &folio_list);
3501+
34183502
if (i < h->max_huge_pages) {
34193503
char buf[32];
34203504

@@ -3548,7 +3632,9 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
35483632
static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
35493633
nodemask_t *nodes_allowed)
35503634
{
3551-
unsigned long min_count, ret;
3635+
unsigned long min_count;
3636+
unsigned long allocated;
3637+
struct folio *folio;
35523638
LIST_HEAD(page_list);
35533639
NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
35543640

@@ -3625,7 +3711,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
36253711
break;
36263712
}
36273713

3628-
while (count > persistent_huge_pages(h)) {
3714+
allocated = 0;
3715+
while (count > (persistent_huge_pages(h) + allocated)) {
36293716
/*
36303717
* If this allocation races such that we no longer need the
36313718
* page, free_huge_folio will handle it by freeing the page
@@ -3636,15 +3723,32 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
36363723
/* yield cpu to avoid soft lockup */
36373724
cond_resched();
36383725

3639-
ret = alloc_pool_huge_page(h, nodes_allowed,
3726+
folio = alloc_pool_huge_folio(h, nodes_allowed,
36403727
node_alloc_noretry);
3641-
spin_lock_irq(&hugetlb_lock);
3642-
if (!ret)
3728+
if (!folio) {
3729+
prep_and_add_allocated_folios(h, &page_list);
3730+
spin_lock_irq(&hugetlb_lock);
36433731
goto out;
3732+
}
3733+
3734+
list_add(&folio->lru, &page_list);
3735+
allocated++;
36443736

36453737
/* Bail for signals. Probably ctrl-c from user */
3646-
if (signal_pending(current))
3738+
if (signal_pending(current)) {
3739+
prep_and_add_allocated_folios(h, &page_list);
3740+
spin_lock_irq(&hugetlb_lock);
36473741
goto out;
3742+
}
3743+
3744+
spin_lock_irq(&hugetlb_lock);
3745+
}
3746+
3747+
/* Add allocated pages to the pool */
3748+
if (!list_empty(&page_list)) {
3749+
spin_unlock_irq(&hugetlb_lock);
3750+
prep_and_add_allocated_folios(h, &page_list);
3751+
spin_lock_irq(&hugetlb_lock);
36483752
}
36493753

36503754
/*
@@ -3670,8 +3774,6 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
36703774
* Collect pages to be removed on list without dropping lock
36713775
*/
36723776
while (min_count < persistent_huge_pages(h)) {
3673-
struct folio *folio;
3674-
36753777
folio = remove_pool_hugetlb_folio(h, nodes_allowed, 0);
36763778
if (!folio)
36773779
break;

0 commit comments

Comments (0)