Commit 2d1f649

kvaneesh authored and akpm00 committed
mm/memory_hotplug: support memmap_on_memory when memmap is not aligned to pageblocks
Currently, the memmap_on_memory feature is only supported with memory block sizes that result in vmemmap pages covering full page blocks. This is because the memory onlining/offlining code requires applicable ranges to be pageblock-aligned, for example, to set the migratetypes properly.

This patch lifts that restriction by reserving more pages than required for vmemmap space, so that the start address of the usable range is pageblock-aligned regardless of the memory block size. Using this facility implies the kernel will reserve some pages in every memory block, which makes the memmap_on_memory feature widely useful across different memory block size values.

For example, with 64K page size and 256MiB memory block size, we require 4 pages to map the vmemmap pages; to align things correctly we end up adding a reserve of 28 pages. That is, for every 4096 pages, 28 pages get reserved.

Link: https://lkml.kernel.org/r/20230808091501.287660-5-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 85a2b4b commit 2d1f649
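To make the example in the commit message concrete, here is a minimal userspace sketch of the arithmetic (not kernel code; it assumes sizeof(struct page) == 64 and pageblock_nr_pages == 32 for the 64K-page configuration, both of which are config-dependent):

#include <stdio.h>

#define PAGE_SIZE_64K           (64UL * 1024)           /* assumed 64K base pages */
#define MEMORY_BLOCK_SIZE       (256UL * 1024 * 1024)   /* 256MiB memory block */
#define STRUCT_PAGE_SIZE        64UL                    /* assumed sizeof(struct page) */
#define PAGEBLOCK_NR_PAGES      32UL                    /* assumed for this config */

int main(void)
{
        unsigned long nr_pages = MEMORY_BLOCK_SIZE / PAGE_SIZE_64K;     /* 4096 */
        unsigned long memmap_bytes = nr_pages * STRUCT_PAGE_SIZE;       /* 256KiB */
        /* PFN_UP(): round the memmap size up to whole pages */
        unsigned long vmemmap_pages =
                (memmap_bytes + PAGE_SIZE_64K - 1) / PAGE_SIZE_64K;     /* 4 */
        /* pageblock_align(): round up to a full pageblock */
        unsigned long reserved =
                (vmemmap_pages + PAGEBLOCK_NR_PAGES - 1) /
                PAGEBLOCK_NR_PAGES * PAGEBLOCK_NR_PAGES;                /* 32 */

        printf("vmemmap pages: %lu, extra reserve: %lu\n",
               vmemmap_pages, reserved - vmemmap_pages);                /* 4, 28 */
        return 0;
}

Compiled and run, this prints "vmemmap pages: 4, extra reserve: 28", matching the 28-pages-per-4096 figure in the message.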

2 files changed (+113, -19 lines)


Documentation/admin-guide/mm/memory-hotplug.rst

Lines changed: 12 additions & 0 deletions
@@ -433,6 +433,18 @@ The following module parameters are currently defined:
 				memory in a way that huge pages in bigger
 				granularity cannot be formed on hotplugged
 				memory.
+
+				With value "force" it could result in memory
+				wastage due to memmap size limitations. For
+				example, if the memmap for a memory block
+				requires 1 MiB, but the pageblock size is 2
+				MiB, 1 MiB of hotplugged memory will be wasted.
+				Note that there are still cases where the
+				feature cannot be enforced: for example, if the
+				memmap is smaller than a single page, or if the
+				architecture does not support the forced mode
+				in all configurations.
+
 ``online_policy``		read-write: Set the basic policy used for
 				automatic zone selection when onlining memory
 				blocks without specifying a target zone.
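Because the parameter is registered read-only (mode 0444; see the module_param_cb() registration in the diff below), the mode can only be chosen at boot or module-load time, not changed later through sysfs. A usage sketch for the built-in case is the kernel command line:

	memory_hotplug.memmap_on_memory=force

Accepted values are the usual boolean spellings plus "force", per the MODULE_PARM_DESC below.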

mm/memory_hotplug.c

Lines changed: 101 additions & 19 deletions
@@ -41,17 +41,83 @@
 #include "internal.h"
 #include "shuffle.h"
 
+enum {
+	MEMMAP_ON_MEMORY_DISABLE = 0,
+	MEMMAP_ON_MEMORY_ENABLE,
+	MEMMAP_ON_MEMORY_FORCE,
+};
+
+static int memmap_mode __read_mostly = MEMMAP_ON_MEMORY_DISABLE;
+
+static inline unsigned long memory_block_memmap_size(void)
+{
+	return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
+}
+
+static inline unsigned long memory_block_memmap_on_memory_pages(void)
+{
+	unsigned long nr_pages = PFN_UP(memory_block_memmap_size());
+
+	/*
+	 * In "forced" memmap_on_memory mode, we add extra pages to align the
+	 * vmemmap size to cover full pageblocks. That way, we can add memory
+	 * even if the vmemmap size is not properly aligned, however, we might waste
+	 * memory.
+	 */
+	if (memmap_mode == MEMMAP_ON_MEMORY_FORCE)
+		return pageblock_align(nr_pages);
+	return nr_pages;
+}
+
 #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
 /*
  * memory_hotplug.memmap_on_memory parameter
  */
-static bool memmap_on_memory __ro_after_init;
-module_param(memmap_on_memory, bool, 0444);
-MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
+static int set_memmap_mode(const char *val, const struct kernel_param *kp)
+{
+	int ret, mode;
+	bool enabled;
+
+	if (sysfs_streq(val, "force") || sysfs_streq(val, "FORCE")) {
+		mode = MEMMAP_ON_MEMORY_FORCE;
+	} else {
+		ret = kstrtobool(val, &enabled);
+		if (ret < 0)
+			return ret;
+		if (enabled)
+			mode = MEMMAP_ON_MEMORY_ENABLE;
+		else
+			mode = MEMMAP_ON_MEMORY_DISABLE;
+	}
+	*((int *)kp->arg) = mode;
+	if (mode == MEMMAP_ON_MEMORY_FORCE) {
+		unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
+
+		pr_info_once("Memory hotplug will waste %ld pages in each memory block\n",
+			     memmap_pages - PFN_UP(memory_block_memmap_size()));
+	}
+	return 0;
+}
+
+static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
+{
+	if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE)
+		return sprintf(buffer, "force\n");
+	return param_get_bool(buffer, kp);
+}
+
+static const struct kernel_param_ops memmap_mode_ops = {
+	.set = set_memmap_mode,
+	.get = get_memmap_mode,
+};
+module_param_cb(memmap_on_memory, &memmap_mode_ops, &memmap_mode, 0444);
+MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug\n"
+		 "With value \"force\" it could result in memory wastage due "
+		 "to memmap size limitations (Y/N/force)");
 
 static inline bool mhp_memmap_on_memory(void)
 {
-	return memmap_on_memory;
+	return memmap_mode != MEMMAP_ON_MEMORY_DISABLE;
 }
 #else
 static inline bool mhp_memmap_on_memory(void)
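As a standalone illustration of the parsing logic above, here is a hedged userspace sketch with the kernel helpers sysfs_streq() and kstrtobool() replaced by simplified stand-ins (the stand-ins accept fewer spellings than the kernel helpers do):

#include <stdio.h>
#include <string.h>

enum { MODE_DISABLE = 0, MODE_ENABLE, MODE_FORCE };

/* simplified stand-in for kstrtobool(): the first character decides */
static int parse_bool(const char *val, int *res)
{
        switch (val[0]) {
        case 'y': case 'Y': case '1':
                *res = 1;
                return 0;
        case 'n': case 'N': case '0':
                *res = 0;
                return 0;
        default:
                return -1;      /* the kernel returns -EINVAL here */
        }
}

static int parse_memmap_mode(const char *val, int *mode)
{
        int enabled;

        /* mirrors the explicit "force"/"FORCE" check in set_memmap_mode() */
        if (!strcmp(val, "force") || !strcmp(val, "FORCE")) {
                *mode = MODE_FORCE;
                return 0;
        }
        if (parse_bool(val, &enabled) < 0)
                return -1;
        *mode = enabled ? MODE_ENABLE : MODE_DISABLE;
        return 0;
}

int main(void)
{
        const char *inputs[] = { "force", "Y", "N", "bogus" };

        for (int i = 0; i < 4; i++) {
                int mode;

                if (parse_memmap_mode(inputs[i], &mode) == 0)
                        printf("%-5s -> mode %d\n", inputs[i], mode);
                else
                        printf("%-5s -> rejected\n", inputs[i]);
        }
        return 0;
}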
@@ -1247,11 +1313,6 @@ static int online_memory_block(struct memory_block *mem, void *arg)
 	return device_online(&mem->dev);
 }
 
-static inline unsigned long memory_block_memmap_size(void)
-{
-	return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
-}
-
 #ifndef arch_supports_memmap_on_memory
 static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 {
@@ -1267,7 +1328,7 @@ static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 static bool mhp_supports_memmap_on_memory(unsigned long size)
 {
 	unsigned long vmemmap_size = memory_block_memmap_size();
-	unsigned long remaining_size = size - vmemmap_size;
+	unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
 
 	/*
 	 * Besides having arch support and the feature enabled at runtime, we
@@ -1295,10 +1356,28 @@ static bool mhp_supports_memmap_on_memory(unsigned long size)
 	 * altmap as an alternative source of memory, and we do not exactly
 	 * populate a single PMD.
 	 */
-	return mhp_memmap_on_memory() &&
-	       size == memory_block_size_bytes() &&
-	       IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)) &&
-	       arch_supports_memmap_on_memory(vmemmap_size);
+	if (!mhp_memmap_on_memory() || size != memory_block_size_bytes())
+		return false;
+
+	/*
+	 * Make sure the vmemmap allocation is fully contained
+	 * so that we always allocate vmemmap memory from altmap area.
+	 */
+	if (!IS_ALIGNED(vmemmap_size, PAGE_SIZE))
+		return false;
+
+	/*
+	 * start pfn should be pageblock_nr_pages aligned for correctly
+	 * setting migrate types
+	 */
+	if (!pageblock_aligned(memmap_pages))
+		return false;
+
+	if (memmap_pages == PHYS_PFN(memory_block_size_bytes()))
+		/* No effective hotplugged memory doesn't make sense. */
+		return false;
+
+	return arch_supports_memmap_on_memory(vmemmap_size);
 }
 
 /*
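The rewritten mhp_supports_memmap_on_memory() now reads as a sequence of independent checks. As a hedged standalone restatement, here is a sketch with the kernel helpers replaced by explicit arithmetic (the function and parameter names are invented for illustration; the kernel version takes these values from the config and the memory block size):

#include <stdbool.h>

static bool supports_memmap_on_memory(bool feature_enabled,
                                      bool arch_support,
                                      unsigned long size,
                                      unsigned long block_size,
                                      unsigned long vmemmap_size,
                                      unsigned long memmap_pages,
                                      unsigned long page_size,
                                      unsigned long pageblock_nr_pages)
{
        /* only whole memory blocks can carry their own memmap */
        if (!feature_enabled || size != block_size)
                return false;
        /* vmemmap must fill whole pages so it comes entirely from the altmap */
        if (vmemmap_size % page_size)
                return false;
        /* the usable range after the reserve must start pageblock-aligned */
        if (memmap_pages % pageblock_nr_pages)
                return false;
        /* reserving the entire block would leave no usable memory */
        if (memmap_pages == block_size / page_size)
                return false;
        return arch_support;
}

With the commit message's 64K example (vmemmap_size = 256 KiB, memmap_pages = 32, pageblock_nr_pages = 32), every check passes and 4064 of the block's 4096 pages remain usable.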
@@ -1311,7 +1390,10 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 {
 	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
 	enum memblock_flags memblock_flags = MEMBLOCK_NONE;
-	struct vmem_altmap mhp_altmap = {};
+	struct vmem_altmap mhp_altmap = {
+		.base_pfn = PHYS_PFN(res->start),
+		.end_pfn = PHYS_PFN(res->end),
+	};
 	struct memory_group *group = NULL;
 	u64 start, size;
 	bool new_node = false;
@@ -1356,8 +1438,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	 */
 	if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
 		if (mhp_supports_memmap_on_memory(size)) {
-			mhp_altmap.free = PHYS_PFN(size);
-			mhp_altmap.base_pfn = PHYS_PFN(start);
+			mhp_altmap.free = memory_block_memmap_on_memory_pages();
 			params.altmap = &mhp_altmap;
 		}
 		/* fallback to not using altmap */
@@ -1369,8 +1450,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 		goto error;
 
 	/* create memory block devices after memory was added */
-	ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
-					  group);
+	ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
 	if (ret) {
 		arch_remove_memory(start, size, NULL);
 		goto error;
@@ -2096,6 +2176,8 @@ static int __ref try_remove_memory(u64 start, u64 size)
 		 * right thing if we used vmem_altmap when hot-adding
 		 * the range.
 		 */
+		mhp_altmap.base_pfn = PHYS_PFN(start);
+		mhp_altmap.free = nr_vmemmap_pages;
 		mhp_altmap.alloc = nr_vmemmap_pages;
 		altmap = &mhp_altmap;
 	}
