Skip to content

Commit 7dcf594

Browse files
rpptSasha Levin
authored andcommitted
x86/efi: defer freeing of boot services memory
commit a4b0bf6 upstream. efi_free_boot_services() frees memory occupied by EFI_BOOT_SERVICES_CODE and EFI_BOOT_SERVICES_DATA using memblock_free_late(). There are two issue with that: memblock_free_late() should be used for memory allocated with memblock_alloc() while the memory reserved with memblock_reserve() should be freed with free_reserved_area(). More acutely, with CONFIG_DEFERRED_STRUCT_PAGE_INIT=y efi_free_boot_services() is called before deferred initialization of the memory map is complete. Benjamin Herrenschmidt reports that this causes a leak of ~140MB of RAM on EC2 t3a.nano instances which only have 512MB or RAM. If the freed memory resides in the areas that memory map for them is still uninitialized, they won't be actually freed because memblock_free_late() calls memblock_free_pages() and the latter skips uninitialized pages. Using free_reserved_area() at this point is also problematic because __free_page() accesses the buddy of the freed page and that again might end up in uninitialized part of the memory map. Delaying the entire efi_free_boot_services() could be problematic because in addition to freeing boot services memory it updates efi.memmap without any synchronization and that's undesirable late in boot when there is concurrency. More robust approach is to only defer freeing of the EFI boot services memory. Split efi_free_boot_services() in two. First efi_unmap_boot_services() collects ranges that should be freed into an array then efi_free_boot_services() later frees them after deferred init is complete. Link: https://lore.kernel.org/all/ec2aaef14783869b3be6e3c253b2dcbf67dbc12a.camel@kernel.crashing.org Fixes: 916f676 ("x86, efi: Retain boot service code until after switching to virtual mode") Cc: <stable@vger.kernel.org> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent efe0b0b commit 7dcf594

File tree

4 files changed

+55
-6
lines changed

4 files changed

+55
-6
lines changed

arch/x86/include/asm/efi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ extern void __init efi_apply_memmap_quirks(void);
138138
extern int __init efi_reuse_config(u64 tables, int nr_tables);
139139
extern void efi_delete_dummy_variable(void);
140140
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
141-
extern void efi_free_boot_services(void);
141+
extern void efi_unmap_boot_services(void);
142142

143143
void arch_efi_call_virt_setup(void);
144144
void arch_efi_call_virt_teardown(void);

arch/x86/platform/efi/efi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ static void __init __efi_enter_virtual_mode(void)
837837
}
838838

839839
efi_check_for_embedded_firmwares();
840-
efi_free_boot_services();
840+
efi_unmap_boot_services();
841841

842842
if (!efi_is_mixed())
843843
efi_native_runtime_setup();

arch/x86/platform/efi/quirks.c

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ void __init efi_reserve_boot_services(void)
341341

342342
/*
343343
* Because the following memblock_reserve() is paired
344-
* with memblock_free_late() for this region in
344+
* with free_reserved_area() for this region in
345345
* efi_free_boot_services(), we must be extremely
346346
* careful not to reserve, and subsequently free,
347347
* critical regions of memory (like the kernel image) or
@@ -404,17 +404,33 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
404404
pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
405405
}
406406

407-
void __init efi_free_boot_services(void)
407+
struct efi_freeable_range {
408+
u64 start;
409+
u64 end;
410+
};
411+
412+
static struct efi_freeable_range *ranges_to_free;
413+
414+
void __init efi_unmap_boot_services(void)
408415
{
409416
struct efi_memory_map_data data = { 0 };
410417
efi_memory_desc_t *md;
411418
int num_entries = 0;
419+
int idx = 0;
420+
size_t sz;
412421
void *new, *new_md;
413422

414423
/* Keep all regions for /sys/kernel/debug/efi */
415424
if (efi_enabled(EFI_DBG))
416425
return;
417426

427+
sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1;
428+
ranges_to_free = kzalloc(sz, GFP_KERNEL);
429+
if (!ranges_to_free) {
430+
pr_err("Failed to allocate storage for freeable EFI regions\n");
431+
return;
432+
}
433+
418434
for_each_efi_memory_desc(md) {
419435
unsigned long long start = md->phys_addr;
420436
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -471,7 +487,15 @@ void __init efi_free_boot_services(void)
471487
start = SZ_1M;
472488
}
473489

474-
memblock_free_late(start, size);
490+
/*
491+
* With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory
492+
* map are still not initialized and we can't reliably free
493+
* memory here.
494+
* Queue the ranges to free at a later point.
495+
*/
496+
ranges_to_free[idx].start = start;
497+
ranges_to_free[idx].end = start + size;
498+
idx++;
475499
}
476500

477501
if (!num_entries)
@@ -512,6 +536,31 @@ void __init efi_free_boot_services(void)
512536
}
513537
}
514538

539+
static int __init efi_free_boot_services(void)
540+
{
541+
struct efi_freeable_range *range = ranges_to_free;
542+
unsigned long freed = 0;
543+
544+
if (!ranges_to_free)
545+
return 0;
546+
547+
while (range->start) {
548+
void *start = phys_to_virt(range->start);
549+
void *end = phys_to_virt(range->end);
550+
551+
free_reserved_area(start, end, -1, NULL);
552+
freed += (end - start);
553+
range++;
554+
}
555+
kfree(ranges_to_free);
556+
557+
if (freed)
558+
pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K);
559+
560+
return 0;
561+
}
562+
arch_initcall(efi_free_boot_services);
563+
515564
/*
516565
* A number of config table entries get remapped to virtual addresses
517566
* after entering EFI virtual mode. However, the kexec kernel requires

drivers/firmware/efi/mokvar-table.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ static struct kobject *mokvar_kobj;
8585
* as an alternative to ordinary EFI variables, due to platform-dependent
8686
* limitations. The memory occupied by this table is marked as reserved.
8787
*
88-
* This routine must be called before efi_free_boot_services() in order
88+
* This routine must be called before efi_unmap_boot_services() in order
8989
* to guarantee that it can mark the table as reserved.
9090
*
9191
* Implicit inputs:

0 commit comments

Comments
 (0)