18 changes: 11 additions & 7 deletions Documentation/x86/x86_64/mm.txt
@@ -12,8 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@@ -37,13 +38,15 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -67,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.

The module mapping space size changes based on the CONFIG requirements for the
following fixmap section.

Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.

Be very careful vs. KASLR when changing anything here. The KASLR address
range must not overlap with anything except the KASAN shadow area, which is
correct as KASAN disables KASLR.
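As a quick cross-check of the updated table, the new module mapping space figure follows directly from the two addresses quoted above. A minimal user-space sketch (the hexadecimal constants are copied from the table itself, not taken from any kernel header):

#include <stdio.h>

int main(void)
{
	/* Constants as quoted in the mm.txt table above. */
	unsigned long long modules_vaddr = 0xffffffffa0000000ULL; /* module space start        */
	unsigned long long modules_end   = 0xffffffffff000000ULL; /* one past fffffffffeffffff */

	/* Prints "1520", matching the "(1520 MB)" entry above. */
	printf("module mapping space: %llu MB\n",
	       (modules_end - modules_vaddr) >> 20);
	return 0;
}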
3 changes: 2 additions & 1 deletion Makefile
@@ -143,7 +143,8 @@ $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make

# Invoke a second make in the output directory, passing relevant variables
sub-make:
$(Q)$(MAKE) -C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR) \
$(Q)$(MAKE) -C $(KBUILD_OUTPUT) \
KBUILD_SRC=$(shell realpath --relative-to=$(KBUILD_OUTPUT) $(CURDIR)) \
-f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS))

# Leave processing to above invocation of make
5 changes: 5 additions & 0 deletions arch/arm64/kernel/efi-header.S
@@ -103,6 +103,11 @@ section_table:

.set section_count, (. - section_table) / 40

/* CoreOS 64-byte verity hash value. */
.org _head + 512
.ascii "verity-hash"
.org _head + 512 + 64

#ifdef CONFIG_DEBUG_EFI
/*
* The debug table is referenced via its Relative Virtual Address (RVA),
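The .org directives above carve out a 64-byte slot at a fixed image offset of 512 bytes and tag it with an ASCII magic so that a later build step can find it and fill in the real dm-verity root hash. The sketch below shows how such a post-processing tool might patch the slot; the function name, the file handling, and the assumption that the whole slot is overwritten with the hash are illustrative only — just the offset, the slot size, and the "verity-hash" magic come from the patch.

#include <stdio.h>
#include <string.h>

#define SLOT_OFFSET 512
#define SLOT_SIZE   64

/* Hypothetical build-time helper; not part of this patch. */
int patch_verity_hash(const char *image, const unsigned char hash[SLOT_SIZE])
{
	char magic[sizeof("verity-hash") - 1];
	FILE *f = fopen(image, "r+b");

	if (!f)
		return -1;
	/* Verify the magic placed by efi-header.S before touching anything. */
	if (fseek(f, SLOT_OFFSET, SEEK_SET) ||
	    fread(magic, 1, sizeof(magic), f) != sizeof(magic) ||
	    memcmp(magic, "verity-hash", sizeof(magic))) {
		fclose(f);
		return -1;	/* slot not found where expected */
	}
	/* Overwrite the 64-byte slot with the hash. */
	if (fseek(f, SLOT_OFFSET, SEEK_SET) ||
	    fwrite(hash, 1, SLOT_SIZE, f) != SLOT_SIZE) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}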
13 changes: 6 additions & 7 deletions arch/x86/entry/entry_64_compat.S
@@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
swapgs

/* Stash user ESP and switch to the kernel stack. */
/* Stash user ESP */
movl %esp, %r8d

/* Use %rsp as scratch reg. User ESP is stashed in r8 */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

/* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

/* Construct struct pt_regs on stack */
@@ -219,12 +224,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */

/*
* We just saved %rdi so it is safe to clobber. It is not
* preserved during the C calls inside TRACE_IRQS_OFF anyway.
*/
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi

/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
16 changes: 16 additions & 0 deletions arch/x86/events/intel/ds.c
@@ -5,6 +5,7 @@

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>

#include "../perf_event.h"
@@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;

pa = virt_to_phys(addr);

preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);

/*
* This is a cross-CPU update of the cpu_entry_area, so we must shoot down
* all TLB entries for it.
*/
flush_tlb_kernel_range(start, start + size);
preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
unsigned long start = (unsigned long)cea;
size_t msz = 0;

preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);

flush_tlb_kernel_range(start, start + size);
preempt_enable();
}

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
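ds_update_cea() and ds_clear_cea() manage an alias of a per-CPU DS buffer inside the cpu_entry_area, and because that alias is reachable from every CPU, the new flush_tlb_kernel_range() calls shoot down any stale translations. A hedged sketch of how a caller might use the pair — the function names, the cea/buffer arguments and the PAGE_KERNEL protection below are placeholders, not the actual BTS/PEBS setup code:

/* Illustrative only: publish a DS buffer through its cpu_entry_area slot... */
static int map_ds_buffer(void *cea, void *buf, size_t bufsz)
{
	if (!buf)
		return -ENOMEM;

	/* Map the buffer into the per-CPU entry area alias. */
	ds_update_cea(cea, buf, bufsz, PAGE_KERNEL);
	return 0;
}

/* ...and tear the alias down again once the buffer is freed. */
static void unmap_ds_buffer(void *cea, size_t bufsz)
{
	ds_clear_cea(cea, bufsz);
}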
14 changes: 10 additions & 4 deletions arch/x86/include/asm/pgtable_64_types.h
@@ -75,7 +75,13 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
/*
* See Documentation/x86/x86_64/mm.txt for a description of the memory map.
*
* Be very careful vs. KASLR when changing anything here. The KASLR address
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)

#ifdef CONFIG_X86_5LEVEL
@@ -88,7 +94,7 @@ typedef struct { pteval_t pte; } pte_t;
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
# define LDT_PGD_ENTRY _AC(-4, UL)
# define LDT_PGD_ENTRY _AC(-3, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif

@@ -104,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t;

#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)

#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)

#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)

#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
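Swapping LDT_PGD_ENTRY from -4 to -3 and CPU_ENTRY_AREA_PGD from -3 to -4 is what reorders the two regions in the mm.txt table above: with 4-level paging each top-level entry spans 1 << 39 bytes, so the base addresses fall straight out of the indices. A throwaway sketch reproducing them (constants copied from this hunk; not kernel code):

#include <stdio.h>

#define PGDIR_SHIFT 39	/* 4-level paging: each PGD entry spans 512 GB */

int main(void)
{
	unsigned long long cpu_entry_area_pgd = -4ULL;	/* new CPU_ENTRY_AREA_PGD     */
	unsigned long long ldt_pgd_entry      = -3ULL;	/* new LDT_PGD_ENTRY          */
	unsigned long long espfix_pgd_entry   = -2ULL;	/* unchanged ESPFIX_PGD_ENTRY */

	printf("cpu_entry_area base: %016llx\n", cpu_entry_area_pgd << PGDIR_SHIFT);
	printf("LDT remap base:      %016llx\n", ldt_pgd_entry << PGDIR_SHIFT);
	printf("ESPFIX base:         %016llx\n", espfix_pgd_entry << PGDIR_SHIFT);
	return 0;
}

The three results are fffffe0000000000, fffffe8000000000 and ffffff0000000000, matching the cpu_entry_area, LDT remap and %esp fixup lines in the reworked mm.txt table.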
4 changes: 2 additions & 2 deletions arch/x86/kernel/cpu/common.c
@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)

setup_force_cpu_cap(X86_FEATURE_ALWAYS);

/* Assume for now that ALL x86 CPUs are insecure */
setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
if (c->x86_vendor != X86_VENDOR_AMD)
setup_force_cpu_bug(X86_BUG_CPU_INSECURE);

fpu__init_system(c);

2 changes: 1 addition & 1 deletion arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
2 changes: 1 addition & 1 deletion arch/x86/mm/dump_pagetables.c
@@ -61,10 +61,10 @@ enum address_markers_idx {
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
CPU_ENTRY_AREA_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
2 changes: 1 addition & 1 deletion arch/x86/mm/init.c
@@ -870,7 +870,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);

void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
32 changes: 9 additions & 23 deletions arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
#define TB_SHIFT 40

/*
* Virtual address start and end range for randomization. The end changes base
* on configuration to have the highest amount of space for randomization.
* It increases the possible random position for each randomized region.
* Virtual address start and end range for randomization.
*
* You need to add an if/def entry if you introduce a new memory region
* compatible with KASLR. Your entry must be in logical order with memory
* layout. For example, ESPFIX is before EFI because its virtual address is
* before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
* ensure that this order is correct and won't be changed.
* The end address could depend on more configuration options to make the
* highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;

#if defined(CONFIG_X86_ESPFIX64)
static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
#elif defined(CONFIG_EFI)
static const unsigned long vaddr_end = EFI_VA_END;
#else
static const unsigned long vaddr_end = __START_KERNEL_map;
#endif
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
unsigned long remain_entropy;

/*
* All these BUILD_BUG_ON checks ensures the memory layout is
* consistent with the vaddr_start/vaddr_end variables.
* These BUILD_BUG_ON checks ensure the memory layout is consistent
* with the vaddr_start/vaddr_end variables. These checks are very
* limited....
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
vaddr_end >= EFI_VA_END);
BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
IS_ENABLED(CONFIG_EFI)) &&
vaddr_end >= __START_KERNEL_map);
BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

if (!kaslr_memory_enabled())
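With vaddr_end now pinned to CPU_ENTRY_AREA_BASE, the randomization window is simply [__PAGE_OFFSET_BASE, CPU_ENTRY_AREA_BASE), and the remaining BUILD_BUG_ONs only assert that this window sits below the kernel mapping. The sketch below illustrates the order-preserving randomization that mm.txt describes ("their order is preserved but their base will be offset"); it is not the kernel's exact algorithm — the region sizes, alignment and RNG are placeholders:

#include <stdint.h>
#include <stdlib.h>

#define PUD_ALIGN (1ULL << 30)	/* keep the illustrative bases 1 GB aligned */

struct region {
	uint64_t base;	/* randomized base, filled in by randomize_layout() */
	uint64_t size;	/* space the region actually needs */
};

/* Hand each region a random slice of the leftover slack, in order. */
static void randomize_layout(struct region *r, int n,
			     uint64_t start, uint64_t end)
{
	uint64_t slack = end - start;
	int i;

	for (i = 0; i < n; i++)
		slack -= r[i].size;

	for (i = 0; i < n; i++) {
		/* Placeholder RNG; the kernel uses a proper entropy source. */
		uint64_t pad = ((uint64_t)rand() % (slack + 1)) & ~(PUD_ALIGN - 1);

		r[i].base = start + pad;	/* order is preserved...       */
		start = r[i].base + r[i].size;	/* ...and regions never overlap */
		slack -= pad;
	}
}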
3 changes: 2 additions & 1 deletion arch/x86/mm/pti.c
@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void)
static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
(unsigned long) __irqentry_text_end, _PAGE_RW);
(unsigned long) __irqentry_text_end,
_PAGE_RW | _PAGE_GLOBAL);
}

/*