Skip to content

Commit

Permalink
amd64: store pcids pmap data in pcpu zone
Browse files Browse the repository at this point in the history
This change eliminates the struct pmap_pcid array that was embedded in
struct pmap and sized by MAXCPU, which would bloat struct pmap as MAXCPU
increases.  It also removes false sharing of cache lines, since the array
elements are mostly accessed locally by their corresponding CPUs.

Suggested by:	mjg
Reviewed by:	markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D39890
  • Loading branch information
kostikbel committed May 2, 2023
1 parent 9c8cbf3 commit 42f722e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 12 deletions.
38 changes: 29 additions & 9 deletions sys/amd64/amd64/pmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,15 @@ pmap_pku_mask_bit(pmap_t pmap)
#define VM_PAGE_TO_PV_LIST_LOCK(m) \
PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

struct pmap kernel_pmap_store;
/*
* Statically allocated storage for the kernel pmap.
*
* The PCID data reached through pm_pcidp is not available this early;
* per the original note it is obtained only after the dynamic allocator
* is operational.  Until then pm_pcidp is initialized with a
* non-canonical pointer value so that any early access through it is
* caught regardless of the active mapping.
*
* NOTE(review): pmap_bootstrap() is seen assigning kernel_pmap->pm_pcidp
* when pmap_pcid_enabled is set; presumably kernel_pmap refers to this
* object -- confirm against the header that defines kernel_pmap.
*/
struct pmap kernel_pmap_store = {
.pm_pcidp = (void *)0xdeadbeefdeadbeef,
};

vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
Expand Down Expand Up @@ -2026,9 +2034,14 @@ pmap_bootstrap(vm_paddr_t *firstaddr)

/* Initialize TLB Context Id. */
if (pmap_pcid_enabled) {
kernel_pmap->pm_pcidp = (void *)(uintptr_t)
offsetof(struct pcpu, pc_kpmap_store);
for (i = 0; i < MAXCPU; i++) {
kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
kernel_pmap->pm_pcids[i].pm_gen = 1;
struct pmap_pcid *pcidp;

pcidp = zpcpu_get_cpu(kernel_pmap->pm_pcidp, i);
pcidp->pm_pcid = PMAP_PCID_KERN;
pcidp->pm_gen = 1;
}

/*
Expand Down Expand Up @@ -3031,6 +3044,7 @@ pmap_invalidate_ept(pmap_t pmap)
static inline void
pmap_invalidate_preipi_pcid(pmap_t pmap)
{
struct pmap_pcid *pcidp;
u_int cpuid, i;

sched_pin();
Expand All @@ -3040,8 +3054,10 @@ pmap_invalidate_preipi_pcid(pmap_t pmap)
cpuid = 0xffffffff; /* An impossible value */

CPU_FOREACH(i) {
if (cpuid != i)
pmap->pm_pcids[i].pm_gen = 0;
if (cpuid != i) {
pcidp = zpcpu_get_cpu(pmap->pm_pcidp, i);
pcidp->pm_gen = 0;
}
}

/*
Expand Down Expand Up @@ -4192,7 +4208,7 @@ pmap_pinit_pcids(pmap_t pmap, uint32_t pcid, int gen)
int i;

CPU_FOREACH(i) {
pcidp = &pmap->pm_pcids[i];
pcidp = zpcpu_get_cpu(pmap->pm_pcidp, i);
pcidp->pm_pcid = pcid;
pcidp->pm_gen = gen;
}
Expand All @@ -4215,6 +4231,7 @@ pmap_pinit0(pmap_t pmap)
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_flags = pmap_flags;
pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK);
pmap_pinit_pcids(pmap, PMAP_PCID_KERN + 1, 1);
pmap_activate_boot(pmap);
td = curthread;
Expand Down Expand Up @@ -4398,6 +4415,9 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys);

if (pmap_pcid_enabled) {
if (pmap->pm_pcidp == NULL)
pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8,
M_WAITOK);
pmap_pinit_pcids(pmap, PMAP_PCID_NONE, 0);
}
pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */
Expand Down Expand Up @@ -9993,11 +10013,11 @@ pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
old_pmap = PCPU_GET(curpmap);
MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3);
old_pcidp = &old_pmap->pm_pcids[cpuid];
old_pcidp = zpcpu_get_cpu(old_pmap->pm_pcidp, cpuid);
old_pcidp->pm_gen = 0;
}

pcidp = &pmap->pm_pcids[cpuid];
pcidp = zpcpu_get_cpu(pmap->pm_pcidp, cpuid);
cached = pmap_pcid_alloc_checked(pmap, pcidp);
cr3 = rcr3();
if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
Expand Down Expand Up @@ -10027,7 +10047,7 @@ pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap,
KASSERT((read_rflags() & PSL_I) == 0,
("PCID needs interrupts disabled in pmap_activate_sw()"));

pcidp = &pmap->pm_pcids[cpuid];
pcidp = zpcpu_get_cpu(pmap->pm_pcidp, cpuid);
cached = pmap_pcid_alloc_checked(pmap, pcidp);
cr3 = rcr3();
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
Expand Down
4 changes: 3 additions & 1 deletion sys/amd64/include/pcpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#ifndef _MACHINE_PCPU_H_
#define _MACHINE_PCPU_H_

#include <machine/_pmap.h>
#include <machine/segments.h>
#include <machine/tss.h>

Expand Down Expand Up @@ -101,7 +102,8 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
uint64_t pc_ucr3_load_mask; \
u_int pc_small_core; \
u_int pc_pcid_invlpg_workaround; \
char __pad[2908] /* pad to UMA_PCPU_ALLOC_SIZE */
struct pmap_pcid pc_kpmap_store; \
char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */

#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
Expand Down
8 changes: 6 additions & 2 deletions sys/amd64/include/pmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@

#ifndef LOCORE

#include <sys/kassert.h>
#include <sys/queue.h>
#include <sys/_cpuset.h>
#include <sys/_lock.h>
Expand Down Expand Up @@ -390,7 +391,7 @@ struct pmap {
long pm_eptgen; /* EPT pmap generation id */
smr_t pm_eptsmr;
int pm_flags;
struct pmap_pcid pm_pcids[MAXCPU];
struct pmap_pcid *pm_pcidp;
struct rangeset pm_pkru;
};

Expand Down Expand Up @@ -537,8 +538,11 @@ pmap_invlpg(pmap_t pmap, vm_offset_t va)
/*
* Return the pm_pcid value recorded for the given pmap in its per-CPU
* struct pmap_pcid entry (presumably the entry of the CPU executing
* this code).  Asserts via MPASS() that pmap_pcid_enabled is set.
*
* NOTE(review): this view shows two lookups back to back: an indexed
* read of pm_pcids[PCPU_GET(cpuid)] and a zpcpu_get() on pm_pcidp.
* As written the first return makes the second lookup unreachable;
* this looks like the removed and added lines of a diff rendered
* without +/- markers -- confirm which form is live in the tree.
*/
static __inline uint32_t
pmap_get_pcid(pmap_t pmap)
{
struct pmap_pcid *pcidp;

MPASS(pmap_pcid_enabled);
return (pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
pcidp = zpcpu_get(pmap->pm_pcidp);
return (pcidp->pm_pcid);
}

#endif /* _KERNEL */
Expand Down

0 comments on commit 42f722e

Please sign in to comment.