Skip to content

Commit 4f128f8

Browse files
Marc Zyngier authored and
oupton (Oliver Upton) committed
KVM: arm64: nv: Support multiple nested Stage-2 mmu structures
Add Stage-2 mmu data structures for virtual EL2 and for nested guests. We don't yet populate shadow Stage-2 page tables, but we now have a framework for getting to a shadow Stage-2 pgd. We allocate twice the number of vcpus as Stage-2 mmu structures because that's sufficient for each vcpu running two translation regimes without having to flush the Stage-2 page tables. Co-developed-by: Christoffer Dall <christoffer.dall@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@arm.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20240614144552.2773592-2-maz@kernel.org Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
1 parent 83a7eef commit 4f128f8

File tree

7 files changed

+349
-21
lines changed

7 files changed

+349
-21
lines changed

arch/arm64/include/asm/kvm_host.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,33 @@ struct kvm_s2_mmu {
189189
uint64_t split_page_chunk_size;
190190

191191
struct kvm_arch *arch;
192+
193+
/*
194+
* For a shadow stage-2 MMU, the virtual vttbr used by the
195+
* host to parse the guest S2.
196+
* This either contains:
197+
* - the virtual VTTBR programmed by the guest hypervisor with
198+
* CnP cleared
199+
* - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
200+
*
201+
* We also cache the full VTCR which gets used for TLB invalidation,
202+
* taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
203+
* to be cached in a TLB" to the letter.
204+
*/
205+
u64 tlb_vttbr;
206+
u64 tlb_vtcr;
207+
208+
/*
209+
* true when this represents a nested context where virtual
210+
* HCR_EL2.VM == 1
211+
*/
212+
bool nested_stage2_enabled;
213+
214+
/*
215+
* 0: Nobody is currently using this, check vttbr for validity
216+
* >0: Somebody is actively using this.
217+
*/
218+
atomic_t refcnt;
192219
};
193220

194221
struct kvm_arch_memory_slot {
@@ -256,6 +283,14 @@ struct kvm_arch {
256283
*/
257284
u64 fgu[__NR_FGT_GROUP_IDS__];
258285

286+
/*
287+
* Stage 2 paging state for VMs with nested S2 using a virtual
288+
* VMID.
289+
*/
290+
struct kvm_s2_mmu *nested_mmus;
291+
size_t nested_mmus_size;
292+
int nested_mmus_next;
293+
259294
/* Interrupt controller */
260295
struct vgic_dist vgic;
261296

@@ -1306,6 +1341,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
13061341
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
13071342

13081343
int __init kvm_set_ipa_limit(void);
1344+
u32 kvm_get_pa_bits(struct kvm *kvm);
13091345

13101346
#define __KVM_HAVE_ARCH_VM_ALLOC
13111347
struct kvm *kvm_arch_alloc_vm(void);

arch/arm64/include/asm/kvm_mmu.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ alternative_cb_end
9898
#include <asm/mmu_context.h>
9999
#include <asm/kvm_emulate.h>
100100
#include <asm/kvm_host.h>
101+
#include <asm/kvm_nested.h>
101102

102103
void kvm_update_va_mask(struct alt_instr *alt,
103104
__le32 *origptr, __le32 *updptr, int nr_inst);
@@ -165,6 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
165166
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
166167
void __init free_hyp_pgds(void);
167168

169+
void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);
170+
168171
void stage2_unmap_vm(struct kvm *kvm);
169172
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
170173
void kvm_uninit_stage2_mmu(struct kvm *kvm);
@@ -326,5 +329,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
326329
{
327330
return container_of(mmu->arch, struct kvm, arch);
328331
}
332+
333+
static inline u64 get_vmid(u64 vttbr)
334+
{
335+
return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
336+
VTTBR_VMID_SHIFT;
337+
}
338+
339+
static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
340+
{
341+
return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
342+
}
343+
344+
static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
345+
{
346+
/*
347+
* Be careful, mmu may not be fully initialised so do not look at
348+
* *any* of its fields.
349+
*/
350+
return &kvm->arch.mmu != mmu;
351+
}
352+
329353
#endif /* __ASSEMBLY__ */
330354
#endif /* __ARM64_KVM_MMU_H__ */

arch/arm64/include/asm/kvm_nested.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
6161
}
6262

6363
extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
64+
extern void kvm_init_nested(struct kvm *kvm);
65+
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
66+
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
67+
extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
68+
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
69+
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
6470

6571
int kvm_init_nv_sysregs(struct kvm *kvm);
6672

arch/arm64/kvm/arm.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
170170
mutex_unlock(&kvm->lock);
171171
#endif
172172

173+
kvm_init_nested(kvm);
174+
173175
ret = kvm_share_hyp(kvm, kvm + 1);
174176
if (ret)
175177
return ret;
@@ -551,6 +553,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
551553
struct kvm_s2_mmu *mmu;
552554
int *last_ran;
553555

556+
if (vcpu_has_nv(vcpu))
557+
kvm_vcpu_load_hw_mmu(vcpu);
558+
554559
mmu = vcpu->arch.hw_mmu;
555560
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
556561

@@ -601,6 +606,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
601606
kvm_timer_vcpu_put(vcpu);
602607
kvm_vgic_put(vcpu);
603608
kvm_vcpu_pmu_restore_host(vcpu);
609+
if (vcpu_has_nv(vcpu))
610+
kvm_vcpu_put_hw_mmu(vcpu);
604611
kvm_arm_vmid_clear_active();
605612

606613
vcpu_clear_on_unsupported_cpu(vcpu);
@@ -1459,6 +1466,10 @@ static int kvm_setup_vcpu(struct kvm_vcpu *vcpu)
14591466
if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu)
14601467
ret = kvm_arm_set_default_pmu(kvm);
14611468

1469+
/* Prepare for nested if required */
1470+
if (!ret && vcpu_has_nv(vcpu))
1471+
ret = kvm_vcpu_init_nested(vcpu);
1472+
14621473
return ret;
14631474
}
14641475

arch/arm64/kvm/mmu.c

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
328328
may_block));
329329
}
330330

331-
static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
331+
void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
332332
{
333333
__unmap_stage2_range(mmu, start, size, true);
334334
}
@@ -855,21 +855,9 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
855855
.icache_inval_pou = invalidate_icache_guest_page,
856856
};
857857

858-
/**
859-
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
860-
* @kvm: The pointer to the KVM structure
861-
* @mmu: The pointer to the s2 MMU structure
862-
* @type: The machine type of the virtual machine
863-
*
864-
* Allocates only the stage-2 HW PGD level table(s).
865-
* Note we don't need locking here as this is only called when the VM is
866-
* created, which can only be done once.
867-
*/
868-
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
858+
static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
869859
{
870860
u32 kvm_ipa_limit = get_kvm_ipa_limit();
871-
int cpu, err;
872-
struct kvm_pgtable *pgt;
873861
u64 mmfr0, mmfr1;
874862
u32 phys_shift;
875863

@@ -896,11 +884,51 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
896884
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
897885
mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
898886

887+
return 0;
888+
}
889+
890+
/**
891+
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
892+
* @kvm: The pointer to the KVM structure
893+
* @mmu: The pointer to the s2 MMU structure
894+
* @type: The machine type of the virtual machine
895+
*
896+
* Allocates only the stage-2 HW PGD level table(s).
897+
* Note we don't need locking here as this is only called in two cases:
898+
*
899+
* - when the VM is created, which can't race against anything
900+
*
901+
* - when secondary kvm_s2_mmu structures are initialised for NV
902+
* guests, and the caller must hold kvm->lock as this is called on a
903+
* per-vcpu basis.
904+
*/
905+
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
906+
{
907+
int cpu, err;
908+
struct kvm_pgtable *pgt;
909+
910+
/*
911+
* If we already have our page tables in place, and that the
912+
* MMU context is the canonical one, we have a bug somewhere,
913+
* as this is only supposed to ever happen once per VM.
914+
*
915+
* Otherwise, we're building nested page tables, and that's
916+
* probably because userspace called KVM_ARM_VCPU_INIT more
917+
* than once on the same vcpu. Since that's actually legal,
918+
* don't kick a fuss and leave gracefully.
919+
*/
899920
if (mmu->pgt != NULL) {
921+
if (kvm_is_nested_s2_mmu(kvm, mmu))
922+
return 0;
923+
900924
kvm_err("kvm_arch already initialized?\n");
901925
return -EINVAL;
902926
}
903927

928+
err = kvm_init_ipa_range(mmu, type);
929+
if (err)
930+
return err;
931+
904932
pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
905933
if (!pgt)
906934
return -ENOMEM;
@@ -925,6 +953,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
925953

926954
mmu->pgt = pgt;
927955
mmu->pgd_phys = __pa(pgt->pgd);
956+
957+
if (kvm_is_nested_s2_mmu(kvm, mmu))
958+
kvm_init_nested_s2_mmu(mmu);
959+
928960
return 0;
929961

930962
out_destroy_pgtable:
@@ -976,7 +1008,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
9761008

9771009
if (!(vma->vm_flags & VM_PFNMAP)) {
9781010
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
979-
unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
1011+
kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
9801012
}
9811013
hva = vm_end;
9821014
} while (hva < reg_end);
@@ -2022,19 +2054,14 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
20222054
{
20232055
}
20242056

2025-
void kvm_arch_flush_shadow_all(struct kvm *kvm)
2026-
{
2027-
kvm_uninit_stage2_mmu(kvm);
2028-
}
2029-
20302057
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
20312058
struct kvm_memory_slot *slot)
20322059
{
20332060
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
20342061
phys_addr_t size = slot->npages << PAGE_SHIFT;
20352062

20362063
write_lock(&kvm->mmu_lock);
2037-
unmap_stage2_range(&kvm->arch.mmu, gpa, size);
2064+
kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
20382065
write_unlock(&kvm->mmu_lock);
20392066
}
20402067

0 commit comments

Comments
 (0)