[REVERTME] KVM: TDX, x86/tdp_mmu: Protect Secure-EPT page
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
yamahata committed Apr 24, 2023
1 parent 75e8335 commit 4c3e3b7
Showing 6 changed files with 99 additions and 11 deletions.
4 changes: 4 additions & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -1504,6 +1504,10 @@ struct kvm_arch {
struct kvm_mmu_memory_cache split_page_header_cache;
struct kvm_mmu_memory_cache split_private_spt_cache;

#ifdef CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT
struct mutex private_spt_for_split_lock;
struct kvm_mmu_memory_cache private_spt_for_split_cache;
#endif
/*
* Memory cache used to allocate pte_list_desc structs while splitting
* huge pages. In the worst case, to split one huge page, 512
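
The two fields above exist only under CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT: a per-VM memory cache of Secure-EPT pages that is filled ahead of time, plus a mutex guarding it, so that a later path which cannot sleep for allocation can still obtain a page. A minimal user-space sketch of that idea follows; the pool type, names, and sizes are illustrative, not taken from the patch.

/*
 * Minimal user-space sketch (not the kernel code): a pre-filled pool of
 * pages guarded by a mutex, so a path that must not sleep for allocation
 * can still obtain a page.  All names here are illustrative only.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define POOL_CAPACITY 8

struct page_pool {
	pthread_mutex_t lock;
	void *objects[POOL_CAPACITY];
	int nr_free;
};

/* Fill the pool up-front, in a context where allocation may sleep. */
static int pool_topup(struct page_pool *pool, int min)
{
	pthread_mutex_lock(&pool->lock);
	while (pool->nr_free < min && pool->nr_free < POOL_CAPACITY) {
		void *page = aligned_alloc(4096, 4096);

		if (!page) {
			pthread_mutex_unlock(&pool->lock);
			return -1;
		}
		pool->objects[pool->nr_free++] = page;
	}
	pthread_mutex_unlock(&pool->lock);
	return 0;
}

/* Later, a path that must not sleep just pops a pre-allocated page. */
static void *pool_alloc(struct page_pool *pool)
{
	void *page = NULL;

	pthread_mutex_lock(&pool->lock);
	if (pool->nr_free)
		page = pool->objects[--pool->nr_free];
	pthread_mutex_unlock(&pool->lock);
	return page;
}

int main(void)
{
	struct page_pool pool = { .lock = PTHREAD_MUTEX_INITIALIZER };

	if (pool_topup(&pool, 4))
		return 1;
	printf("got page %p from the pre-filled pool\n", pool_alloc(&pool));
	return 0;
}
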
48 changes: 45 additions & 3 deletions arch/x86/kvm/mmu/mmu.c
@@ -685,8 +685,14 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
if (r)
return r;
if (kvm_gfn_shared_mask(vcpu->kvm)) {
r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_private_spt_cache,
PT64_ROOT_MAX_LEVEL);
struct kvm_mmu_memory_cache *mc = &vcpu->arch.mmu_private_spt_cache;
int start, end, i;

start = kvm_mmu_memory_cache_nr_free_objects(mc);
r = kvm_mmu_topup_memory_cache(mc, PT64_ROOT_MAX_LEVEL);
end = kvm_mmu_memory_cache_nr_free_objects(mc);
for (i = start; i < end; i++)
kvm_mmu_split_direct_map(virt_to_page(mc->objects[i]));
if (r)
return r;
}
@@ -704,6 +710,26 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
PT64_ROOT_MAX_LEVEL);
}

int kvm_mmu_topup_memory_cache_for_split(struct kvm *kvm)
{
int r = 0;
#ifdef CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT
struct kvm_mmu_memory_cache *mc;
int start, end, i;

mutex_lock(&kvm->arch.private_spt_for_split_lock);
mc = &kvm->arch.private_spt_for_split_cache;
start = kvm_mmu_memory_cache_nr_free_objects(mc);
r = kvm_mmu_topup_memory_cache(mc, KVM_MAX_HUGEPAGE_LEVEL *
kvm->created_vcpus);
end = kvm_mmu_memory_cache_nr_free_objects(mc);
for (i = start; i < end; i++)
kvm_mmu_split_direct_map(virt_to_page(mc->objects[i]));
mutex_unlock(&kvm->arch.private_spt_for_split_lock);
#endif
return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
@@ -4786,6 +4812,11 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
r = mmu_topup_memory_caches(vcpu, false);
if (r)
return r;
if (fault->is_private) {
r = kvm_mmu_topup_memory_cache_for_split(vcpu->kvm);
if (r)
return r;
}

r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
if (r != RET_PF_CONTINUE)
@@ -6728,6 +6759,9 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache);
kvm_mmu_free_memory_cache(&kvm->arch.split_private_spt_cache);
#ifdef CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT
kvm_mmu_free_memory_cache(&kvm->arch.private_spt_for_split_cache);
#endif
}

void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -6881,6 +6915,8 @@ static int topup_split_caches(struct kvm *kvm)
*/
const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS +
KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
struct kvm_mmu_memory_cache *mc;
int start, end, i;
int r;

lockdep_assert_held(&kvm->slots_lock);
@@ -6898,7 +6934,13 @@ static int topup_split_caches(struct kvm *kvm)
if (r)
return r;

return kvm_mmu_topup_memory_cache(&kvm->arch.split_private_spt_cache, 1);
mc = &kvm->arch.split_private_spt_cache;
start = kvm_mmu_memory_cache_nr_free_objects(mc);
r = kvm_mmu_topup_memory_cache(mc, KVM_MAX_HUGEPAGE_LEVEL);
end = kvm_mmu_memory_cache_nr_free_objects(mc);
for (i = start; i < end; i++)
kvm_mmu_split_direct_map(virt_to_page(mc->objects[i]));
return r;
}

static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
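
The mmu.c changes above repeat one pattern: record kvm_mmu_memory_cache_nr_free_objects() before and after a topup, then run kvm_mmu_split_direct_map() on exactly the pages the topup added, apparently so each new Secure-EPT backing page gets its own 4K direct-map entry that can later be made not-present. Below is a simplified stand-alone C model of the "process only the newly topped-up objects" pattern; the cache type, sizes, and the marking step are illustrative stand-ins, not the KVM implementation.

/*
 * Sketch: record the free-object count before and after topup, then walk
 * only the delta.  Plain C model, not the KVM memory-cache code.
 */
#include <stdio.h>
#include <stdlib.h>

#define CACHE_CAPACITY 40

struct obj_cache {
	void *objects[CACHE_CAPACITY];
	int nr_free;
};

static int cache_nr_free(struct obj_cache *mc)
{
	return mc->nr_free;
}

static int cache_topup(struct obj_cache *mc, int min)
{
	while (mc->nr_free < min) {
		void *page = aligned_alloc(4096, 4096);

		if (!page)
			return -1;
		mc->objects[mc->nr_free++] = page;
	}
	return 0;
}

/* Stand-in for kvm_mmu_split_direct_map(): act on one newly added page. */
static void mark_page(void *page)
{
	printf("marking newly added page %p\n", page);
}

int main(void)
{
	struct obj_cache mc = { .nr_free = 0 };
	int start, end, i;

	start = cache_nr_free(&mc);
	if (cache_topup(&mc, 5))
		return 1;
	end = cache_nr_free(&mc);

	/* Only the objects added by this topup are touched. */
	for (i = start; i < end; i++)
		mark_page(mc.objects[i]);
	return 0;
}
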
29 changes: 27 additions & 2 deletions arch/x86/kvm/mmu/mmu_internal.h
@@ -201,12 +201,36 @@ static inline void kvm_mmu_alloc_private_spt(struct kvm_vcpu *vcpu, struct kvm_m
}
}

static inline int kvm_alloc_private_spt_for_split(struct kvm_mmu_page *sp, gfp_t gfp)
static inline int kvm_alloc_private_spt_for_split(struct kvm *kvm, struct kvm_mmu_page *sp,
gfp_t gfp, bool can_yield)
{
gfp &= ~__GFP_ZERO;

#ifdef CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT
/* This check is hacky. See the caller, tdp_mmu_alloc_sp_for_split(). */
if ((gfp & GFP_NOWAIT) && gfp != GFP_KERNEL_ACCOUNT) {
if (can_yield)
return -ENOMEM;
/*
* This is a hack to avoid blocking. Unless memory is severely
* lacking, it works. The correct fix is to pre-allocate the
* memory potentially needed on every possible execution path
* that runs under kvm->mmu_lock.
*/
while (!mutex_trylock(&kvm->arch.private_spt_for_split_lock))
/* nothing */;
sp->private_spt = kvm_mmu_memory_cache_alloc(&kvm->arch.private_spt_for_split_cache);
mutex_unlock(&kvm->arch.private_spt_for_split_lock);
if (WARN_ON_ONCE(!sp->private_spt))
return -ENOMEM;
return 0;
}
#endif

sp->private_spt = (void *)__get_free_page(gfp);
if (!sp->private_spt)
return -ENOMEM;
kvm_mmu_split_direct_map(virt_to_page(sp->private_spt));
return 0;
}

@@ -246,7 +270,8 @@ static inline void kvm_mmu_alloc_private_spt(struct kvm_vcpu *vcpu, struct kvm_m
{
}

static inline int kvm_alloc_private_spt_for_split(struct kvm_mmu_page *sp, gfp_t gfp)
static inline int kvm_alloc_private_spt_for_split(struct kvm *kvm, struct kvm_mmu_page *sp,
gfp_t gfp, bool can_yield)
{
return -ENOMEM;
}
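
kvm_alloc_private_spt_for_split() now takes the VM and a can_yield flag: a GFP_NOWAIT caller that cannot yield draws a page from the pre-filled private_spt_for_split_cache, busy-waiting on mutex_trylock() because it must not sleep, while a sleeping caller still uses __get_free_page(). A rough user-space model of that decision follows; the names and the plain array standing in for the KVM cache are assumptions for illustration.

/*
 * Simplified model: a non-sleeping caller pulls from the pre-filled,
 * mutex-guarded pool; a sleeping caller allocates a fresh page directly.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static void *pool[16];
static int pool_nr;

static void *alloc_private_spt(bool may_sleep)
{
	void *page;

	if (!may_sleep) {
		/* Mirror the mutex_trylock() loop: spin instead of sleeping. */
		while (pthread_mutex_trylock(&pool_lock))
			; /* spin */
		page = pool_nr ? pool[--pool_nr] : NULL;
		pthread_mutex_unlock(&pool_lock);
		return page;	/* NULL here maps to the WARN + -ENOMEM case */
	}

	/* Sleeping context: allocate directly (no zeroing, as in the patch). */
	return aligned_alloc(4096, 4096);
}

int main(void)
{
	/* Pre-fill the pool, as kvm_mmu_topup_memory_cache_for_split() would. */
	pool[pool_nr++] = aligned_alloc(4096, 4096);

	printf("atomic path: %p\n", alloc_private_spt(false));
	printf("sleeping path: %p\n", alloc_private_spt(true));
	return 0;
}
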
12 changes: 8 additions & 4 deletions arch/x86/kvm/mmu/tdp_mmu.c
@@ -23,6 +23,9 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
kvm->arch.tdp_mmu_zap_wq = wq;
#ifdef CONFIG_INTEL_TDX_HOST_DEBUG_MEMORY_CORRUPT
mutex_init(&kvm->arch.private_spt_for_split_lock);
#endif
return 1;
}

@@ -2055,7 +2058,8 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
return spte_set;
}

static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp, union kvm_mmu_page_role role)
static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(struct kvm *kvm, gfp_t gfp,
union kvm_mmu_page_role role, bool can_yield)
{
struct kvm_mmu_page *sp;

@@ -2068,7 +2072,7 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp, union kvm_mm
sp->role = role;
sp->spt = (void *)__get_free_page(gfp);
if (kvm_mmu_page_role_is_private(role)) {
if (kvm_alloc_private_spt_for_split(sp, gfp)) {
if (kvm_alloc_private_spt_for_split(kvm, sp, gfp, can_yield)) {
free_page((unsigned long)sp->spt);
sp->spt = NULL;
}
@@ -2101,7 +2105,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
* If this allocation fails we drop the lock and retry with reclaim
* allowed.
*/
sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT, role);
sp = __tdp_mmu_alloc_sp_for_split(kvm, GFP_NOWAIT | __GFP_ACCOUNT, role, can_yield);
if (sp || !can_yield)
return sp;

@@ -2113,7 +2117,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
write_unlock(&kvm->mmu_lock);

iter->yielded = true;
sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT, role);
sp = __tdp_mmu_alloc_sp_for_split(kvm, GFP_KERNEL_ACCOUNT, role, can_yield);

if (shared)
read_lock(&kvm->mmu_lock);
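
tdp_mmu_alloc_sp_for_split() keeps its existing shape: try a GFP_NOWAIT allocation while mmu_lock is held, and if that fails and the caller can yield, drop the lock, mark the iterator as yielded, and retry with GFP_KERNEL; the patch only threads kvm and can_yield through to the private-SPT helper. A condensed sketch of that retry pattern in plain C; the pthread rwlock and the allocation stubs are stand-ins, not the kernel API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the two allocation modes; illustrative only. */
static void *alloc_nowait(void)   { return NULL; }	/* pretend the non-blocking path fails */
static void *alloc_blocking(void) { return malloc(4096); }	/* may sleep in real kernel code */

struct split_iter {
	bool yielded;	/* tells the caller the walk must be restarted */
};

static void *alloc_sp_for_split(pthread_rwlock_t *mmu_lock, struct split_iter *iter,
				bool shared, bool can_yield)
{
	void *sp = alloc_nowait();

	if (sp || !can_yield)
		return sp;

	/* Drop the lock so the blocking allocation cannot stall other vCPUs. */
	pthread_rwlock_unlock(mmu_lock);
	iter->yielded = true;
	sp = alloc_blocking();

	/* Reacquire in the mode the caller held it before returning. */
	if (shared)
		pthread_rwlock_rdlock(mmu_lock);
	else
		pthread_rwlock_wrlock(mmu_lock);
	return sp;
}

int main(void)
{
	pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
	struct split_iter iter = { .yielded = false };
	void *sp;

	pthread_rwlock_rdlock(&lock);
	sp = alloc_sp_for_split(&lock, &iter, /*shared=*/true, /*can_yield=*/true);
	printf("sp=%p, yielded=%d\n", sp, iter.yielded);
	pthread_rwlock_unlock(&lock);
	return 0;
}
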
3 changes: 3 additions & 0 deletions arch/x86/kvm/vmx/tdx.c
@@ -1979,6 +1979,7 @@ static int tdx_sept_merge_private_spt(struct kvm *kvm, gfn_t gfn,
return -EIO;
}

tdx_set_page_present(__pa(private_spt));
return 0;
}

@@ -2131,6 +2132,8 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
return -EIO;
}

tdx_set_page_present(__pa(private_spt));
return 0;
}

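
The two tdx.c hunks call tdx_set_page_present() once the TDX module has released a Secure-EPT page (after a successful merge or free), pairing with the tdx_set_page_np() calls added in tdx_ops.h below: the page appears to be kept unreachable from the host direct map while the module owns it and is mapped again when handed back. A user-space analogy using mprotect() in place of direct-map updates; this is an illustration of the idea, not the kernel mechanism.

/*
 * Model: a page "donated" to the TDX module is made inaccessible, and made
 * accessible again once the module gives it back.  mprotect() stands in
 * for changing the kernel direct map.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *page = mmap(NULL, psize, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED)
		return 1;

	page[0] = 1;					/* host owns the page */

	/* "Donate" the page: any host access would now fault immediately. */
	mprotect(page, psize, PROT_NONE);

	/* "Reclaim" the page (cf. tdx_set_page_present()): host access is fine again. */
	mprotect(page, psize, PROT_READ | PROT_WRITE);
	page[0] = 2;

	printf("page reclaimed, value %d\n", page[0]);
	munmap(page, psize);
	return 0;
}
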
14 changes: 12 additions & 2 deletions arch/x86/kvm/vmx/tdx_ops.h
@@ -153,8 +153,13 @@ static inline u64 tdh_mem_page_add(hpa_t tdr, gpa_t gpa, int level, hpa_t hpa,
static inline u64 tdh_mem_sept_add(hpa_t tdr, gpa_t gpa, int level, hpa_t page,
struct tdx_module_output *out)
{
u64 r;

tdx_clflush_page(page, PG_LEVEL_4K);
return kvm_seamcall_sept(TDH_MEM_SEPT_ADD, gpa | level, tdr, page, 0, out);
r = kvm_seamcall_sept(TDH_MEM_SEPT_ADD, gpa | level, tdr, page, 0, out);
if (!r)
tdx_set_page_np(page);
return r;
}

static inline u64 tdh_mem_sept_remove(hpa_t tdr, gpa_t gpa, int level,
@@ -244,8 +249,13 @@ static inline u64 tdh_mng_rd(hpa_t tdr, u64 field, struct tdx_module_output *out
static inline u64 tdh_mem_page_demote(hpa_t tdr, gpa_t gpa, int level, hpa_t page,
struct tdx_module_output *out)
{
u64 r;

tdx_clflush_page(page, PG_LEVEL_4K);
return kvm_seamcall_sept(TDH_MEM_PAGE_DEMOTE, gpa | level, tdr, page, 0, out);
r = kvm_seamcall_sept(TDH_MEM_PAGE_DEMOTE, gpa | level, tdr, page, 0, out);
if (!r)
tdx_set_page_np(page);
return r;
}

static inline u64 tdh_mem_page_promote(hpa_t tdr, gpa_t gpa, int level,
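
The tdh_mem_sept_add() and tdh_mem_page_demote() wrappers now flush the page, issue the SEAMCALL, and only on success mark the donated page not-present. A sketch of that ordering with stand-in functions; the fake SEAMCALL, the no-op flush, and mprotect() are assumptions for illustration, not the TDX interface.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Pretend SEAMCALL: returns 0 on success, a nonzero error code otherwise. */
static uint64_t fake_seamcall_sept_add(void *page)
{
	(void)page;
	return 0;
}

/* Stand-in for tdx_clflush_page(); a no-op here. */
static void flush_page(void *page)
{
	(void)page;
}

static uint64_t sept_add(void *page, long psize)
{
	uint64_t r;

	flush_page(page);
	r = fake_seamcall_sept_add(page);
	if (!r)
		/* Only a page the module accepted loses host access (cf. tdx_set_page_np()). */
		mprotect(page, psize, PROT_NONE);
	return r;
}

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	void *page = mmap(NULL, psize, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED)
		return 1;
	printf("sept_add -> %llu\n", (unsigned long long)sept_add(page, psize));
	munmap(page, psize);
	return 0;
}
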
