Skip to content

Commit 62cc6b9

Browse files
dmatlack authored and bonzini committed
KVM: nVMX: support restore of VMX capability MSRs
The VMX capability MSRs advertise the set of features the KVM virtual CPU can support. This set of features varies across different host CPUs and KVM versions. This patch aims to address both sources of differences, allowing VMs to be migrated across CPUs and KVM versions without guest-visible changes to these MSRs. Note that cross-KVM-version migration is only supported from this point forward. When the VMX capability MSRs are restored, they are audited to check that the set of features advertised is a subset of what KVM and the CPU support. Since the VMX capability MSRs are read-only, they do not need to be on the default MSR save/restore lists. The userspace hypervisor can set the values of these MSRs or read them from KVM at VCPU creation time, and restore the same value after every save/restore. Signed-off-by: David Matlack <dmatlack@google.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
1 parent 0115f9c commit 62cc6b9

File tree

2 files changed

+297
-24
lines changed

2 files changed

+297
-24
lines changed

arch/x86/include/asm/vmx.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define VMX_H
2626

2727

28+
#include <linux/bitops.h>
2829
#include <linux/types.h>
2930
#include <uapi/asm/vmx.h>
3031

@@ -111,6 +112,36 @@
111112
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
112113
#define VMX_MISC_ACTIVITY_HLT 0x00000040
113114

115+
/* Return bits 30:0 of @vmx_basic, the VMCS revision identifier field. */
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
	const u64 revision_mask = GENMASK_ULL(30, 0);

	return vmx_basic & revision_mask;
}
119+
120+
/* Return bits 44:32 of @vmx_basic, the VMCS size field, shifted down to bit 0. */
static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
{
	/* Bits 44:32 become bits 12:0 once the low dword is shifted out. */
	return (vmx_basic >> 32) & GENMASK_ULL(12, 0);
}
124+
125+
/*
 * Return the preemption timer rate field of @vmx_misc, i.e. the bits
 * selected by VMX_MISC_PREEMPTION_TIMER_RATE_MASK.
 */
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
{
	const u64 rate = vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;

	return rate;
}
129+
130+
/* Return bits 24:16 of @vmx_misc (the CR3 count field), shifted down to bit 0. */
static inline int vmx_misc_cr3_count(u64 vmx_misc)
{
	/* Bits 24:16 become bits 8:0 after the shift. */
	return (vmx_misc >> 16) & GENMASK_ULL(8, 0);
}
134+
135+
/* Return bits 27:25 of @vmx_misc (the max MSR field), shifted down to bit 0. */
static inline int vmx_misc_max_msr(u64 vmx_misc)
{
	/* Bits 27:25 become bits 2:0 after the shift. */
	return (vmx_misc >> 25) & GENMASK_ULL(2, 0);
}
139+
140+
/* Return the upper 32 bits (63:32) of @vmx_misc, the MSEG revision id field. */
static inline int vmx_misc_mseg_revid(u64 vmx_misc)
{
	/* Shifting a u64 right by 32 leaves exactly bits 63:32; no mask needed. */
	return vmx_misc >> 32;
}
144+
114145
/* VMCS Encodings */
115146
enum vmcs_field {
116147
VIRTUAL_PROCESSOR_ID = 0x00000000,

arch/x86/kvm/vmx.c

Lines changed: 266 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,12 @@ struct nested_vmx {
471471
u32 nested_vmx_misc_high;
472472
u32 nested_vmx_ept_caps;
473473
u32 nested_vmx_vpid_caps;
474+
u64 nested_vmx_basic;
475+
u64 nested_vmx_cr0_fixed0;
476+
u64 nested_vmx_cr0_fixed1;
477+
u64 nested_vmx_cr4_fixed0;
478+
u64 nested_vmx_cr4_fixed1;
479+
u64 nested_vmx_vmcs_enum;
474480
};
475481

476482
#define POSTED_INTR_ON 0
@@ -2854,6 +2860,36 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
28542860
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
28552861
VMX_MISC_ACTIVITY_HLT;
28562862
vmx->nested.nested_vmx_misc_high = 0;
2863+
2864+
/*
2865+
* This MSR reports some information about VMX support. We
2866+
* should return information about the VMX we emulate for the
2867+
* guest, and the VMCS structure we give it - not about the
2868+
* VMX support of the underlying hardware.
2869+
*/
2870+
vmx->nested.nested_vmx_basic =
2871+
VMCS12_REVISION |
2872+
VMX_BASIC_TRUE_CTLS |
2873+
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
2874+
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
2875+
2876+
if (cpu_has_vmx_basic_inout())
2877+
vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
2878+
2879+
/*
2880+
* These MSRs specify bits which the guest must keep fixed (on or off)
2881+
* while L1 is in VMXON mode (in L1's root mode, or running an L2).
2882+
* We picked the standard core2 setting.
2883+
*/
2884+
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2885+
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
2886+
vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
2887+
vmx->nested.nested_vmx_cr0_fixed1 = -1ULL;
2888+
vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
2889+
vmx->nested.nested_vmx_cr4_fixed1 = -1ULL;
2890+
2891+
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
2892+
vmx->nested.nested_vmx_vmcs_enum = 0x2e;
28572893
}
28582894

28592895
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
@@ -2869,24 +2905,233 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
28692905
return low | ((u64)high << 32);
28702906
}
28712907

2872-
/* Returns 0 on success, non-0 otherwise. */
2873-
static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2908+
/*
 * Return true if, considering only the bits selected by @mask, every bit
 * set in @subset is also set in @superset.
 */
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	/* Masked bits that @subset has but @superset lacks. */
	const u64 extra = subset & ~superset & mask;

	return !extra;
}
2915+
2916+
/*
 * Audit a userspace-supplied IA32_VMX_BASIC value against the value the
 * vCPU currently advertises, and store it if it is acceptable.
 *
 * @data is rejected with -EINVAL when it:
 *   - sets a feature bit (49, 54, 55) or reserved bit (31, 47:45, 63:56)
 *     that is clear in the current value,
 *   - sets bit 48,
 *   - carries a different VMCS revision id, or
 *   - carries a VMCS size smaller than the current one.
 *
 * Returns 0 on success.
 */
static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	/* Feature bits, except bit 48 which is checked on its own below. */
	const u64 feature_bits = BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55);
	const u64 reserved_bits =
		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
	const u64 old_basic = vmx->nested.nested_vmx_basic;

	if (!is_bitwise_subset(old_basic, data, feature_bits | reserved_bits))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	if (vmx_basic_vmcs_revision_id(data) !=
	    vmx_basic_vmcs_revision_id(old_basic))
		return -EINVAL;

	/* The restored VMCS size may not be smaller than the current one. */
	if (vmx_basic_vmcs_size(data) < vmx_basic_vmcs_size(old_basic))
		return -EINVAL;

	vmx->nested.nested_vmx_basic = data;
	return 0;
}
2945+
2946+
static int
2947+
vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
2948+
{
2949+
u64 supported;
2950+
u32 *lowp, *highp;
2951+
2952+
switch (msr_index) {
2953+
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2954+
lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
2955+
highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
2956+
break;
2957+
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2958+
lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
2959+
highp = &vmx->nested.nested_vmx_procbased_ctls_high;
2960+
break;
2961+
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2962+
lowp = &vmx->nested.nested_vmx_exit_ctls_low;
2963+
highp = &vmx->nested.nested_vmx_exit_ctls_high;
2964+
break;
2965+
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2966+
lowp = &vmx->nested.nested_vmx_entry_ctls_low;
2967+
highp = &vmx->nested.nested_vmx_entry_ctls_high;
2968+
break;
2969+
case MSR_IA32_VMX_PROCBASED_CTLS2:
2970+
lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
2971+
highp = &vmx->nested.nested_vmx_secondary_ctls_high;
2972+
break;
2973+
default:
2974+
BUG();
2975+
}
2976+
2977+
supported = vmx_control_msr(*lowp, *highp);
2978+
2979+
/* Check must-be-1 bits are still 1. */
2980+
if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
2981+
return -EINVAL;
2982+
2983+
/* Check must-be-0 bits are still 0. */
2984+
if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
2985+
return -EINVAL;
2986+
2987+
*lowp = data;
2988+
*highp = data >> 32;
2989+
return 0;
2990+
}
2991+
2992+
/*
 * Audit a userspace-supplied IA32_VMX_MISC value against the value the
 * vCPU currently advertises, and store it if it is acceptable.
 *
 * @data is rejected with -EINVAL when it:
 *   - sets a feature bit (5, 8:6, 14, 15, 28, 29, 30) or reserved bit
 *     (13:9, 31) that is clear in the current value,
 *   - changes the preemption timer rate while the preemption timer
 *     pin-based control is advertised,
 *   - raises the CR3 count or the max MSR field, or
 *   - changes the MSEG revision id.
 *
 * Returns 0 on success.
 */
static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_bits =
		BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30);
	const u64 reserved_bits = GENMASK_ULL(13, 9) | BIT_ULL(31);
	const u64 old_misc =
		vmx_control_msr(vmx->nested.nested_vmx_misc_low,
				vmx->nested.nested_vmx_misc_high);
	const bool timer_advertised =
		vmx->nested.nested_vmx_pinbased_ctls_high &
		PIN_BASED_VMX_PREEMPTION_TIMER;

	if (!is_bitwise_subset(old_misc, data, feature_bits | reserved_bits))
		return -EINVAL;

	/* The timer rate only matters if the preemption timer is exposed. */
	if (timer_advertised &&
	    vmx_misc_preemption_timer_rate(data) !=
	    vmx_misc_preemption_timer_rate(old_misc))
		return -EINVAL;

	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(old_misc))
		return -EINVAL;

	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(old_misc))
		return -EINVAL;

	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(old_misc))
		return -EINVAL;

	vmx->nested.nested_vmx_misc_low = (u32)data;
	vmx->nested.nested_vmx_misc_high = (u32)(data >> 32);
	return 0;
}
3027+
3028+
/*
 * Audit and store a userspace-supplied IA32_VMX_EPT_VPID_CAP value.
 *
 * The current value is assembled from nested_vmx_ept_caps (low 32 bits)
 * and nested_vmx_vpid_caps (high 32 bits). @data is rejected with -EINVAL
 * if it sets any bit that is clear in the current value; otherwise it is
 * split back into those two fields and 0 is returned.
 */
static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
	const u64 supported =
		vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
				vmx->nested.nested_vmx_vpid_caps);

	/*
	 * Every bit is either reserved or a feature bit, so no bit may be
	 * set in @data unless it is set in the current value.
	 */
	if (data & ~supported)
		return -EINVAL;

	vmx->nested.nested_vmx_ept_caps = (u32)data;
	vmx->nested.nested_vmx_vpid_caps = (u32)(data >> 32);
	return 0;
}
3043+
3044+
/*
 * Audit and store a userspace-supplied value for IA32_VMX_CR0_FIXED0 or
 * IA32_VMX_CR4_FIXED0.
 *
 * @data is rejected with -EINVAL if it clears any bit that is set in the
 * current value. Returns 0 on success. Any other @msr_index hits BUG().
 */
static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	u64 *fixed0;

	if (msr_index == MSR_IA32_VMX_CR0_FIXED0)
		fixed0 = &vmx->nested.nested_vmx_cr0_fixed0;
	else if (msr_index == MSR_IA32_VMX_CR4_FIXED0)
		fixed0 = &vmx->nested.nested_vmx_cr4_fixed0;
	else
		BUG();

	/*
	 * 1 bits (which indicate bits that "must-be-1" during VMX operation)
	 * must be 1 in the restored value.
	 */
	if (*fixed0 & ~data)
		return -EINVAL;

	*fixed0 = data;
	return 0;
}
3069+
3070+
/*
3071+
* Called when userspace is restoring VMX MSRs.
3072+
*
3073+
* Returns 0 on success, non-0 otherwise.
3074+
*/
3075+
static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
28743076
{
28753077
struct vcpu_vmx *vmx = to_vmx(vcpu);
28763078

28773079
switch (msr_index) {
28783080
case MSR_IA32_VMX_BASIC:
3081+
return vmx_restore_vmx_basic(vmx, data);
3082+
case MSR_IA32_VMX_PINBASED_CTLS:
3083+
case MSR_IA32_VMX_PROCBASED_CTLS:
3084+
case MSR_IA32_VMX_EXIT_CTLS:
3085+
case MSR_IA32_VMX_ENTRY_CTLS:
3086+
/*
3087+
* The "non-true" VMX capability MSRs are generated from the
3088+
* "true" MSRs, so we do not support restoring them directly.
3089+
*
3090+
* If userspace wants to emulate VMX_BASIC[55]=0, userspace
3091+
* should restore the "true" MSRs with the must-be-1 bits
3092+
* set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3093+
* DEFAULT SETTINGS".
3094+
*/
3095+
return -EINVAL;
3096+
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3097+
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3098+
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3099+
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3100+
case MSR_IA32_VMX_PROCBASED_CTLS2:
3101+
return vmx_restore_control_msr(vmx, msr_index, data);
3102+
case MSR_IA32_VMX_MISC:
3103+
return vmx_restore_vmx_misc(vmx, data);
3104+
case MSR_IA32_VMX_CR0_FIXED0:
3105+
case MSR_IA32_VMX_CR4_FIXED0:
3106+
return vmx_restore_fixed0_msr(vmx, msr_index, data);
3107+
case MSR_IA32_VMX_CR0_FIXED1:
3108+
case MSR_IA32_VMX_CR4_FIXED1:
3109+
/*
3110+
* These MSRs are generated based on the vCPU's CPUID, so we
3111+
* do not support restoring them directly.
3112+
*/
3113+
return -EINVAL;
3114+
case MSR_IA32_VMX_EPT_VPID_CAP:
3115+
return vmx_restore_vmx_ept_vpid_cap(vmx, data);
3116+
case MSR_IA32_VMX_VMCS_ENUM:
3117+
vmx->nested.nested_vmx_vmcs_enum = data;
3118+
return 0;
3119+
default:
28793120
/*
2880-
* This MSR reports some information about VMX support. We
2881-
* should return information about the VMX we emulate for the
2882-
* guest, and the VMCS structure we give it - not about the
2883-
* VMX support of the underlying hardware.
3121+
* The rest of the VMX capability MSRs do not support restore.
28843122
*/
2885-
*pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
2886-
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
2887-
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
2888-
if (cpu_has_vmx_basic_inout())
2889-
*pdata |= VMX_BASIC_INOUT;
3123+
return -EINVAL;
3124+
}
3125+
}
3126+
3127+
/* Returns 0 on success, non-0 otherwise. */
3128+
static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
3129+
{
3130+
struct vcpu_vmx *vmx = to_vmx(vcpu);
3131+
3132+
switch (msr_index) {
3133+
case MSR_IA32_VMX_BASIC:
3134+
*pdata = vmx->nested.nested_vmx_basic;
28903135
break;
28913136
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
28923137
case MSR_IA32_VMX_PINBASED_CTLS:
@@ -2925,27 +3170,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
29253170
vmx->nested.nested_vmx_misc_low,
29263171
vmx->nested.nested_vmx_misc_high);
29273172
break;
2928-
/*
2929-
* These MSRs specify bits which the guest must keep fixed (on or off)
2930-
* while L1 is in VMXON mode (in L1's root mode, or running an L2).
2931-
* We picked the standard core2 setting.
2932-
*/
2933-
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2934-
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
29353173
case MSR_IA32_VMX_CR0_FIXED0:
2936-
*pdata = VMXON_CR0_ALWAYSON;
3174+
*pdata = vmx->nested.nested_vmx_cr0_fixed0;
29373175
break;
29383176
case MSR_IA32_VMX_CR0_FIXED1:
2939-
*pdata = -1ULL;
3177+
*pdata = vmx->nested.nested_vmx_cr0_fixed1;
29403178
break;
29413179
case MSR_IA32_VMX_CR4_FIXED0:
2942-
*pdata = VMXON_CR4_ALWAYSON;
3180+
*pdata = vmx->nested.nested_vmx_cr4_fixed0;
29433181
break;
29443182
case MSR_IA32_VMX_CR4_FIXED1:
2945-
*pdata = -1ULL;
3183+
*pdata = vmx->nested.nested_vmx_cr4_fixed1;
29463184
break;
29473185
case MSR_IA32_VMX_VMCS_ENUM:
2948-
*pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
3186+
*pdata = vmx->nested.nested_vmx_vmcs_enum;
29493187
break;
29503188
case MSR_IA32_VMX_PROCBASED_CTLS2:
29513189
*pdata = vmx_control_msr(
@@ -3128,7 +3366,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
31283366
vmx_leave_nested(vcpu);
31293367
break;
31303368
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3131-
return 1; /* they are read-only */
3369+
if (!msr_info->host_initiated)
3370+
return 1; /* they are read-only */
3371+
if (!nested_vmx_allowed(vcpu))
3372+
return 1;
3373+
return vmx_set_vmx_msr(vcpu, msr_index, data);
31323374
case MSR_IA32_XSS:
31333375
if (!vmx_xsaves_supported())
31343376
return 1;

0 commit comments

Comments
 (0)