@@ -471,6 +471,12 @@ struct nested_vmx {
471471 u32 nested_vmx_misc_high ;
472472 u32 nested_vmx_ept_caps ;
473473 u32 nested_vmx_vpid_caps ;
474+ u64 nested_vmx_basic ;
475+ u64 nested_vmx_cr0_fixed0 ;
476+ u64 nested_vmx_cr0_fixed1 ;
477+ u64 nested_vmx_cr4_fixed0 ;
478+ u64 nested_vmx_cr4_fixed1 ;
479+ u64 nested_vmx_vmcs_enum ;
474480};
475481
476482#define POSTED_INTR_ON 0
@@ -2854,6 +2860,36 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
28542860 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
28552861 VMX_MISC_ACTIVITY_HLT ;
28562862 vmx -> nested .nested_vmx_misc_high = 0 ;
2863+
2864+ /*
2865+ * This MSR reports some information about VMX support. We
2866+ * should return information about the VMX we emulate for the
2867+ * guest, and the VMCS structure we give it - not about the
2868+ * VMX support of the underlying hardware.
2869+ */
2870+ vmx -> nested .nested_vmx_basic =
2871+ VMCS12_REVISION |
2872+ VMX_BASIC_TRUE_CTLS |
2873+ ((u64 )VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT ) |
2874+ (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT );
2875+
2876+ if (cpu_has_vmx_basic_inout ())
2877+ vmx -> nested .nested_vmx_basic |= VMX_BASIC_INOUT ;
2878+
2879+ /*
2880+ * These MSRs specify bits which the guest must keep fixed (on or off)
2881+ * while L1 is in VMXON mode (in L1's root mode, or running an L2).
2882+ * We picked the standard core2 setting.
2883+ */
2884+ #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2885+ #define VMXON_CR4_ALWAYSON X86_CR4_VMXE
2886+ vmx -> nested .nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON ;
2887+ vmx -> nested .nested_vmx_cr0_fixed1 = -1ULL ;
2888+ vmx -> nested .nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON ;
2889+ vmx -> nested .nested_vmx_cr4_fixed1 = -1ULL ;
2890+
2891+ /* highest index: VMX_PREEMPTION_TIMER_VALUE */
2892+ vmx -> nested .nested_vmx_vmcs_enum = 0x2e ;
28572893}
28582894
28592895static inline bool vmx_control_verify (u32 control , u32 low , u32 high )
@@ -2869,24 +2905,233 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
28692905 return low | ((u64 )high << 32 );
28702906}
28712907
2872- /* Returns 0 on success, non-0 otherwise. */
2873- static int vmx_get_vmx_msr (struct kvm_vcpu * vcpu , u32 msr_index , u64 * pdata )
/*
 * Report whether @subset is a bitwise subset of @superset within @mask.
 *
 * Both values are first restricted to the bits selected by @mask. The
 * result is true when every bit that is then set in @subset is also set
 * in @superset; bits outside @mask never influence the result.
 */
2908+ static bool is_bitwise_subset (u64 superset , u64 subset , u64 mask )
2909+ {
2910+ superset &= mask ;
2911+ subset &= mask ;
2912+
/* OR-ing in the subset adds nothing new iff it is fully contained. */
2913+ return (superset | subset ) == superset ;
2914+ }
2915+
/*
 * Validate and store a new value for vmx->nested.nested_vmx_basic.
 *
 * @data is rejected with -EINVAL when any of the following holds:
 *  - it sets a bit of the feature_and_reserved mask that is clear in the
 *    currently stored value;
 *  - it sets bit 48;
 *  - vmx_basic_vmcs_revision_id() differs between the stored value and
 *    @data;
 *  - vmx_basic_vmcs_size() of the stored value is larger than that of
 *    @data.
 * Otherwise @data replaces the stored value and 0 is returned.
 *
 * NOTE(review): the vmx_basic_*() accessors are defined outside this view;
 * presumably they extract the revision-id and VMCS-size fields - confirm.
 */
2916+ static int vmx_restore_vmx_basic (struct vcpu_vmx * vmx , u64 data )
2917+ {
2918+ const u64 feature_and_reserved =
2919+ /* feature (except bit 48; see below) */
2920+ BIT_ULL (49 ) | BIT_ULL (54 ) | BIT_ULL (55 ) |
2921+ /* reserved */
2922+ BIT_ULL (31 ) | GENMASK_ULL (47 , 45 ) | GENMASK_ULL (63 , 56 );
2923+ u64 vmx_basic = vmx -> nested .nested_vmx_basic ;
2924+
/* Within the mask, userspace may only clear bits, never set new ones. */
2925+ if (!is_bitwise_subset (vmx_basic , data , feature_and_reserved ))
2926+ return - EINVAL ;
2927+
2928+ /*
2929+ * KVM does not emulate a version of VMX that constrains physical
2930+ * addresses of VMX structures (e.g. VMCS) to 32-bits.
2931+ */
2932+ if (data & BIT_ULL (48 ))
2933+ return - EINVAL ;
2934+
/* The revision id must be carried over unchanged. */
2935+ if (vmx_basic_vmcs_revision_id (vmx_basic ) !=
2936+ vmx_basic_vmcs_revision_id (data ))
2937+ return - EINVAL ;
2938+
/* The restored size may be equal or larger, but not smaller. */
2939+ if (vmx_basic_vmcs_size (vmx_basic ) > vmx_basic_vmcs_size (data ))
2940+ return - EINVAL ;
2941+
2942+ vmx -> nested .nested_vmx_basic = data ;
2943+ return 0 ;
2944+ }
2945+
/*
 * Validate and store a new value for one of the VMX control capability
 * MSRs, which are kept as a pair of 32-bit halves (*_low / *_high) in
 * vmx->nested.
 *
 * @msr_index selects the pair; any index other than the five handled
 * below hits BUG(). The current 64-bit value is rebuilt with
 * vmx_control_msr(low, high) and @data is accepted only if
 *  - in bits 31:0, every bit set in the current value is also set in
 *    @data, and
 *  - in bits 63:32, every bit set in @data is also set in the current
 *    value.
 *
 * Returns 0 after updating both halves, or -EINVAL if a check fails.
 */
2946+ static int
2947+ vmx_restore_control_msr (struct vcpu_vmx * vmx , u32 msr_index , u64 data )
2948+ {
2949+ u64 supported ;
2950+ u32 * lowp , * highp ;
2951+
2952+ switch (msr_index ) {
2953+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS :
2954+ lowp = & vmx -> nested .nested_vmx_pinbased_ctls_low ;
2955+ highp = & vmx -> nested .nested_vmx_pinbased_ctls_high ;
2956+ break ;
2957+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS :
2958+ lowp = & vmx -> nested .nested_vmx_procbased_ctls_low ;
2959+ highp = & vmx -> nested .nested_vmx_procbased_ctls_high ;
2960+ break ;
2961+ case MSR_IA32_VMX_TRUE_EXIT_CTLS :
2962+ lowp = & vmx -> nested .nested_vmx_exit_ctls_low ;
2963+ highp = & vmx -> nested .nested_vmx_exit_ctls_high ;
2964+ break ;
2965+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS :
2966+ lowp = & vmx -> nested .nested_vmx_entry_ctls_low ;
2967+ highp = & vmx -> nested .nested_vmx_entry_ctls_high ;
2968+ break ;
2969+ case MSR_IA32_VMX_PROCBASED_CTLS2 :
2970+ lowp = & vmx -> nested .nested_vmx_secondary_ctls_low ;
2971+ highp = & vmx -> nested .nested_vmx_secondary_ctls_high ;
2972+ break ;
2973+ default :
/* Callers are expected to pass only the indices handled above. */
2974+ BUG ();
2975+ }
2976+
2977+ supported = vmx_control_msr (* lowp , * highp );
2978+
2979+ /* Check must-be-1 bits are still 1. */
2980+ if (!is_bitwise_subset (data , supported , GENMASK_ULL (31 , 0 )))
2981+ return - EINVAL ;
2982+
2983+ /* Check must-be-0 bits are still 0. */
2984+ if (!is_bitwise_subset (supported , data , GENMASK_ULL (63 , 32 )))
2985+ return - EINVAL ;
2986+
/* Storing through a u32 pointer keeps only the low 32 bits of data. */
2987+ * lowp = data ;
2988+ * highp = data >> 32 ;
2989+ return 0 ;
2990+ }
2991+
/*
 * Validate and store a new value for the nested MISC capability, held as
 * vmx->nested.nested_vmx_misc_low / nested_vmx_misc_high.
 *
 * @data is rejected with -EINVAL when
 *  - it sets a bit of feature_and_reserved_bits that is clear in the
 *    current value;
 *  - PIN_BASED_VMX_PREEMPTION_TIMER is set in nested_vmx_pinbased_ctls_high
 *    and the preemption timer rate differs from the current one;
 *  - its CR3 count or max MSR value exceeds the current one;
 *  - its MSEG revision id differs from the current one.
 * Otherwise both halves are overwritten from @data and 0 is returned.
 *
 * NOTE(review): the vmx_misc_*() accessors are defined outside this view;
 * their field layouts are assumed from their names - confirm.
 */
2992+ static int vmx_restore_vmx_misc (struct vcpu_vmx * vmx , u64 data )
2993+ {
2994+ const u64 feature_and_reserved_bits =
2995+ /* feature */
2996+ BIT_ULL (5 ) | GENMASK_ULL (8 , 6 ) | BIT_ULL (14 ) | BIT_ULL (15 ) |
2997+ BIT_ULL (28 ) | BIT_ULL (29 ) | BIT_ULL (30 ) |
2998+ /* reserved */
2999+ GENMASK_ULL (13 , 9 ) | BIT_ULL (31 );
3000+ u64 vmx_misc ;
3001+
/* Rebuild the currently stored 64-bit value from its two halves. */
3002+ vmx_misc = vmx_control_msr (vmx -> nested .nested_vmx_misc_low ,
3003+ vmx -> nested .nested_vmx_misc_high );
3004+
/* Within the mask, userspace may only clear bits, never set new ones. */
3005+ if (!is_bitwise_subset (vmx_misc , data , feature_and_reserved_bits ))
3006+ return - EINVAL ;
3007+
/* The timer rate is only compared when the preemption timer bit is set. */
3008+ if ((vmx -> nested .nested_vmx_pinbased_ctls_high &
3009+ PIN_BASED_VMX_PREEMPTION_TIMER ) &&
3010+ vmx_misc_preemption_timer_rate (data ) !=
3011+ vmx_misc_preemption_timer_rate (vmx_misc ))
3012+ return - EINVAL ;
3013+
/* Count-like fields may shrink but must not grow. */
3014+ if (vmx_misc_cr3_count (data ) > vmx_misc_cr3_count (vmx_misc ))
3015+ return - EINVAL ;
3016+
3017+ if (vmx_misc_max_msr (data ) > vmx_misc_max_msr (vmx_misc ))
3018+ return - EINVAL ;
3019+
3020+ if (vmx_misc_mseg_revid (data ) != vmx_misc_mseg_revid (vmx_misc ))
3021+ return - EINVAL ;
3022+
3023+ vmx -> nested .nested_vmx_misc_low = data ;
3024+ vmx -> nested .nested_vmx_misc_high = data >> 32 ;
3025+ return 0 ;
3026+ }
3027+
/*
 * Validate and store a new EPT/VPID capability value.
 *
 * The current value is assembled with nested_vmx_ept_caps as the low
 * 32 bits and nested_vmx_vpid_caps as the high 32 bits. @data is accepted
 * only if it sets no bit that is clear in the current value, checked over
 * all 64 bits.
 *
 * Returns 0 after updating both fields, or -EINVAL otherwise.
 */
3028+ static int vmx_restore_vmx_ept_vpid_cap (struct vcpu_vmx * vmx , u64 data )
3029+ {
3030+ u64 vmx_ept_vpid_cap ;
3031+
3032+ vmx_ept_vpid_cap = vmx_control_msr (vmx -> nested .nested_vmx_ept_caps ,
3033+ vmx -> nested .nested_vmx_vpid_caps );
3034+
3035+ /* Every bit is either reserved or a feature bit. */
3036+ if (!is_bitwise_subset (vmx_ept_vpid_cap , data , -1ULL ))
3037+ return - EINVAL ;
3038+
/* Both fields are u32: the low half goes to ept_caps, the high to vpid_caps. */
3039+ vmx -> nested .nested_vmx_ept_caps = data ;
3040+ vmx -> nested .nested_vmx_vpid_caps = data >> 32 ;
3041+ return 0 ;
3042+ }
3043+
/*
 * Validate and store a new value for nested_vmx_cr0_fixed0 or
 * nested_vmx_cr4_fixed0, selected by @msr_index. Any other index hits
 * BUG().
 *
 * @data is accepted only if every bit set in the currently stored value
 * is also set in @data; additional set bits are allowed.
 *
 * Returns 0 after storing @data, or -EINVAL if the check fails.
 */
3044+ static int vmx_restore_fixed0_msr (struct vcpu_vmx * vmx , u32 msr_index , u64 data )
3045+ {
3046+ u64 * msr ;
3047+
3048+ switch (msr_index ) {
3049+ case MSR_IA32_VMX_CR0_FIXED0 :
3050+ msr = & vmx -> nested .nested_vmx_cr0_fixed0 ;
3051+ break ;
3052+ case MSR_IA32_VMX_CR4_FIXED0 :
3053+ msr = & vmx -> nested .nested_vmx_cr4_fixed0 ;
3054+ break ;
3055+ default :
3056+ BUG ();
3057+ }
3058+
3059+ /*
3060+ * 1 bits (which indicate bits which "must-be-1" during VMX operation)
3061+ * must be 1 in the restored value.
3062+ */
3063+ if (!is_bitwise_subset (data , * msr , -1ULL ))
3064+ return - EINVAL ;
3065+
3066+ * msr = data ;
3067+ return 0 ;
3068+ }
3069+
3070+ /*
3071+ * Called when userspace is restoring VMX MSRs.
3072+ *
3073+ * Returns 0 on success, non-0 otherwise.
 *
 * Dispatches on @msr_index:
 *  - VMX_BASIC, the "true" control MSRs, PROCBASED_CTLS2, VMX_MISC,
 *    CR0/CR4_FIXED0 and EPT_VPID_CAP are handed to the matching
 *    vmx_restore_*() helper, which validates @data before storing it;
 *  - VMX_VMCS_ENUM is stored as given, with no validation of @data;
 *  - the "non-true" control MSRs, CR0/CR4_FIXED1 and every other index
 *    are refused with -EINVAL.
3074+ */
3075+ static int vmx_set_vmx_msr (struct kvm_vcpu * vcpu , u32 msr_index , u64 data )
28743076{
28753077 struct vcpu_vmx * vmx = to_vmx (vcpu );
28763078
28773079 switch (msr_index ) {
28783080 case MSR_IA32_VMX_BASIC :
3081+ return vmx_restore_vmx_basic (vmx , data );
3082+ case MSR_IA32_VMX_PINBASED_CTLS :
3083+ case MSR_IA32_VMX_PROCBASED_CTLS :
3084+ case MSR_IA32_VMX_EXIT_CTLS :
3085+ case MSR_IA32_VMX_ENTRY_CTLS :
3086+ /*
3087+ * The "non-true" VMX capability MSRs are generated from the
3088+ * "true" MSRs, so we do not support restoring them directly.
3089+ *
3090+ * If userspace wants to emulate VMX_BASIC[55]=0, userspace
3091+ * should restore the "true" MSRs with the must-be-1 bits
3092+ * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3093+ * DEFAULT SETTINGS".
3094+ */
3095+ return - EINVAL ;
3096+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS :
3097+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS :
3098+ case MSR_IA32_VMX_TRUE_EXIT_CTLS :
3099+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS :
3100+ case MSR_IA32_VMX_PROCBASED_CTLS2 :
3101+ return vmx_restore_control_msr (vmx , msr_index , data );
3102+ case MSR_IA32_VMX_MISC :
3103+ return vmx_restore_vmx_misc (vmx , data );
3104+ case MSR_IA32_VMX_CR0_FIXED0 :
3105+ case MSR_IA32_VMX_CR4_FIXED0 :
3106+ return vmx_restore_fixed0_msr (vmx , msr_index , data );
3107+ case MSR_IA32_VMX_CR0_FIXED1 :
3108+ case MSR_IA32_VMX_CR4_FIXED1 :
3109+ /*
3110+ * These MSRs are generated based on the vCPU's CPUID, so we
3111+ * do not support restoring them directly.
3112+ */
3113+ return - EINVAL ;
3114+ case MSR_IA32_VMX_EPT_VPID_CAP :
3115+ return vmx_restore_vmx_ept_vpid_cap (vmx , data );
3116+ case MSR_IA32_VMX_VMCS_ENUM :
/* NOTE(review): data is stored unchecked here, unlike the other cases. */
3117+ vmx -> nested .nested_vmx_vmcs_enum = data ;
3118+ return 0 ;
3119+ default :
28793120 /*
2880- * This MSR reports some information about VMX support. We
2881- * should return information about the VMX we emulate for the
2882- * guest, and the VMCS structure we give it - not about the
2883- * VMX support of the underlying hardware.
3121+ * The rest of the VMX capability MSRs do not support restore.
28843122 */
2885- * pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
2886- ((u64 )VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT ) |
2887- (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT );
2888- if (cpu_has_vmx_basic_inout ())
2889- * pdata |= VMX_BASIC_INOUT ;
3123+ return - EINVAL ;
3124+ }
3125+ }
3126+
3127+ /* Returns 0 on success, non-0 otherwise. */
3128+ static int vmx_get_vmx_msr (struct kvm_vcpu * vcpu , u32 msr_index , u64 * pdata )
3129+ {
3130+ struct vcpu_vmx * vmx = to_vmx (vcpu );
3131+
3132+ switch (msr_index ) {
3133+ case MSR_IA32_VMX_BASIC :
3134+ * pdata = vmx -> nested .nested_vmx_basic ;
28903135 break ;
28913136 case MSR_IA32_VMX_TRUE_PINBASED_CTLS :
28923137 case MSR_IA32_VMX_PINBASED_CTLS :
@@ -2925,27 +3170,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
29253170 vmx -> nested .nested_vmx_misc_low ,
29263171 vmx -> nested .nested_vmx_misc_high );
29273172 break ;
2928- /*
2929- * These MSRs specify bits which the guest must keep fixed (on or off)
2930- * while L1 is in VMXON mode (in L1's root mode, or running an L2).
2931- * We picked the standard core2 setting.
2932- */
2933- #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2934- #define VMXON_CR4_ALWAYSON X86_CR4_VMXE
29353173 case MSR_IA32_VMX_CR0_FIXED0 :
2936- * pdata = VMXON_CR0_ALWAYSON ;
3174+ * pdata = vmx -> nested . nested_vmx_cr0_fixed0 ;
29373175 break ;
29383176 case MSR_IA32_VMX_CR0_FIXED1 :
2939- * pdata = -1ULL ;
3177+ * pdata = vmx -> nested . nested_vmx_cr0_fixed1 ;
29403178 break ;
29413179 case MSR_IA32_VMX_CR4_FIXED0 :
2942- * pdata = VMXON_CR4_ALWAYSON ;
3180+ * pdata = vmx -> nested . nested_vmx_cr4_fixed0 ;
29433181 break ;
29443182 case MSR_IA32_VMX_CR4_FIXED1 :
2945- * pdata = -1ULL ;
3183+ * pdata = vmx -> nested . nested_vmx_cr4_fixed1 ;
29463184 break ;
29473185 case MSR_IA32_VMX_VMCS_ENUM :
2948- * pdata = 0x2e ; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
3186+ * pdata = vmx -> nested . nested_vmx_vmcs_enum ;
29493187 break ;
29503188 case MSR_IA32_VMX_PROCBASED_CTLS2 :
29513189 * pdata = vmx_control_msr (
@@ -3128,7 +3366,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
31283366 vmx_leave_nested (vcpu );
31293367 break ;
31303368 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC :
3131- return 1 ; /* they are read-only */
3369+ if (!msr_info -> host_initiated )
3370+ return 1 ; /* they are read-only */
3371+ if (!nested_vmx_allowed (vcpu ))
3372+ return 1 ;
3373+ return vmx_set_vmx_msr (vcpu , msr_index , data );
31323374 case MSR_IA32_XSS :
31333375 if (!vmx_xsaves_supported ())
31343376 return 1 ;
0 commit comments