@@ -144,6 +144,15 @@ struct shared_msr_entry {
144144 u64 mask ;
145145};
146146
147+ /*
148+ * The nested_vmx structure is part of vcpu_vmx, and holds information we need
149+ * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
150+ */
151+ struct nested_vmx {
152+ /* Has the level1 guest done vmxon? */
153+ bool vmxon ;
154+ };
155+
147156struct vcpu_vmx {
148157 struct kvm_vcpu vcpu ;
149158 unsigned long host_rsp ;
@@ -203,6 +212,9 @@ struct vcpu_vmx {
203212 u32 exit_reason ;
204213
205214 bool rdtscp_enabled ;
215+
216+ /* Support for a guest hypervisor (nested VMX) */
217+ struct nested_vmx nested ;
206218};
207219
208220enum segment_cache_field {
@@ -3933,6 +3945,99 @@ static int handle_invalid_op(struct kvm_vcpu *vcpu)
39333945 return 1 ;
39343946}
39353947
3948+ /*
3949+ * Emulate the VMXON instruction.
3950+ * Currently, we just remember that VMX is active, and do not save or even
3951+ * inspect the argument to VMXON (the so-called "VMXON pointer") because we
3952+ * do not currently need to store anything in that guest-allocated memory
3953+ * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
3954+ * argument is different from the VMXON pointer (which the spec says they do).
3955+ */
3956+ static int handle_vmon (struct kvm_vcpu * vcpu )
3957+ {
3958+ struct kvm_segment cs ;
3959+ struct vcpu_vmx * vmx = to_vmx (vcpu );
3960+
3961+ /* The Intel VMX Instruction Reference lists a bunch of bits that
3962+ * are prerequisite to running VMXON, most notably cr4.VMXE must be
3963+ * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
3964+ * Otherwise, we should fail with #UD. We test these now:
3965+ */
3966+ if (!kvm_read_cr4_bits (vcpu , X86_CR4_VMXE ) ||
3967+ !kvm_read_cr0_bits (vcpu , X86_CR0_PE ) ||
3968+ (vmx_get_rflags (vcpu ) & X86_EFLAGS_VM )) {
3969+ kvm_queue_exception (vcpu , UD_VECTOR );
3970+ return 1 ;
3971+ }
3972+
3973+ vmx_get_segment (vcpu , & cs , VCPU_SREG_CS );
3974+ if (is_long_mode (vcpu ) && !cs .l ) {
3975+ kvm_queue_exception (vcpu , UD_VECTOR );
3976+ return 1 ;
3977+ }
3978+
3979+ if (vmx_get_cpl (vcpu )) {
3980+ kvm_inject_gp (vcpu , 0 );
3981+ return 1 ;
3982+ }
3983+
3984+ vmx -> nested .vmxon = true;
3985+
3986+ skip_emulated_instruction (vcpu );
3987+ return 1 ;
3988+ }
3989+
3990+ /*
3991+ * Intel's VMX Instruction Reference specifies a common set of prerequisites
3992+ * for running VMX instructions (except VMXON, whose prerequisites are
3993+ * slightly different). It also specifies what exception to inject otherwise.
3994+ */
3995+ static int nested_vmx_check_permission (struct kvm_vcpu * vcpu )
3996+ {
3997+ struct kvm_segment cs ;
3998+ struct vcpu_vmx * vmx = to_vmx (vcpu );
3999+
4000+ if (!vmx -> nested .vmxon ) {
4001+ kvm_queue_exception (vcpu , UD_VECTOR );
4002+ return 0 ;
4003+ }
4004+
4005+ vmx_get_segment (vcpu , & cs , VCPU_SREG_CS );
4006+ if ((vmx_get_rflags (vcpu ) & X86_EFLAGS_VM ) ||
4007+ (is_long_mode (vcpu ) && !cs .l )) {
4008+ kvm_queue_exception (vcpu , UD_VECTOR );
4009+ return 0 ;
4010+ }
4011+
4012+ if (vmx_get_cpl (vcpu )) {
4013+ kvm_inject_gp (vcpu , 0 );
4014+ return 0 ;
4015+ }
4016+
4017+ return 1 ;
4018+ }
4019+
4020+ /*
4021+ * Free whatever needs to be freed from vmx->nested when L1 goes down, or
4022+ * just stops using VMX.
4023+ */
4024+ static void free_nested (struct vcpu_vmx * vmx )
4025+ {
4026+ if (!vmx -> nested .vmxon )
4027+ return ;
4028+ vmx -> nested .vmxon = false;
4029+ }
4030+
/* Emulate the VMXOFF instruction: leave VMX operation and release state. */
static int handle_vmoff(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* Queues #UD/#GP itself when the prerequisites do not hold. */
	if (!nested_vmx_check_permission(vcpu))
		return 1;

	free_nested(vmx);
	skip_emulated_instruction(vcpu);
	return 1;
}
4040+
39364041/*
39374042 * The exit handlers return 1 if the exit was handled fully and guest execution
39384043 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -3961,8 +4066,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
39614066 [EXIT_REASON_VMREAD ] = handle_vmx_insn ,
39624067 [EXIT_REASON_VMRESUME ] = handle_vmx_insn ,
39634068 [EXIT_REASON_VMWRITE ] = handle_vmx_insn ,
3964- [EXIT_REASON_VMOFF ] = handle_vmx_insn ,
3965- [EXIT_REASON_VMON ] = handle_vmx_insn ,
4069+ [EXIT_REASON_VMOFF ] = handle_vmoff ,
4070+ [EXIT_REASON_VMON ] = handle_vmon ,
39664071 [EXIT_REASON_TPR_BELOW_THRESHOLD ] = handle_tpr_below_threshold ,
39674072 [EXIT_REASON_APIC_ACCESS ] = handle_apic_access ,
39684073 [EXIT_REASON_WBINVD ] = handle_wbinvd ,
@@ -4363,6 +4468,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
43634468 struct vcpu_vmx * vmx = to_vmx (vcpu );
43644469
43654470 free_vpid (vmx );
4471+ free_nested (vmx );
43664472 free_loaded_vmcs (vmx -> loaded_vmcs );
43674473 kfree (vmx -> guest_msrs );
43684474 kvm_vcpu_uninit (vcpu );
0 commit comments