@@ -145,6 +145,26 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
145145 RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
146146 RTIT_STATUS_BYTECNT))
147147
/*
 * List of MSRs that can be directly passed to the guest.
 * In addition to these, x2APIC and PT MSRs are handled specially in
 * vmx_update_msr_bitmap_x2apic() and pt_update_intercept_for_msr().
 *
 * NOTE: an MSR's index in this array is its bit index in the
 * shadow_msr_intercept read/write bitmaps (see
 * possible_passthrough_msr_slot()), so order matters.
 */
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
	MSR_IA32_SPEC_CTRL,
	MSR_IA32_PRED_CMD,
	MSR_IA32_TSC,
	MSR_FS_BASE,
	MSR_GS_BASE,
	MSR_KERNEL_GS_BASE,
	MSR_IA32_SYSENTER_CS,
	MSR_IA32_SYSENTER_ESP,
	MSR_IA32_SYSENTER_EIP,
	MSR_CORE_C1_RES,
	MSR_CORE_C3_RESIDENCY,
	MSR_CORE_C6_RESIDENCY,
	MSR_CORE_C7_RESIDENCY,
};
167+
148168/*
149169 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
150170 * ple_gap: upper bound on the amount of time between two successive
@@ -611,6 +631,41 @@ static inline bool report_flexpriority(void)
611631 return flexpriority_enabled ;
612632}
613633
634+ static int possible_passthrough_msr_slot (u32 msr )
635+ {
636+ u32 i ;
637+
638+ for (i = 0 ; i < ARRAY_SIZE (vmx_possible_passthrough_msrs ); i ++ )
639+ if (vmx_possible_passthrough_msrs [i ] == msr )
640+ return i ;
641+
642+ return - ENOENT ;
643+ }
644+
645+ static bool is_valid_passthrough_msr (u32 msr )
646+ {
647+ bool r ;
648+
649+ switch (msr ) {
650+ case 0x800 ... 0x8ff :
651+ /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
652+ return true;
653+ case MSR_IA32_RTIT_STATUS :
654+ case MSR_IA32_RTIT_OUTPUT_BASE :
655+ case MSR_IA32_RTIT_OUTPUT_MASK :
656+ case MSR_IA32_RTIT_CR3_MATCH :
657+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B :
658+ /* PT MSRs. These are handled in pt_update_intercept_for_msr() */
659+ return true;
660+ }
661+
662+ r = possible_passthrough_msr_slot (msr ) != - ENOENT ;
663+
664+ WARN (!r , "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]" , msr );
665+
666+ return r ;
667+ }
668+
614669static inline int __vmx_find_uret_msr (struct vcpu_vmx * vmx , u32 msr )
615670{
616671 int i ;
@@ -3583,12 +3638,51 @@ void free_vpid(int vpid)
35833638 spin_unlock (& vmx_vpid_lock );
35843639}
35853640
3641+ static void vmx_clear_msr_bitmap_read (ulong * msr_bitmap , u32 msr )
3642+ {
3643+ int f = sizeof (unsigned long );
3644+
3645+ if (msr <= 0x1fff )
3646+ __clear_bit (msr , msr_bitmap + 0x000 / f );
3647+ else if ((msr >= 0xc0000000 ) && (msr <= 0xc0001fff ))
3648+ __clear_bit (msr & 0x1fff , msr_bitmap + 0x400 / f );
3649+ }
3650+
3651+ static void vmx_clear_msr_bitmap_write (ulong * msr_bitmap , u32 msr )
3652+ {
3653+ int f = sizeof (unsigned long );
3654+
3655+ if (msr <= 0x1fff )
3656+ __clear_bit (msr , msr_bitmap + 0x800 / f );
3657+ else if ((msr >= 0xc0000000 ) && (msr <= 0xc0001fff ))
3658+ __clear_bit (msr & 0x1fff , msr_bitmap + 0xc00 / f );
3659+ }
3660+
3661+ static void vmx_set_msr_bitmap_read (ulong * msr_bitmap , u32 msr )
3662+ {
3663+ int f = sizeof (unsigned long );
3664+
3665+ if (msr <= 0x1fff )
3666+ __set_bit (msr , msr_bitmap + 0x000 / f );
3667+ else if ((msr >= 0xc0000000 ) && (msr <= 0xc0001fff ))
3668+ __set_bit (msr & 0x1fff , msr_bitmap + 0x400 / f );
3669+ }
3670+
3671+ static void vmx_set_msr_bitmap_write (ulong * msr_bitmap , u32 msr )
3672+ {
3673+ int f = sizeof (unsigned long );
3674+
3675+ if (msr <= 0x1fff )
3676+ __set_bit (msr , msr_bitmap + 0x800 / f );
3677+ else if ((msr >= 0xc0000000 ) && (msr <= 0xc0001fff ))
3678+ __set_bit (msr & 0x1fff , msr_bitmap + 0xc00 / f );
3679+ }
3680+
35863681static __always_inline void vmx_disable_intercept_for_msr (struct kvm_vcpu * vcpu ,
35873682 u32 msr , int type )
35883683{
35893684 struct vcpu_vmx * vmx = to_vmx (vcpu );
35903685 unsigned long * msr_bitmap = vmx -> vmcs01 .msr_bitmap ;
3591- int f = sizeof (unsigned long );
35923686
35933687 if (!cpu_has_vmx_msr_bitmap ())
35943688 return ;
@@ -3597,38 +3691,44 @@ static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
35973691 evmcs_touch_msr_bitmap ();
35983692
35993693 /*
3600- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3601- * have the write-low and read-high bitmap offsets the wrong way round.
3602- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3603- */
3604- if (msr <= 0x1fff ) {
3605- if (type & MSR_TYPE_R )
3606- /* read-low */
3607- __clear_bit (msr , msr_bitmap + 0x000 / f );
3694+ * Mark the desired intercept state in shadow bitmap, this is needed
3695+ * for resync when the MSR filters change.
3696+ */
3697+ if (is_valid_passthrough_msr (msr )) {
3698+ int idx = possible_passthrough_msr_slot (msr );
3699+
3700+ if (idx != - ENOENT ) {
3701+ if (type & MSR_TYPE_R )
3702+ clear_bit (idx , vmx -> shadow_msr_intercept .read );
3703+ if (type & MSR_TYPE_W )
3704+ clear_bit (idx , vmx -> shadow_msr_intercept .write );
3705+ }
3706+ }
36083707
3609- if (type & MSR_TYPE_W )
3610- /* write-low */
3611- __clear_bit (msr , msr_bitmap + 0x800 / f );
3708+ if ((type & MSR_TYPE_R ) &&
3709+ !kvm_msr_allowed (vcpu , msr , KVM_MSR_FILTER_READ )) {
3710+ vmx_set_msr_bitmap_read (msr_bitmap , msr );
3711+ type &= ~MSR_TYPE_R ;
3712+ }
36123713
3613- } else if ((msr >= 0xc0000000 ) && ( msr <= 0xc0001fff )) {
3614- msr &= 0x1fff ;
3615- if ( type & MSR_TYPE_R )
3616- /* read-high */
3617- __clear_bit ( msr , msr_bitmap + 0x400 / f );
3714+ if ((type & MSR_TYPE_W ) &&
3715+ ! kvm_msr_allowed ( vcpu , msr , KVM_MSR_FILTER_WRITE )) {
3716+ vmx_set_msr_bitmap_write ( msr_bitmap , msr );
3717+ type &= ~ MSR_TYPE_W ;
3718+ }
36183719
3619- if (type & MSR_TYPE_W )
3620- /* write-high */
3621- __clear_bit (msr , msr_bitmap + 0xc00 / f );
3720+ if (type & MSR_TYPE_R )
3721+ vmx_clear_msr_bitmap_read (msr_bitmap , msr );
36223722
3623- }
3723+ if (type & MSR_TYPE_W )
3724+ vmx_clear_msr_bitmap_write (msr_bitmap , msr );
36243725}
36253726
36263727static __always_inline void vmx_enable_intercept_for_msr (struct kvm_vcpu * vcpu ,
36273728 u32 msr , int type )
36283729{
36293730 struct vcpu_vmx * vmx = to_vmx (vcpu );
36303731 unsigned long * msr_bitmap = vmx -> vmcs01 .msr_bitmap ;
3631- int f = sizeof (unsigned long );
36323732
36333733 if (!cpu_has_vmx_msr_bitmap ())
36343734 return ;
@@ -3637,30 +3737,25 @@ static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
36373737 evmcs_touch_msr_bitmap ();
36383738
36393739 /*
3640- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3641- * have the write-low and read-high bitmap offsets the wrong way round.
3642- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3643- */
3644- if (msr <= 0x1fff ) {
3645- if (type & MSR_TYPE_R )
3646- /* read-low */
3647- __set_bit (msr , msr_bitmap + 0x000 / f );
3648-
3649- if (type & MSR_TYPE_W )
3650- /* write-low */
3651- __set_bit (msr , msr_bitmap + 0x800 / f );
3652-
3653- } else if ((msr >= 0xc0000000 ) && (msr <= 0xc0001fff )) {
3654- msr &= 0x1fff ;
3655- if (type & MSR_TYPE_R )
3656- /* read-high */
3657- __set_bit (msr , msr_bitmap + 0x400 / f );
3740+ * Mark the desired intercept state in shadow bitmap, this is needed
3741+ * for resync when the MSR filter changes.
3742+ */
3743+ if (is_valid_passthrough_msr (msr )) {
3744+ int idx = possible_passthrough_msr_slot (msr );
3745+
3746+ if (idx != - ENOENT ) {
3747+ if (type & MSR_TYPE_R )
3748+ set_bit (idx , vmx -> shadow_msr_intercept .read );
3749+ if (type & MSR_TYPE_W )
3750+ set_bit (idx , vmx -> shadow_msr_intercept .write );
3751+ }
3752+ }
36583753
3659- if (type & MSR_TYPE_W )
3660- /* write-high */
3661- __set_bit (msr , msr_bitmap + 0xc00 / f );
3754+ if (type & MSR_TYPE_R )
3755+ vmx_set_msr_bitmap_read (msr_bitmap , msr );
36623756
3663- }
3757+ if (type & MSR_TYPE_W )
3758+ vmx_set_msr_bitmap_write (msr_bitmap , msr );
36643759}
36653760
36663761static __always_inline void vmx_set_intercept_for_msr (struct kvm_vcpu * vcpu ,
@@ -3687,15 +3782,14 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
36873782 return mode ;
36883783}
36893784
3690- static void vmx_update_msr_bitmap_x2apic (struct kvm_vcpu * vcpu ,
3691- unsigned long * msr_bitmap , u8 mode )
3785+ static void vmx_update_msr_bitmap_x2apic (struct kvm_vcpu * vcpu , u8 mode )
36923786{
36933787 int msr ;
36943788
3695- for (msr = 0x800 ; msr <= 0x8ff ; msr += BITS_PER_LONG ) {
3696- unsigned word = msr / BITS_PER_LONG ;
3697- msr_bitmap [ word ] = ( mode & MSR_BITMAP_MODE_X2APIC_APICV ) ? 0 : ~ 0 ;
3698- msr_bitmap [ word + ( 0x800 / sizeof ( long ))] = ~ 0 ;
3789+ for (msr = 0x800 ; msr <= 0x8ff ; msr ++ ) {
3790+ bool intercepted = !!( mode & MSR_BITMAP_MODE_X2APIC_APICV ) ;
3791+
3792+ vmx_set_intercept_for_msr ( vcpu , msr , MSR_TYPE_RW , intercepted ) ;
36993793 }
37003794
37013795 if (mode & MSR_BITMAP_MODE_X2APIC ) {
@@ -3715,15 +3809,14 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
37153809void vmx_update_msr_bitmap (struct kvm_vcpu * vcpu )
37163810{
37173811 struct vcpu_vmx * vmx = to_vmx (vcpu );
3718- unsigned long * msr_bitmap = vmx -> vmcs01 .msr_bitmap ;
37193812 u8 mode = vmx_msr_bitmap_mode (vcpu );
37203813 u8 changed = mode ^ vmx -> msr_bitmap_mode ;
37213814
37223815 if (!changed )
37233816 return ;
37243817
37253818 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV ))
3726- vmx_update_msr_bitmap_x2apic (vcpu , msr_bitmap , mode );
3819+ vmx_update_msr_bitmap_x2apic (vcpu , mode );
37273820
37283821 vmx -> msr_bitmap_mode = mode ;
37293822}
@@ -3764,6 +3857,29 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
37643857 return ((rvi & 0xf0 ) > (vppr & 0xf0 ));
37653858}
37663859
3860+ static void vmx_msr_filter_changed (struct kvm_vcpu * vcpu )
3861+ {
3862+ struct vcpu_vmx * vmx = to_vmx (vcpu );
3863+ u32 i ;
3864+
3865+ /*
3866+ * Set intercept permissions for all potentially passed through MSRs
3867+ * again. They will automatically get filtered through the MSR filter,
3868+ * so we are back in sync after this.
3869+ */
3870+ for (i = 0 ; i < ARRAY_SIZE (vmx_possible_passthrough_msrs ); i ++ ) {
3871+ u32 msr = vmx_possible_passthrough_msrs [i ];
3872+ bool read = test_bit (i , vmx -> shadow_msr_intercept .read );
3873+ bool write = test_bit (i , vmx -> shadow_msr_intercept .write );
3874+
3875+ vmx_set_intercept_for_msr (vcpu , msr , MSR_TYPE_R , read );
3876+ vmx_set_intercept_for_msr (vcpu , msr , MSR_TYPE_W , write );
3877+ }
3878+
3879+ pt_update_intercept_for_msr (vcpu );
3880+ vmx_update_msr_bitmap_x2apic (vcpu , vmx_msr_bitmap_mode (vcpu ));
3881+ }
3882+
37673883static inline bool kvm_vcpu_trigger_posted_interrupt (struct kvm_vcpu * vcpu ,
37683884 bool nested )
37693885{
@@ -6749,6 +6865,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
67496865 if (err < 0 )
67506866 goto free_pml ;
67516867
6868+ /* The MSR bitmap starts with all ones */
6869+ bitmap_fill (vmx -> shadow_msr_intercept .read , MAX_POSSIBLE_PASSTHROUGH_MSRS );
6870+ bitmap_fill (vmx -> shadow_msr_intercept .write , MAX_POSSIBLE_PASSTHROUGH_MSRS );
6871+
67526872 msr_bitmap = vmx -> vmcs01 .msr_bitmap ;
67536873 vmx_disable_intercept_for_msr (vcpu , MSR_IA32_TSC , MSR_TYPE_R );
67546874 vmx_disable_intercept_for_msr (vcpu , MSR_FS_BASE , MSR_TYPE_RW );
@@ -7563,6 +7683,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
75637683 .can_emulate_instruction = vmx_can_emulate_instruction ,
75647684 .apic_init_signal_blocked = vmx_apic_init_signal_blocked ,
75657685 .migrate_timers = vmx_migrate_timers ,
7686+
7687+ .msr_filter_changed = vmx_msr_filter_changed ,
75667688};
75677689
75687690static __init int hardware_setup (void )
0 commit comments