Skip to content

Commit 3eb9001

Browse files
agraf authored and bonzini committed
KVM: x86: VMX: Prevent MSR passthrough when MSR access is denied
We will introduce the concept of MSRs that may not be handled in kernel space soon. Some MSRs are directly passed through to the guest, effectively making them handled by KVM from user space's point of view. This patch introduces all logic required to ensure that MSRs that user space wants trapped are not marked as direct access for guests. Signed-off-by: Alexander Graf <graf@amazon.com> Message-Id: <20200925143422.21718-7-graf@amazon.com> [Replace "_idx" with "_slot". - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent fd6fa73 commit 3eb9001

File tree

2 files changed

+181
-52
lines changed

2 files changed

+181
-52
lines changed

arch/x86/kvm/vmx/vmx.c

Lines changed: 174 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,26 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
145145
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
146146
RTIT_STATUS_BYTECNT))
147147

148+
/*
 * List of MSRs that can be directly passed to the guest.
 * In addition to these x2apic and PT MSRs are handled specially.
 *
 * NOTE(review): the entry count (13) must stay equal to
 * MAX_POSSIBLE_PASSTHROUGH_MSRS (defined in vmx.h); the shadow intercept
 * bitmaps in struct vcpu_vmx are sized by that constant.
 */
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
	MSR_IA32_SPEC_CTRL,
	MSR_IA32_PRED_CMD,
	MSR_IA32_TSC,
	MSR_FS_BASE,
	MSR_GS_BASE,
	MSR_KERNEL_GS_BASE,
	MSR_IA32_SYSENTER_CS,
	MSR_IA32_SYSENTER_ESP,
	MSR_IA32_SYSENTER_EIP,
	MSR_CORE_C1_RES,
	MSR_CORE_C3_RESIDENCY,
	MSR_CORE_C6_RESIDENCY,
	MSR_CORE_C7_RESIDENCY,
};
167+
148168
/*
149169
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
150170
* ple_gap: upper bound on the amount of time between two successive
@@ -611,6 +631,41 @@ static inline bool report_flexpriority(void)
611631
return flexpriority_enabled;
612632
}
613633

634+
/*
 * Linear search of vmx_possible_passthrough_msrs[] for @msr.
 * Returns the slot index on a match, -ENOENT otherwise.
 */
static int possible_passthrough_msr_slot(u32 msr)
{
	u32 slot;

	for (slot = 0; slot < ARRAY_SIZE(vmx_possible_passthrough_msrs); slot++) {
		if (vmx_possible_passthrough_msrs[slot] == msr)
			return slot;
	}

	return -ENOENT;
}
644+
645+
/*
 * Returns true if @msr may legitimately be configured for passthrough:
 * either it falls in one of the specially-handled ranges (x2APIC, Intel PT)
 * or it is listed in vmx_possible_passthrough_msrs[].  Any other MSR
 * triggers a WARN so new passthrough users are forced to extend the list.
 */
static bool is_valid_passthrough_msr(u32 msr)
{
	bool r;

	switch (msr) {
	case 0x800 ... 0x8ff:
		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
		return true;
	case MSR_IA32_RTIT_STATUS:
	case MSR_IA32_RTIT_OUTPUT_BASE:
	case MSR_IA32_RTIT_OUTPUT_MASK:
	case MSR_IA32_RTIT_CR3_MATCH:
	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
		return true;
	}

	/* Anything else must have a slot in the possible-passthrough list. */
	r = possible_passthrough_msr_slot(msr) != -ENOENT;

	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);

	return r;
}
668+
614669
static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
615670
{
616671
int i;
@@ -3583,12 +3638,51 @@ void free_vpid(int vpid)
35833638
spin_unlock(&vmx_vpid_lock);
35843639
}
35853640

3641+
/* Clear the read-intercept bit for @msr in the VMCS MSR bitmap page. */
static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
{
	const int f = sizeof(unsigned long);

	/* Low MSR range: read bits live at offset 0x000 of the bitmap. */
	if (msr <= 0x1fff) {
		__clear_bit(msr, msr_bitmap + 0x000 / f);
		return;
	}

	/* High MSR range: read bits live at offset 0x400 of the bitmap. */
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
}
3650+
3651+
/* Clear the write-intercept bit for @msr in the VMCS MSR bitmap page. */
static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
{
	const int f = sizeof(unsigned long);

	/* Low MSR range: write bits live at offset 0x800 of the bitmap. */
	if (msr <= 0x1fff) {
		__clear_bit(msr, msr_bitmap + 0x800 / f);
		return;
	}

	/* High MSR range: write bits live at offset 0xc00 of the bitmap. */
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
}
3660+
3661+
/* Set the read-intercept bit for @msr in the VMCS MSR bitmap page. */
static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
{
	const int f = sizeof(unsigned long);

	/* Low MSR range: read bits live at offset 0x000 of the bitmap. */
	if (msr <= 0x1fff) {
		__set_bit(msr, msr_bitmap + 0x000 / f);
		return;
	}

	/* High MSR range: read bits live at offset 0x400 of the bitmap. */
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
}
3670+
3671+
/* Set the write-intercept bit for @msr in the VMCS MSR bitmap page. */
static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
{
	const int f = sizeof(unsigned long);

	/* Low MSR range: write bits live at offset 0x800 of the bitmap. */
	if (msr <= 0x1fff) {
		__set_bit(msr, msr_bitmap + 0x800 / f);
		return;
	}

	/* High MSR range: write bits live at offset 0xc00 of the bitmap. */
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
}
3680+
35863681
static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
35873682
u32 msr, int type)
35883683
{
35893684
struct vcpu_vmx *vmx = to_vmx(vcpu);
35903685
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3591-
int f = sizeof(unsigned long);
35923686

35933687
if (!cpu_has_vmx_msr_bitmap())
35943688
return;
@@ -3597,38 +3691,44 @@ static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
35973691
evmcs_touch_msr_bitmap();
35983692

35993693
/*
3600-
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3601-
* have the write-low and read-high bitmap offsets the wrong way round.
3602-
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3603-
*/
3604-
if (msr <= 0x1fff) {
3605-
if (type & MSR_TYPE_R)
3606-
/* read-low */
3607-
__clear_bit(msr, msr_bitmap + 0x000 / f);
3694+
* Mark the desired intercept state in shadow bitmap, this is needed
3695+
* for resync when the MSR filters change.
3696+
*/
3697+
if (is_valid_passthrough_msr(msr)) {
3698+
int idx = possible_passthrough_msr_slot(msr);
3699+
3700+
if (idx != -ENOENT) {
3701+
if (type & MSR_TYPE_R)
3702+
clear_bit(idx, vmx->shadow_msr_intercept.read);
3703+
if (type & MSR_TYPE_W)
3704+
clear_bit(idx, vmx->shadow_msr_intercept.write);
3705+
}
3706+
}
36083707

3609-
if (type & MSR_TYPE_W)
3610-
/* write-low */
3611-
__clear_bit(msr, msr_bitmap + 0x800 / f);
3708+
if ((type & MSR_TYPE_R) &&
3709+
!kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
3710+
vmx_set_msr_bitmap_read(msr_bitmap, msr);
3711+
type &= ~MSR_TYPE_R;
3712+
}
36123713

3613-
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3614-
msr &= 0x1fff;
3615-
if (type & MSR_TYPE_R)
3616-
/* read-high */
3617-
__clear_bit(msr, msr_bitmap + 0x400 / f);
3714+
if ((type & MSR_TYPE_W) &&
3715+
!kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
3716+
vmx_set_msr_bitmap_write(msr_bitmap, msr);
3717+
type &= ~MSR_TYPE_W;
3718+
}
36183719

3619-
if (type & MSR_TYPE_W)
3620-
/* write-high */
3621-
__clear_bit(msr, msr_bitmap + 0xc00 / f);
3720+
if (type & MSR_TYPE_R)
3721+
vmx_clear_msr_bitmap_read(msr_bitmap, msr);
36223722

3623-
}
3723+
if (type & MSR_TYPE_W)
3724+
vmx_clear_msr_bitmap_write(msr_bitmap, msr);
36243725
}
36253726

36263727
static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
36273728
u32 msr, int type)
36283729
{
36293730
struct vcpu_vmx *vmx = to_vmx(vcpu);
36303731
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3631-
int f = sizeof(unsigned long);
36323732

36333733
if (!cpu_has_vmx_msr_bitmap())
36343734
return;
@@ -3637,30 +3737,25 @@ static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
36373737
evmcs_touch_msr_bitmap();
36383738

36393739
/*
3640-
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3641-
* have the write-low and read-high bitmap offsets the wrong way round.
3642-
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3643-
*/
3644-
if (msr <= 0x1fff) {
3645-
if (type & MSR_TYPE_R)
3646-
/* read-low */
3647-
__set_bit(msr, msr_bitmap + 0x000 / f);
3648-
3649-
if (type & MSR_TYPE_W)
3650-
/* write-low */
3651-
__set_bit(msr, msr_bitmap + 0x800 / f);
3652-
3653-
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3654-
msr &= 0x1fff;
3655-
if (type & MSR_TYPE_R)
3656-
/* read-high */
3657-
__set_bit(msr, msr_bitmap + 0x400 / f);
3740+
* Mark the desired intercept state in shadow bitmap, this is needed
3741+
* for resync when the MSR filter changes.
3742+
*/
3743+
if (is_valid_passthrough_msr(msr)) {
3744+
int idx = possible_passthrough_msr_slot(msr);
3745+
3746+
if (idx != -ENOENT) {
3747+
if (type & MSR_TYPE_R)
3748+
set_bit(idx, vmx->shadow_msr_intercept.read);
3749+
if (type & MSR_TYPE_W)
3750+
set_bit(idx, vmx->shadow_msr_intercept.write);
3751+
}
3752+
}
36583753

3659-
if (type & MSR_TYPE_W)
3660-
/* write-high */
3661-
__set_bit(msr, msr_bitmap + 0xc00 / f);
3754+
if (type & MSR_TYPE_R)
3755+
vmx_set_msr_bitmap_read(msr_bitmap, msr);
36623756

3663-
}
3757+
if (type & MSR_TYPE_W)
3758+
vmx_set_msr_bitmap_write(msr_bitmap, msr);
36643759
}
36653760

36663761
static __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
@@ -3687,15 +3782,14 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
36873782
return mode;
36883783
}
36893784

3690-
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
3691-
unsigned long *msr_bitmap, u8 mode)
3785+
static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
36923786
{
36933787
int msr;
36943788

3695-
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
3696-
unsigned word = msr / BITS_PER_LONG;
3697-
msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
3698-
msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
3789+
for (msr = 0x800; msr <= 0x8ff; msr++) {
3790+
bool intercepted = !!(mode & MSR_BITMAP_MODE_X2APIC_APICV);
3791+
3792+
vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_RW, intercepted);
36993793
}
37003794

37013795
if (mode & MSR_BITMAP_MODE_X2APIC) {
@@ -3715,15 +3809,14 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
37153809
/*
 * Recompute the MSR bitmap mode for @vcpu and, if any x2APIC-related mode
 * bit changed, resync the x2APIC MSR intercepts via
 * vmx_update_msr_bitmap_x2apic().  No-op when the mode is unchanged.
 */
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u8 mode = vmx_msr_bitmap_mode(vcpu);
	u8 changed = mode ^ vmx->msr_bitmap_mode;

	if (!changed)
		return;

	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
		vmx_update_msr_bitmap_x2apic(vcpu, mode);

	vmx->msr_bitmap_mode = mode;
}
@@ -3764,6 +3857,29 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
37643857
return ((rvi & 0xf0) > (vppr & 0xf0));
37653858
}
37663859

3860+
/*
 * kvm_x86_ops hook, invoked when userspace installs a new MSR filter.
 * Replays the desired intercept state recorded in
 * vmx->shadow_msr_intercept for every possible-passthrough MSR, then
 * resyncs the PT and x2APIC MSR intercepts.
 */
static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 i;

	/*
	 * Set intercept permissions for all potentially passed through MSRs
	 * again. They will automatically get filtered through the MSR filter,
	 * so we are back in sync after this.
	 */
	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
		u32 msr = vmx_possible_passthrough_msrs[i];
		bool read = test_bit(i, vmx->shadow_msr_intercept.read);
		bool write = test_bit(i, vmx->shadow_msr_intercept.write);

		vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_R, read);
		vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_W, write);
	}

	/* PT and x2APIC MSRs are not in the passthrough list; resync them too. */
	pt_update_intercept_for_msr(vcpu);
	vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu));
}
3882+
37673883
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
37683884
bool nested)
37693885
{
@@ -6749,6 +6865,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
67496865
if (err < 0)
67506866
goto free_pml;
67516867

6868+
/* The MSR bitmap starts with all ones */
6869+
bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
6870+
bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
6871+
67526872
msr_bitmap = vmx->vmcs01.msr_bitmap;
67536873
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
67546874
vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
@@ -7563,6 +7683,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
75637683
.can_emulate_instruction = vmx_can_emulate_instruction,
75647684
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
75657685
.migrate_timers = vmx_migrate_timers,
7686+
7687+
.msr_filter_changed = vmx_msr_filter_changed,
75667688
};
75677689

75687690
static __init int hardware_setup(void)

arch/x86/kvm/vmx/vmx.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,13 @@ struct vcpu_vmx {
279279
u64 ept_pointer;
280280

281281
struct pt_desc pt_desc;
282+
283+
/* Save desired MSR intercept (read: pass-through) state */
284+
#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13
285+
struct {
286+
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
287+
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
288+
} shadow_msr_intercept;
282289
};
283290

284291
enum ept_pointers_status {

0 commit comments

Comments
 (0)