KVM: x86: do not go through ctxt->ops when emulating rsm
Now that RSM is implemented in a single emulator callback, there is no
point in going through other callbacks for the sake of modifying
processor state.  Just invoke KVM's own internal functions directly,
and remove the callbacks that were only used by em_rsm; the only
substantial difference is in the handling of the segment registers
and descriptor cache, which have to be parsed into a struct kvm_segment
instead of a struct desc_struct.

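(Not part of the patch -- a rough sketch of that difference, with the helper name invented for illustration: struct desc_struct keeps the raw architectural descriptor layout, filled via set_desc_base()/set_desc_limit() and bitfields such as d and p, while struct kvm_segment exposes plain fields that kvm_set_segment() consumes directly.)

/*
 * Illustrative sketch only, not part of the patch: decode the AR flag
 * word saved in SMRAM into struct kvm_segment, mirroring the new
 * rsm_set_desc_flags() in the diff below.  Assumes the usual KVM
 * headers for struct kvm_segment and u32; the helper name is made up.
 */
static void sketch_decode_smram_flags(struct kvm_segment *seg, u32 flags)
{
	seg->g        = (flags >> 23) & 1;
	seg->db       = (flags >> 22) & 1;	/* desc_struct calls this bit 'd' */
	seg->l        = (flags >> 21) & 1;
	seg->avl      = (flags >> 20) & 1;
	seg->present  = (flags >> 15) & 1;	/* desc_struct calls this bit 'p' */
	seg->dpl      = (flags >> 13) & 3;
	seg->s        = (flags >> 12) & 1;
	seg->type     = (flags >>  8) & 15;
	seg->unusable = !seg->present;		/* kvm_segment-only field */
	seg->padding  = 0;
}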
This also fixes a bug where emulator_set_segment was shifting the
limit left by 12 if the G bit is set, but the limit had not been
shifted right upon entry to SMM.

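(Also not part of the patch -- a minimal sketch of that bug, with helper names invented for illustration: desc_struct can only hold a 20-bit limit, and emulator_set_segment() re-scales it by 4 KiB when G=1, even though the SMRAM image already stores the full byte-granular 32-bit limit.)

/*
 * Illustrative sketch only, assuming kernel types (u32/bool): how the
 * saved 32-bit limit fared on the two paths.  Helper names are invented;
 * only the shift-by-12 step mirrors what emulator_set_segment() does
 * when desc->g is set.
 */
static u32 sketch_limit_via_emulator_set_segment(u32 smram_limit, bool g)
{
	u32 limit = smram_limit & 0xfffff;	/* desc_struct holds only 20 limit bits */

	if (g)					/* re-scaled again, although the SMRAM   */
		limit = (limit << 12) | 0xfff;	/* value was never shifted right by 12   */
	return limit;
}

static u32 sketch_limit_via_kvm_set_segment(u32 smram_limit)
{
	return smram_limit;			/* kvm_set_segment() takes the limit verbatim */
}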
The emulator context is still used to restore EIP and the general
purpose registers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
bonzini authored and intel-lab-lkp committed Sep 29, 2022
1 parent cc6eb1e commit cc73a76
Showing 3 changed files with 81 additions and 142 deletions.
13 changes: 0 additions & 13 deletions arch/x86/kvm/kvm_emulate.h
@@ -116,16 +116,6 @@ struct x86_emulate_ops {
unsigned int bytes,
struct x86_exception *fault, bool system);

/*
* read_phys: Read bytes of standard (non-emulated/special) memory.
* Used for descriptor reading.
* @addr: [IN ] Physical address from which to read.
* @val: [OUT] Value read from memory.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
void *val, unsigned int bytes);

/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for descriptor writing.
@@ -209,11 +199,8 @@ struct x86_emulate_ops {
int (*cpl)(struct x86_emulate_ctxt *ctxt);
void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
177 changes: 81 additions & 96 deletions arch/x86/kvm/smm.c
@@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}

static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
#ifdef CONFIG_X86_64
return ctxt->ops->guest_has_long_mode(ctxt);
#else
return false;
#endif
}

static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
desc->d = (flags >> 22) & 1;
desc->db = (flags >> 22) & 1;
desc->l = (flags >> 21) & 1;
desc->avl = (flags >> 20) & 1;
desc->p = (flags >> 15) & 1;
desc->present = (flags >> 15) & 1;
desc->dpl = (flags >> 13) & 3;
desc->s = (flags >> 12) & 1;
desc->type = (flags >> 8) & 15;

desc->unusable = !desc->present;
desc->padding = 0;
}

static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
struct desc_struct desc;
struct kvm_segment desc;
int offset;
u16 selector;

selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);

if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;

set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
desc.base = GET_SMSTATE(u32, smstate, offset + 8);
desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
struct desc_struct desc;
struct kvm_segment desc;
int offset;
u16 selector;
u32 base3;

offset = 0x7e00 + n * 16;

selector = GET_SMSTATE(u16, smstate, offset);
desc.selector = GET_SMSTATE(u16, smstate, offset);
rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
base3 = GET_SMSTATE(u32, smstate, offset + 12);

ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
desc.base = GET_SMSTATE(u64, smstate, offset + 8);
kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
#endif

static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
u64 cr0, u64 cr3, u64 cr4)
{
int bad;
@@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
cr3 &= ~0xfff;
}

bad = ctxt->ops->set_cr(ctxt, 3, cr3);
bad = kvm_set_cr3(vcpu, cr3);
if (bad)
return X86EMUL_UNHANDLEABLE;

@@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
* Then enable protected mode. However, PCID cannot be enabled
* if EFER.LMA=0, so set it separately.
*/
bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
if (bad)
return X86EMUL_UNHANDLEABLE;

bad = ctxt->ops->set_cr(ctxt, 0, cr0);
bad = kvm_set_cr0(vcpu, cr0);
if (bad)
return X86EMUL_UNHANDLEABLE;

if (cr4 & X86_CR4_PCIDE) {
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
bad = kvm_set_cr4(vcpu, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
if (pcid) {
bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
bad = kvm_set_cr3(vcpu, cr3 | pcid);
if (bad)
return X86EMUL_UNHANDLEABLE;
}
@@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
struct desc_struct desc;
struct kvm_vcpu *vcpu = ctxt->vcpu;
struct kvm_segment desc;
struct desc_ptr dt;
u16 selector;
u32 val, cr0, cr3, cr4;
int i;

@@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,

val = GET_SMSTATE(u32, smstate, 0x7fcc);

if (ctxt->ops->set_dr(ctxt, 6, val))
if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;

val = GET_SMSTATE(u32, smstate, 0x7fc8);

if (ctxt->ops->set_dr(ctxt, 7, val))
if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;

selector = GET_SMSTATE(u32, smstate, 0x7fc4);
set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4);
desc.base = GET_SMSTATE(u32, smstate, 0x7f64);
desc.limit = GET_SMSTATE(u32, smstate, 0x7f60);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);

selector = GET_SMSTATE(u32, smstate, 0x7fc0);
set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0);
desc.base = GET_SMSTATE(u32, smstate, 0x7f80);
desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);

dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);
static_call(kvm_x86_set_gdt)(vcpu, &dt);

dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);
static_call(kvm_x86_set_idt)(vcpu, &dt);

for (i = 0; i < 6; i++) {
int r = rsm_load_seg_32(ctxt, smstate, i);
int r = rsm_load_seg_32(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}

cr4 = GET_SMSTATE(u32, smstate, 0x7f14);

ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);

return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
struct desc_struct desc;
struct kvm_vcpu *vcpu = ctxt->vcpu;
struct kvm_segment desc;
struct desc_ptr dt;
u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;

for (i = 0; i < NR_EMULATOR_GPRS; i++)
@@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,

val = GET_SMSTATE(u64, smstate, 0x7f68);

if (ctxt->ops->set_dr(ctxt, 6, val))
if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;

val = GET_SMSTATE(u64, smstate, 0x7f60);

if (ctxt->ops->set_dr(ctxt, 7, val))
if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;

cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00);
val = GET_SMSTATE(u64, smstate, 0x7ed0);

if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;

selector = GET_SMSTATE(u32, smstate, 0x7e90);
desc.selector = GET_SMSTATE(u32, smstate, 0x7e90);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
desc.limit = GET_SMSTATE(u32, smstate, 0x7e94);
desc.base = GET_SMSTATE(u64, smstate, 0x7e98);
kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);

dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);
static_call(kvm_x86_set_idt)(vcpu, &dt);

selector = GET_SMSTATE(u32, smstate, 0x7e70);
desc.selector = GET_SMSTATE(u32, smstate, 0x7e70);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
desc.limit = GET_SMSTATE(u32, smstate, 0x7e74);
desc.base = GET_SMSTATE(u64, smstate, 0x7e78);
kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);

dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
static_call(kvm_x86_set_gdt)(vcpu, &dt);

r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;

for (i = 0; i < 6; i++) {
r = rsm_load_seg_64(ctxt, smstate, i);
r = rsm_load_seg_64(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
Expand All @@ -520,56 +505,56 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;

smbase = ctxt->ops->get_smbase(ctxt);
smbase = vcpu->arch.smbase;

ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
if (ret != X86EMUL_CONTINUE)
ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
if (ret < 0)
return X86EMUL_UNHANDLEABLE;

if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
ctxt->ops->set_nmi_mask(ctxt, false);
if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
static_call(kvm_x86_set_nmi_mask)(vcpu, false);

kvm_smm_changed(vcpu, false);

/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*
* The ctxt->ops callbacks will handle all side effects when writing
* MSRs and CRs, e.g. MMU context resets, CPUID
* runtime updates, etc.
*/
if (emulator_has_longmode(ctxt)) {
struct desc_struct cs_desc;
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
struct kvm_segment cs_desc;

/* Zero CR4.PCIDE before CR0.PG. */
cr4 = ctxt->ops->get_cr(ctxt, 4);
cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PCIDE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
cs_desc.type = 0xb;
cs_desc.s = cs_desc.g = cs_desc.p = 1;
ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
cs_desc.s = cs_desc.g = cs_desc.present = 1;
kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
}
#endif

/* For the 64-bit case, this will clear EFER.LMA. */
cr0 = ctxt->ops->get_cr(ctxt, 0);
cr0 = kvm_read_cr0(vcpu);
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

if (emulator_has_longmode(ctxt)) {
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
/* Clear CR4.PAE before clearing EFER.LME. */
cr4 = ctxt->ops->get_cr(ctxt, 4);
cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PAE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

/* And finally go back to 32-bit mode. */
efer = 0;
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
kvm_set_msr(vcpu, MSR_EFER, efer);
}
#endif

/*
* Give leave_smm() a chance to make ISA-specific changes to the vCPU
@@ -580,7 +565,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
return rsm_load_state_64(ctxt, buf);
else
#endif
[diff of the third changed file was not loaded in this view]
