Skip to content

Commit 05066ca

Browse files
Claudio Imbrenda
authored and committed
s390/mm/fault: Handle guest-related program interrupts in KVM
Any program interrupt that happens in the host during the execution of a KVM guest will now short circuit the fault handler and return to KVM immediately. Guest fault handling (including pfault) will happen entirely inside KVM. When sie64a() returns zero, current->thread.gmap_int_code will contain the program interrupt number that caused the exit, or zero if the exit was not caused by a host program interrupt. KVM will now take care of handling all guest faults in vcpu_post_run(). Since gmap faults will not be visible by the rest of the kernel, remove GMAP_FAULT, the linux fault handlers for secure execution faults, the exception table entries for the sie instruction, the nop padding after the sie instruction, and all other references to guest faults from the s390 code. Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Co-developed-by: Heiko Carstens <hca@linux.ibm.com> Link: https://lore.kernel.org/r/20241022120601.167009-6-imbrenda@linux.ibm.com Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
1 parent 473aaf5 commit 05066ca

File tree

8 files changed

+143
-157
lines changed

8 files changed

+143
-157
lines changed

arch/s390/include/asm/kvm_host.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,9 @@ struct kvm_vcpu_stat {
527527
#define PGM_REGION_FIRST_TRANS 0x39
528528
#define PGM_REGION_SECOND_TRANS 0x3a
529529
#define PGM_REGION_THIRD_TRANS 0x3b
530+
#define PGM_SECURE_STORAGE_ACCESS 0x3d
531+
#define PGM_NON_SECURE_STORAGE_ACCESS 0x3e
532+
#define PGM_SECURE_STORAGE_VIOLATION 0x3f
530533
#define PGM_MONITOR 0x40
531534
#define PGM_PER 0x80
532535
#define PGM_CRYPTO_OPERATION 0x119

arch/s390/include/asm/processor.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <asm/runtime_instr.h>
4040
#include <asm/irqflags.h>
4141
#include <asm/alternative.h>
42+
#include <asm/fault.h>
4243

4344
struct pcpu {
4445
unsigned long ec_mask; /* bit mask for ec_xxx functions */
@@ -187,10 +188,8 @@ struct thread_struct {
187188
unsigned long hardirq_timer; /* task cputime in hardirq context */
188189
unsigned long softirq_timer; /* task cputime in softirq context */
189190
const sys_call_ptr_t *sys_call_table; /* system call table address */
190-
unsigned long gmap_addr; /* address of last gmap fault. */
191-
unsigned int gmap_write_flag; /* gmap fault write indication */
191+
union teid gmap_teid; /* address and flags of last gmap fault */
192192
unsigned int gmap_int_code; /* int code of last gmap fault */
193-
unsigned int gmap_pfault; /* signal of a pending guest pfault */
194193
int ufpu_flags; /* user fpu flags */
195194
int kfpu_flags; /* kernel fpu flags */
196195

arch/s390/kernel/entry.S

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -222,17 +222,6 @@ SYM_FUNC_START(__sie64a)
222222
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
223223
lg %r14,__LC_CURRENT(%r14)
224224
mvi __TI_sie(%r14),0
225-
# some program checks are suppressing. C code (e.g. do_protection_exception)
226-
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
227-
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
228-
# Other instructions between __sie64a and .Lsie_done should not cause program
229-
# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
230-
.Lrewind_pad6:
231-
nopr 7
232-
.Lrewind_pad4:
233-
nopr 7
234-
.Lrewind_pad2:
235-
nopr 7
236225
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
237226
lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
238227
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
@@ -244,15 +233,6 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
244233
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
245234
lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
246235
BR_EX %r14
247-
.Lsie_fault:
248-
lghi %r14,-EFAULT
249-
stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
250-
j sie_exit
251-
252-
EX_TABLE(.Lrewind_pad6,.Lsie_fault)
253-
EX_TABLE(.Lrewind_pad4,.Lsie_fault)
254-
EX_TABLE(.Lrewind_pad2,.Lsie_fault)
255-
EX_TABLE(sie_exit,.Lsie_fault)
256236
SYM_FUNC_END(__sie64a)
257237
EXPORT_SYMBOL(__sie64a)
258238
EXPORT_SYMBOL(sie_exit)
@@ -329,7 +309,6 @@ SYM_CODE_START(pgm_check_handler)
329309
BPOFF
330310
lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
331311
xgr %r10,%r10
332-
xgr %r12,%r12
333312
tmhh %r8,0x0001 # coming from user space?
334313
jno .Lpgm_skip_asce
335314
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
@@ -341,7 +320,6 @@ SYM_CODE_START(pgm_check_handler)
341320
jz 1f
342321
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
343322
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
344-
lg %r12,__SF_SIE_GUEST_ASCE(%r15)
345323
lghi %r10,_PIF_GUEST_FAULT
346324
#endif
347325
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
@@ -355,7 +333,6 @@ SYM_CODE_START(pgm_check_handler)
355333
3: lg %r15,__LC_KERNEL_STACK(%r13)
356334
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
357335
stg %r10,__PT_FLAGS(%r11)
358-
stg %r12,__PT_CR1(%r11)
359336
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
360337
stmg %r0,%r7,__PT_R0(%r11)
361338
mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
@@ -369,6 +346,7 @@ SYM_CODE_START(pgm_check_handler)
369346
xgr %r5,%r5
370347
xgr %r6,%r6
371348
xgr %r7,%r7
349+
xgr %r12,%r12
372350
lgr %r2,%r11
373351
brasl %r14,__do_pgm_check
374352
tmhh %r8,0x0001 # returning to user space?

arch/s390/kernel/traps.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <asm/asm-extable.h>
3232
#include <asm/vtime.h>
3333
#include <asm/fpu.h>
34+
#include <asm/fault.h>
3435
#include "entry.h"
3536

3637
static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -317,9 +318,24 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
317318
struct lowcore *lc = get_lowcore();
318319
irqentry_state_t state;
319320
unsigned int trapnr;
321+
union teid teid;
320322

323+
teid.val = lc->trans_exc_code;
321324
regs->int_code = lc->pgm_int_code;
322-
regs->int_parm_long = lc->trans_exc_code;
325+
regs->int_parm_long = teid.val;
326+
327+
/*
328+
* In case of a guest fault, short-circuit the fault handler and return.
329+
* This way the sie64a() function will return 0; fault address and
330+
* other relevant bits are saved in current->thread.gmap_teid, and
331+
* the fault number in current->thread.gmap_int_code. KVM will be
332+
* able to use this information to handle the fault.
333+
*/
334+
if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
335+
current->thread.gmap_teid.val = regs->int_parm_long;
336+
current->thread.gmap_int_code = regs->int_code & 0xffff;
337+
return;
338+
}
323339

324340
state = irqentry_enter(regs);
325341

@@ -408,8 +424,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
408424
[0x3b] = do_dat_exception,
409425
[0x3c] = default_trap_handler,
410426
[0x3d] = do_secure_storage_access,
411-
[0x3e] = do_non_secure_storage_access,
412-
[0x3f] = do_secure_storage_violation,
427+
[0x3e] = default_trap_handler,
428+
[0x3f] = default_trap_handler,
413429
[0x40] = monitor_event_exception,
414430
[0x41 ... 0x7f] = default_trap_handler,
415431
};
@@ -420,5 +436,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
420436
__stringify(default_trap_handler))
421437

422438
COND_TRAP(do_secure_storage_access);
423-
COND_TRAP(do_non_secure_storage_access);
424-
COND_TRAP(do_secure_storage_violation);

arch/s390/kvm/kvm-s390.c

Lines changed: 103 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4646,12 +4646,11 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
46464646
if (!vcpu->arch.gmap->pfault_enabled)
46474647
return false;
46484648

4649-
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4650-
hva += current->thread.gmap_addr & ~PAGE_MASK;
4649+
hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
46514650
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
46524651
return false;
46534652

4654-
return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4653+
return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
46554654
}
46564655

46574656
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -4689,14 +4688,15 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
46894688
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
46904689

46914690
vcpu->arch.sie_block->icptcode = 0;
4691+
current->thread.gmap_int_code = 0;
46924692
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
46934693
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
46944694
trace_kvm_s390_sie_enter(vcpu, cpuflags);
46954695

46964696
return 0;
46974697
}
46984698

4699-
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4699+
static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
47004700
{
47014701
struct kvm_s390_pgm_info pgm_info = {
47024702
.code = PGM_ADDRESSING,
@@ -4732,10 +4732,106 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
47324732
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
47334733
}
47344734

4735+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4736+
{
4737+
unsigned long gaddr;
4738+
unsigned int flags;
4739+
int rc = 0;
4740+
4741+
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4742+
if (kvm_s390_cur_gmap_fault_is_write())
4743+
flags = FAULT_FLAG_WRITE;
4744+
4745+
switch (current->thread.gmap_int_code) {
4746+
case 0:
4747+
vcpu->stat.exit_null++;
4748+
break;
4749+
case PGM_NON_SECURE_STORAGE_ACCESS:
4750+
KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4751+
"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4752+
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4753+
/*
4754+
* This is normal operation; a page belonging to a protected
4755+
* guest has not been imported yet. Try to import the page into
4756+
* the protected guest.
4757+
*/
4758+
if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
4759+
send_sig(SIGSEGV, current, 0);
4760+
break;
4761+
case PGM_SECURE_STORAGE_ACCESS:
4762+
case PGM_SECURE_STORAGE_VIOLATION:
4763+
KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4764+
"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4765+
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4766+
/*
4767+
* This can happen after a reboot with asynchronous teardown;
4768+
* the new guest (normal or protected) will run on top of the
4769+
* previous protected guest. The old pages need to be destroyed
4770+
* so the new guest can use them.
4771+
*/
4772+
if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
4773+
/*
4774+
* Either KVM messed up the secure guest mapping or the
4775+
* same page is mapped into multiple secure guests.
4776+
*
4777+
* This exception is only triggered when a guest 2 is
4778+
* running and can therefore never occur in kernel
4779+
* context.
4780+
*/
4781+
pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4782+
current->thread.gmap_int_code, current->comm,
4783+
current->pid);
4784+
send_sig(SIGSEGV, current, 0);
4785+
}
4786+
break;
4787+
case PGM_PROTECTION:
4788+
case PGM_SEGMENT_TRANSLATION:
4789+
case PGM_PAGE_TRANSLATION:
4790+
case PGM_ASCE_TYPE:
4791+
case PGM_REGION_FIRST_TRANS:
4792+
case PGM_REGION_SECOND_TRANS:
4793+
case PGM_REGION_THIRD_TRANS:
4794+
KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4795+
"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4796+
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4797+
if (vcpu->arch.gmap->pfault_enabled) {
4798+
rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
4799+
if (rc == -EFAULT)
4800+
return vcpu_post_run_addressing_exception(vcpu);
4801+
if (rc == -EAGAIN) {
4802+
trace_kvm_s390_major_guest_pfault(vcpu);
4803+
if (kvm_arch_setup_async_pf(vcpu))
4804+
return 0;
4805+
vcpu->stat.pfault_sync++;
4806+
} else {
4807+
return rc;
4808+
}
4809+
}
4810+
rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
4811+
if (rc == -EFAULT) {
4812+
if (kvm_is_ucontrol(vcpu->kvm)) {
4813+
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4814+
vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4815+
vcpu->run->s390_ucontrol.pgm_code = 0x10;
4816+
return -EREMOTE;
4817+
}
4818+
return vcpu_post_run_addressing_exception(vcpu);
4819+
}
4820+
break;
4821+
default:
4822+
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4823+
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4824+
send_sig(SIGSEGV, current, 0);
4825+
break;
4826+
}
4827+
return rc;
4828+
}
4829+
47354830
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
47364831
{
47374832
struct mcck_volatile_info *mcck_info;
47384833
struct sie_page *sie_page;
4834+
int rc;
47394835

47404836
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
47414837
vcpu->arch.sie_block->icptcode);
@@ -4757,7 +4853,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
47574853
}
47584854

47594855
if (vcpu->arch.sie_block->icptcode > 0) {
4760-
int rc = kvm_handle_sie_intercept(vcpu);
4856+
rc = kvm_handle_sie_intercept(vcpu);
47614857

47624858
if (rc != -EOPNOTSUPP)
47634859
return rc;
@@ -4766,24 +4862,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
47664862
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
47674863
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
47684864
return -EREMOTE;
4769-
} else if (exit_reason != -EFAULT) {
4770-
vcpu->stat.exit_null++;
4771-
return 0;
4772-
} else if (kvm_is_ucontrol(vcpu->kvm)) {
4773-
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4774-
vcpu->run->s390_ucontrol.trans_exc_code =
4775-
current->thread.gmap_addr;
4776-
vcpu->run->s390_ucontrol.pgm_code = 0x10;
4777-
return -EREMOTE;
4778-
} else if (current->thread.gmap_pfault) {
4779-
trace_kvm_s390_major_guest_pfault(vcpu);
4780-
current->thread.gmap_pfault = 0;
4781-
if (kvm_arch_setup_async_pf(vcpu))
4782-
return 0;
4783-
vcpu->stat.pfault_sync++;
4784-
return gmap_fault(vcpu->arch.gmap, current->thread.gmap_addr, FAULT_FLAG_WRITE);
47854865
}
4786-
return vcpu_post_run_fault_in_sie(vcpu);
4866+
4867+
return vcpu_post_run_handle_fault(vcpu);
47874868
}
47884869

47894870
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)

arch/s390/kvm/kvm-s390.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,13 @@ static inline int kvm_s390_use_sca_entries(void)
528528
void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
529529
struct mcck_volatile_info *mcck_info);
530530

531+
static inline bool kvm_s390_cur_gmap_fault_is_write(void)
532+
{
533+
if (current->thread.gmap_int_code == PGM_PROTECTION)
534+
return true;
535+
return test_facility(75) && (current->thread.gmap_teid.fsi == TEID_FSI_STORE);
536+
}
537+
531538
/**
532539
* kvm_s390_vcpu_crypto_reset_all
533540
*

arch/s390/kvm/vsie.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -925,16 +925,16 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
925925
if (current->thread.gmap_int_code == PGM_PROTECTION)
926926
/* we can directly forward all protection exceptions */
927927
return inject_fault(vcpu, PGM_PROTECTION,
928-
current->thread.gmap_addr, 1);
928+
current->thread.gmap_teid.addr * PAGE_SIZE, 1);
929929

930930
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
931-
current->thread.gmap_addr, NULL);
931+
current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
932932
if (rc > 0) {
933933
rc = inject_fault(vcpu, rc,
934-
current->thread.gmap_addr,
935-
current->thread.gmap_write_flag);
934+
current->thread.gmap_teid.addr * PAGE_SIZE,
935+
kvm_s390_cur_gmap_fault_is_write());
936936
if (rc >= 0)
937-
vsie_page->fault_addr = current->thread.gmap_addr;
937+
vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
938938
}
939939
return rc;
940940
}
@@ -1148,6 +1148,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
11481148
* also kick the vSIE.
11491149
*/
11501150
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
1151+
current->thread.gmap_int_code = 0;
11511152
barrier();
11521153
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
11531154
rc = sie64a(scb_s, vcpu->run->s.regs.gprs, gmap_get_enabled()->asce);
@@ -1172,7 +1173,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
11721173

11731174
if (rc > 0)
11741175
rc = 0; /* we could still have an icpt */
1175-
else if (rc == -EFAULT)
1176+
else if (current->thread.gmap_int_code)
11761177
return handle_fault(vcpu, vsie_page);
11771178

11781179
switch (scb_s->icptcode) {

0 commit comments

Comments (0)