@@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
4040#include <ipxe/malloc.h>
4141#include <ipxe/device.h>
4242#include <ipxe/timer.h>
43+ #include <ipxe/quiesce.h>
4344#include <ipxe/cpuid.h>
4445#include <ipxe/msr.h>
4546#include <ipxe/hyperv.h>
@@ -299,6 +300,10 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
299300 uint64_t siefp ;
300301 uint64_t scontrol ;
301302
303+ /* Zero SynIC message and event pages */
304+ memset ( hv -> synic .message , 0 , PAGE_SIZE );
305+ memset ( hv -> synic .event , 0 , PAGE_SIZE );
306+
302307 /* Map SynIC message page */
303308 simp = rdmsr ( HV_X64_MSR_SIMP );
304309 simp &= ( PAGE_SIZE - 1 );
@@ -321,21 +326,14 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
321326}
322327
323328/**
324- * Unmap synthetic interrupt controller
329+ * Unmap synthetic interrupt controller, leaving SCONTROL untouched
325330 *
326331 * @v hv Hyper-V hypervisor
327332 */
328- static void hv_unmap_synic ( struct hv_hypervisor * hv ) {
329- uint64_t scontrol ;
333+ static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor * hv ) {
330334 uint64_t siefp ;
331335 uint64_t simp ;
332336
333- /* Disable SynIC */
334- scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
335- scontrol &= ~HV_SCONTROL_ENABLE ;
336- DBGC2 ( hv , "HV %p SCONTROL MSR is %#08llx\n" , hv , scontrol );
337- wrmsr ( HV_X64_MSR_SCONTROL , scontrol );
338-
339337 /* Unmap SynIC event page */
340338 siefp = rdmsr ( HV_X64_MSR_SIEFP );
341339 siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
@@ -349,6 +347,24 @@ static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
349347 wrmsr ( HV_X64_MSR_SIMP , simp );
350348}
351349
350+ /**
351+ * Disable and unmap synthetic interrupt controller
352+ *
353+ * @v hv Hyper-V hypervisor
354+ */
355+ static void hv_unmap_synic ( struct hv_hypervisor * hv ) {
356+ uint64_t scontrol ;
357+
358+ /* Disable SynIC: read-modify-write SCONTROL with the enable bit cleared */
359+ scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
360+ scontrol &= ~HV_SCONTROL_ENABLE ;
361+ DBGC2 ( hv , "HV %p SCONTROL MSR is %#08llx\n" , hv , scontrol );
362+ wrmsr ( HV_X64_MSR_SCONTROL , scontrol );
363+
364+ /* Unmap SynIC event and message pages (helper does not touch SCONTROL) */
365+ hv_unmap_synic_no_scontrol ( hv );
366+ }
367+
352368/**
353369 * Enable synthetic interrupt
354370 *
@@ -385,8 +401,12 @@ void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
385401 unsigned long msr = HV_X64_MSR_SINT ( sintx );
386402 uint64_t sint ;
387403
388- /* Disable synthetic interrupt */
404+ /* Do nothing if interrupt is already disabled */
389405 sint = rdmsr ( msr );
406+ if ( sint & HV_SINT_MASKED )
407+ return ;
408+
409+ /* Disable synthetic interrupt */
390410 sint &= ~HV_SINT_AUTO_EOI ;
391411 sint |= HV_SINT_MASKED ;
392412 DBGC2 ( hv , "HV %p SINT%d MSR is %#08llx\n" , hv , sintx , sint );
@@ -589,6 +609,7 @@ static void hv_remove ( struct root_device *rootdev ) {
589609 hv_free_pages ( hv , hv -> hypercall , hv -> synic .message , hv -> synic .event ,
590610 NULL );
591611 free ( hv );
612+ rootdev_set_drvdata ( rootdev , NULL );
592613}
593614
594615/** Hyper-V root device driver */
@@ -603,6 +624,100 @@ struct root_device hv_root_device __root_device = {
603624 .driver = & hv_root_driver ,
604625};
605626
627+ /**
628+ * Quiesce system: mask all synthetic interrupts, then unmap the SynIC pages and the hypercall page
629+ * The SCONTROL MSR is deliberately not modified (rationale in the body comment).
630+ */
631+ static void hv_quiesce ( void ) {
632+ struct hv_hypervisor * hv = rootdev_get_drvdata ( & hv_root_device );
633+ unsigned int i ;
634+
635+ /* Do nothing if we are not running in Hyper-V (no driver data on the root device) */
636+ if ( ! hv )
637+ return ;
638+
639+ /* The "enlightened" portions of the Windows Server 2016 boot
640+ * process will not cleanly take ownership of an active
641+ * Hyper-V connection. Experimentation shows that the minimum
642+ * requirement is that we disable the SynIC message page
643+ * (i.e. zero the SIMP MSR).
644+ *
645+ * We cannot perform a full shutdown of the Hyper-V
646+ * connection. Experimentation shows that if we disable the
647+ * SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016
648+ * will enter an indefinite wait loop.
649+ *
650+ * Attempt to create a safe handover environment by resetting
651+ * all MSRs except for SCONTROL.
652+ *
653+ * Note that we do not shut down our VMBus devices, since we
654+ * may need to unquiesce the system and continue operation.
655+ */
656+
657+ /* Disable all synthetic interrupts (indices 0 to HV_SINT_MAX inclusive) */
658+ for ( i = 0 ; i <= HV_SINT_MAX ; i ++ )
659+ hv_disable_sint ( hv , i );
660+
661+ /* Unmap synthetic interrupt controller, leaving SCONTROL
662+ * enabled (see above).
663+ */
664+ hv_unmap_synic_no_scontrol ( hv );
665+
666+ /* Unmap hypercall page */
667+ hv_unmap_hypercall ( hv );
668+
669+ DBGC ( hv , "HV %p quiesced\n" , hv );
670+ }
671+
672+ /**
673+ * Unquiesce system: reestablish the Hyper-V connection if the SynIC message page is found disabled
674+ * A failure to reset the VMBus devices is logged only; this function returns no status.
675+ */
676+ static void hv_unquiesce ( void ) {
677+ struct hv_hypervisor * hv = rootdev_get_drvdata ( & hv_root_device );
678+ uint64_t simp ;
679+ int rc ;
680+
681+ /* Do nothing if we are not running in Hyper-V (no driver data on the root device) */
682+ if ( ! hv )
683+ return ;
684+
685+ /* Experimentation shows that the "enlightened" portions of
686+ * Windows Server 2016 will break our Hyper-V connection at
687+ * some point during a SAN boot. Surprisingly it does not
688+ * change the guest OS ID MSR, but it does leave the SynIC
689+ * message page disabled.
690+ *
691+ * Our own explicit quiescing procedure will also disable the
692+ * SynIC message page. We can therefore use the SynIC message
693+ * page enable bit as a heuristic to determine when we need to
694+ * reestablish our Hyper-V connection.
695+ */
696+ simp = rdmsr ( HV_X64_MSR_SIMP );
697+ if ( simp & HV_SIMP_ENABLE )
698+ return ;
699+
700+ /* Remap hypercall page */
701+ hv_map_hypercall ( hv );
702+
703+ /* Remap synthetic interrupt controller */
704+ hv_map_synic ( hv );
705+
706+ /* Reset Hyper-V devices attached to the root device */
707+ if ( ( rc = vmbus_reset ( hv , & hv_root_device .dev ) ) != 0 ) {
708+ DBGC ( hv , "HV %p could not unquiesce: %s\n" ,
709+ hv , strerror ( rc ) );
710+ /* Nothing we can do: there is no return value to carry the error */
711+ return ;
712+ }
713+ }
714+
715+ /** Hyper-V quiescer: pairs hv_quiesce() with hv_unquiesce() */
716+ struct quiescer hv_quiescer __quiescer = {
717+ .quiesce = hv_quiesce ,
718+ .unquiesce = hv_unquiesce ,
719+ };
720+
606721/**
607722 * Probe timer
608723 *
0 commit comments