Skip to content

Commit b91cc98

Browse files
committed
[hyperv] Cope with Windows Server 2016 enlightenments
An "enlightened" external bootloader (such as Windows Server 2016's winload.exe) may take ownership of the Hyper-V connection before all INT 13 operations have been completed. When this happens, all VMBus devices are implicitly closed and we are left with a non-functional network connection. Detect when our Hyper-V connection has been lost (by checking the SynIC message page MSR). Reclaim ownership of the Hyper-V connection and reestablish any VMBus devices, without disrupting any existing iPXE state (such as IPv4 settings attached to the network device). Windows Server 2016 will not cleanly take ownership of an active Hyper-V connection. Experimentation shows that we can quiesce by resetting only the SynIC message page MSR; this results in a successful SAN boot (on a Windows 2012 R2 physical host). Choose to quiesce by resetting (almost) all MSRs, in the hope that this will be more robust against corner cases such as a stray synthetic interrupt occurring during the handover. Signed-off-by: Michael Brown <mcb30@ipxe.org>
1 parent 276d618 commit b91cc98

File tree

6 files changed

+345
-14
lines changed

6 files changed

+345
-14
lines changed

src/arch/x86/drivers/hyperv/hyperv.c

Lines changed: 125 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
4040
#include <ipxe/malloc.h>
4141
#include <ipxe/device.h>
4242
#include <ipxe/timer.h>
43+
#include <ipxe/quiesce.h>
4344
#include <ipxe/cpuid.h>
4445
#include <ipxe/msr.h>
4546
#include <ipxe/hyperv.h>
@@ -299,6 +300,10 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
299300
uint64_t siefp;
300301
uint64_t scontrol;
301302

303+
/* Zero SynIC message and event pages */
304+
memset ( hv->synic.message, 0, PAGE_SIZE );
305+
memset ( hv->synic.event, 0, PAGE_SIZE );
306+
302307
/* Map SynIC message page */
303308
simp = rdmsr ( HV_X64_MSR_SIMP );
304309
simp &= ( PAGE_SIZE - 1 );
@@ -321,21 +326,14 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
321326
}
322327

323328
/**
324-
* Unmap synthetic interrupt controller
329+
* Unmap synthetic interrupt controller, leaving SCONTROL untouched
325330
*
326331
* @v hv Hyper-V hypervisor
327332
*/
328-
static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
329-
uint64_t scontrol;
333+
static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) {
330334
uint64_t siefp;
331335
uint64_t simp;
332336

333-
/* Disable SynIC */
334-
scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
335-
scontrol &= ~HV_SCONTROL_ENABLE;
336-
DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
337-
wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
338-
339337
/* Unmap SynIC event page */
340338
siefp = rdmsr ( HV_X64_MSR_SIEFP );
341339
siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
@@ -349,6 +347,24 @@ static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
349347
wrmsr ( HV_X64_MSR_SIMP, simp );
350348
}
351349

350+
/**
351+
* Unmap synthetic interrupt controller
352+
*
353+
* @v hv Hyper-V hypervisor
354+
*/
355+
static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
356+
uint64_t scontrol;
357+
358+
/* Disable SynIC */
359+
scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
360+
scontrol &= ~HV_SCONTROL_ENABLE;
361+
DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
362+
wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
363+
364+
/* Unmap SynIC event and message pages */
365+
hv_unmap_synic_no_scontrol ( hv );
366+
}
367+
352368
/**
353369
* Enable synthetic interrupt
354370
*
@@ -385,8 +401,12 @@ void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
385401
unsigned long msr = HV_X64_MSR_SINT ( sintx );
386402
uint64_t sint;
387403

388-
/* Disable synthetic interrupt */
404+
/* Do nothing if interrupt is already disabled */
389405
sint = rdmsr ( msr );
406+
if ( sint & HV_SINT_MASKED )
407+
return;
408+
409+
/* Disable synthetic interrupt */
390410
sint &= ~HV_SINT_AUTO_EOI;
391411
sint |= HV_SINT_MASKED;
392412
DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
@@ -589,6 +609,7 @@ static void hv_remove ( struct root_device *rootdev ) {
589609
hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
590610
NULL );
591611
free ( hv );
612+
rootdev_set_drvdata ( rootdev, NULL );
592613
}
593614

594615
/** Hyper-V root device driver */
@@ -603,6 +624,100 @@ struct root_device hv_root_device __root_device = {
603624
.driver = &hv_root_driver,
604625
};
605626

627+
/**
628+
* Quiesce system
629+
*
630+
*/
631+
static void hv_quiesce ( void ) {
632+
struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
633+
unsigned int i;
634+
635+
/* Do nothing if we are not running in Hyper-V */
636+
if ( ! hv )
637+
return;
638+
639+
/* The "enlightened" portions of the Windows Server 2016 boot
640+
* process will not cleanly take ownership of an active
641+
* Hyper-V connection. Experimentation shows that the minimum
642+
* requirement is that we disable the SynIC message page
643+
* (i.e. zero the SIMP MSR).
644+
*
645+
* We cannot perform a full shutdown of the Hyper-V
646+
* connection. Experimentation shows that if we disable the
647+
* SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016
648+
* will enter an indefinite wait loop.
649+
*
650+
* Attempt to create a safe handover environment by resetting
651+
* all MSRs except for SCONTROL.
652+
*
653+
* Note that we do not shut down our VMBus devices, since we
654+
* may need to unquiesce the system and continue operation.
655+
*/
656+
657+
/* Disable all synthetic interrupts */
658+
for ( i = 0 ; i <= HV_SINT_MAX ; i++ )
659+
hv_disable_sint ( hv, i );
660+
661+
/* Unmap synthetic interrupt controller, leaving SCONTROL
662+
* enabled (see above).
663+
*/
664+
hv_unmap_synic_no_scontrol ( hv );
665+
666+
/* Unmap hypercall page */
667+
hv_unmap_hypercall ( hv );
668+
669+
DBGC ( hv, "HV %p quiesced\n", hv );
670+
}
671+
672+
/**
673+
* Unquiesce system
674+
*
675+
*/
676+
static void hv_unquiesce ( void ) {
677+
struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
678+
uint64_t simp;
679+
int rc;
680+
681+
/* Do nothing if we are not running in Hyper-V */
682+
if ( ! hv )
683+
return;
684+
685+
/* Experimentation shows that the "enlightened" portions of
686+
* Windows Server 2016 will break our Hyper-V connection at
687+
* some point during a SAN boot. Surprisingly it does not
688+
* change the guest OS ID MSR, but it does leave the SynIC
689+
* message page disabled.
690+
*
691+
* Our own explicit quiescing procedure will also disable the
692+
* SynIC message page. We can therefore use the SynIC message
693+
* page enable bit as a heuristic to determine when we need to
694+
* reestablish our Hyper-V connection.
695+
*/
696+
simp = rdmsr ( HV_X64_MSR_SIMP );
697+
if ( simp & HV_SIMP_ENABLE )
698+
return;
699+
700+
/* Remap hypercall page */
701+
hv_map_hypercall ( hv );
702+
703+
/* Remap synthetic interrupt controller */
704+
hv_map_synic ( hv );
705+
706+
/* Reset Hyper-V devices */
707+
if ( ( rc = vmbus_reset ( hv, &hv_root_device.dev ) ) != 0 ) {
708+
DBGC ( hv, "HV %p could not unquiesce: %s\n",
709+
hv, strerror ( rc ) );
710+
/* Nothing we can do */
711+
return;
712+
}
713+
}
714+
715+
/** Hyper-V quiescer */
716+
struct quiescer hv_quiescer __quiescer = {
717+
.quiesce = hv_quiesce,
718+
.unquiesce = hv_unquiesce,
719+
};
720+
606721
/**
607722
* Probe timer
608723
*

src/drivers/net/netvsc.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,15 @@ static int netvsc_revoke_buffer ( struct netvsc_device *netvsc,
259259
struct netvsc_revoke_buffer_message msg;
260260
int rc;
261261

262+
/* If the buffer's GPADL is obsolete (i.e. was created before
263+
* the most recent Hyper-V reset), then we will never receive
264+
* a response to the revoke message. Since the GPADL is
265+
* already destroyed as far as the hypervisor is concerned, no
266+
* further action is required.
267+
*/
268+
if ( netvsc_is_obsolete ( netvsc ) )
269+
return 0;
270+
262271
/* Construct message */
263272
memset ( &msg, 0, sizeof ( msg ) );
264273
msg.header.type = cpu_to_le32 ( buffer->revoke_type );
@@ -474,6 +483,14 @@ static int netvsc_transmit ( struct rndis_device *rndis,
474483
uint64_t xid;
475484
int rc;
476485

486+
/* If the device is obsolete (i.e. was opened before the most
487+
* recent Hyper-V reset), then we will never receive transmit
488+
* completions. Fail transmissions immediately to minimise
489+
* the delay in closing and reopening the device.
490+
*/
491+
if ( netvsc_is_obsolete ( netvsc ) )
492+
return -EPIPE;
493+
477494
/* Sanity check */
478495
assert ( iob_len ( iobuf ) >= sizeof ( *header ) );
479496
assert ( iob_len ( iobuf ) == le32_to_cpu ( header->len ) );
@@ -823,6 +840,35 @@ static int netvsc_probe ( struct vmbus_device *vmdev ) {
823840
return rc;
824841
}
825842

843+
/**
844+
* Reset device
845+
*
846+
* @v vmdev VMBus device
847+
* @ret rc Return status code
848+
*/
849+
static int netvsc_reset ( struct vmbus_device *vmdev ) {
850+
struct rndis_device *rndis = vmbus_get_drvdata ( vmdev );
851+
struct netvsc_device *netvsc = rndis->priv;
852+
struct net_device *netdev = rndis->netdev;
853+
int rc;
854+
855+
/* A closed device holds no NetVSC (or RNDIS) state, so there
856+
* is nothing to reset.
857+
*/
858+
if ( ! netdev_is_open ( netdev ) )
859+
return 0;
860+
861+
/* Close and reopen device to reset any stale state */
862+
netdev_close ( netdev );
863+
if ( ( rc = netdev_open ( netdev ) ) != 0 ) {
864+
DBGC ( netvsc, "NETVSC %s could not reopen: %s\n",
865+
netvsc->name, strerror ( rc ) );
866+
return rc;
867+
}
868+
869+
return 0;
870+
}
871+
826872
/**
827873
* Remove device
828874
*
@@ -844,5 +890,6 @@ struct vmbus_driver netvsc_driver __vmbus_driver = {
844890
.type = VMBUS_TYPE ( 0xf8615163, 0xdf3e, 0x46c5, 0x913f,
845891
0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e ),
846892
.probe = netvsc_probe,
893+
.reset = netvsc_reset,
847894
.remove = netvsc_remove,
848895
};

src/drivers/net/netvsc.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,4 +362,19 @@ struct netvsc_device {
362362
int wait_rc;
363363
};
364364

365+
/**
366+
* Check if NetVSC device is obsolete
367+
*
368+
* @v netvsc NetVSC device
369+
* @ret is_obsolete NetVSC device is obsolete
370+
*
371+
* Check if NetVSC device is obsolete (i.e. was opened before the most
372+
* recent Hyper-V reset).
373+
*/
374+
static inline __attribute__ (( always_inline )) int
375+
netvsc_is_obsolete ( struct netvsc_device *netvsc ) {
376+
377+
return vmbus_gpadl_is_obsolete ( netvsc->rx.gpadl );
378+
}
379+
365380
#endif /* _NETVSC_H */

src/include/ipxe/hyperv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
6161
/** Synthetic interrupt vector mask */
6262
#define HV_SINT_VECTOR_MASK HV_SINT_VECTOR ( 0xff )
6363

64+
/** Maximum synthetic interrupt number */
65+
#define HV_SINT_MAX 15
66+
6467
/** Post message */
6568
#define HV_POST_MESSAGE 0x005c
6669

src/include/ipxe/vmbus.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,8 @@ struct vmbus_device {
479479
/** Hyper-V hypervisor */
480480
struct hv_hypervisor *hv;
481481

482+
/** Channel instance */
483+
union uuid instance;
482484
/** Channel ID */
483485
unsigned int channel;
484486
/** Monitor ID */
@@ -527,6 +529,12 @@ struct vmbus_driver {
527529
* @ret rc Return status code
528530
*/
529531
int ( * probe ) ( struct vmbus_device *vmdev );
532+
/** Reset device
533+
*
534+
* @v vmdev VMBus device
535+
* @ret rc Return status code
536+
*/
537+
int ( * reset ) ( struct vmbus_device *vmdev );
530538
/** Remove device
531539
*
532540
* @v vmdev VMBus device
@@ -609,6 +617,23 @@ vmbus_unregister_pages ( struct vmbus_device *vmdev,
609617
list_del ( &pages->list );
610618
}
611619

620+
extern unsigned int vmbus_obsolete_gpadl;
621+
622+
/**
623+
* Check if GPADL is obsolete
624+
*
625+
* @v gpadl GPADL ID
626+
* @ret is_obsolete GPADL ID is obsolete
627+
*
628+
* Check if GPADL is obsolete (i.e. was created before the most recent
629+
* Hyper-V reset).
630+
*/
631+
static inline __attribute__ (( always_inline )) int
632+
vmbus_gpadl_is_obsolete ( unsigned int gpadl ) {
633+
634+
return ( gpadl <= vmbus_obsolete_gpadl );
635+
}
636+
612637
extern int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
613638
size_t len );
614639
extern int vmbus_gpadl_teardown ( struct vmbus_device *vmdev,
@@ -629,6 +654,7 @@ extern int vmbus_poll ( struct vmbus_device *vmdev );
629654
extern void vmbus_dump_channel ( struct vmbus_device *vmdev );
630655

631656
extern int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent );
657+
extern int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent );
632658
extern void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent );
633659

634660
#endif /* _IPXE_VMBUS_H */

0 commit comments

Comments
 (0)