Skip to content
Permalink
Browse files
[hyperv] Cope with Windows Server 2016 enlightenments
An "enlightened" external bootloader (such as Windows Server 2016's
winload.exe) may take ownership of the Hyper-V connection before all
INT 13 operations have been completed.  When this happens, all VMBus
devices are implicitly closed and we are left with a non-functional
network connection.

Detect when our Hyper-V connection has been lost (by checking the
SynIC message page MSR).  Reclaim ownership of the Hyper-V connection
and reestablish any VMBus devices, without disrupting any existing
iPXE state (such as IPv4 settings attached to the network device).

Windows Server 2016 will not cleanly take ownership of an active
Hyper-V connection.  Experimentation shows that we can quiesce by
resetting only the SynIC message page MSR; this results in a
successful SAN boot (on a Windows 2012 R2 physical host).  Choose to
quiesce by resetting (almost) all MSRs, in the hope that this will be
more robust against corner cases such as a stray synthetic interrupt
occurring during the handover.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
  • Loading branch information
mcb30 committed Apr 28, 2017
1 parent 276d618 commit b91cc983da48b2791a672431551f7859e33126ec
Showing 6 changed files with 345 additions and 14 deletions.
@@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <ipxe/malloc.h>
#include <ipxe/device.h>
#include <ipxe/timer.h>
#include <ipxe/quiesce.h>
#include <ipxe/cpuid.h>
#include <ipxe/msr.h>
#include <ipxe/hyperv.h>
@@ -299,6 +300,10 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
uint64_t siefp;
uint64_t scontrol;

/* Zero SynIC message and event pages */
memset ( hv->synic.message, 0, PAGE_SIZE );
memset ( hv->synic.event, 0, PAGE_SIZE );

/* Map SynIC message page */
simp = rdmsr ( HV_X64_MSR_SIMP );
simp &= ( PAGE_SIZE - 1 );
@@ -321,21 +326,14 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
}

/**
* Unmap synthetic interrupt controller
* Unmap synthetic interrupt controller, leaving SCONTROL untouched
*
* @v hv Hyper-V hypervisor
*/
static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
uint64_t scontrol;
static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) {
uint64_t siefp;
uint64_t simp;

/* Disable SynIC */
scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
scontrol &= ~HV_SCONTROL_ENABLE;
DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
wrmsr ( HV_X64_MSR_SCONTROL, scontrol );

/* Unmap SynIC event page */
siefp = rdmsr ( HV_X64_MSR_SIEFP );
siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
@@ -349,6 +347,24 @@ static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
wrmsr ( HV_X64_MSR_SIMP, simp );
}

/**
 * Disable and unmap synthetic interrupt controller
 *
 * @v hv		Hyper-V hypervisor
 *
 * Clears the enable bit in the SCONTROL MSR and then unmaps the SynIC
 * event and message pages.
 */
static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
	uint64_t control = rdmsr ( HV_X64_MSR_SCONTROL );

	/* Turn off the SynIC enable bit, leaving all other bits as read */
	control &= ~HV_SCONTROL_ENABLE;
	DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, control );
	wrmsr ( HV_X64_MSR_SCONTROL, control );

	/* Tear down the event and message page mappings */
	hv_unmap_synic_no_scontrol ( hv );
}

/**
* Enable synthetic interrupt
*
@@ -385,8 +401,12 @@ void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
unsigned long msr = HV_X64_MSR_SINT ( sintx );
uint64_t sint;

/* Disable synthetic interrupt */
/* Do nothing if interrupt is already disabled */
sint = rdmsr ( msr );
if ( sint & HV_SINT_MASKED )
return;

/* Disable synthetic interrupt */
sint &= ~HV_SINT_AUTO_EOI;
sint |= HV_SINT_MASKED;
DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
@@ -589,6 +609,7 @@ static void hv_remove ( struct root_device *rootdev ) {
hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
NULL );
free ( hv );
rootdev_set_drvdata ( rootdev, NULL );
}

/** Hyper-V root device driver */
@@ -603,6 +624,100 @@ struct root_device hv_root_device __root_device = {
.driver = &hv_root_driver,
};

/**
 * Quiesce system
 *
 * Prepares the Hyper-V connection for handover to an external
 * bootloader by masking every synthetic interrupt and unmapping the
 * SynIC pages and the hypercall page.  SCONTROL is deliberately left
 * untouched.
 */
static void hv_quiesce ( void ) {
	struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
	unsigned int sintx;

	/* Nothing to quiesce unless we are running in Hyper-V */
	if ( ! hv )
		return;

	/* Rationale for the partial shutdown performed below:
	 *
	 * - The "enlightened" portions of the Windows Server 2016
	 *   boot process will not cleanly take ownership of an active
	 *   Hyper-V connection.  Experimentation shows that the
	 *   minimum requirement is that the SynIC message page is
	 *   disabled (i.e. the SIMP MSR is zeroed).
	 *
	 * - A full shutdown is not possible.  Experimentation shows
	 *   that if the SynIC itself is disabled (i.e. the SCONTROL
	 *   MSR is zeroed) then Windows Server 2016 will enter an
	 *   indefinite wait loop.
	 *
	 * - We therefore try to create a safe handover environment by
	 *   resetting all MSRs except for SCONTROL.
	 *
	 * - VMBus devices are not shut down, since we may need to
	 *   unquiesce the system and continue operation.
	 */

	/* Mask every synthetic interrupt source */
	sintx = 0;
	while ( sintx <= HV_SINT_MAX ) {
		hv_disable_sint ( hv, sintx );
		sintx++;
	}

	/* Unmap the SynIC pages while leaving SCONTROL enabled (see
	 * rationale above).
	 */
	hv_unmap_synic_no_scontrol ( hv );

	/* Unmap the hypercall page */
	hv_unmap_hypercall ( hv );

	DBGC ( hv, "HV %p quiesced\n", hv );
}

/**
 * Unquiesce system
 *
 * Reestablishes the Hyper-V connection if it appears to have been
 * lost: remaps the hypercall page and the SynIC, then resets all
 * VMBus devices.  Does nothing if the SynIC message page is still
 * enabled.
 */
static void hv_unquiesce ( void ) {
	struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
	uint64_t message_page;
	int rc;

	/* Nothing to unquiesce unless we are running in Hyper-V */
	if ( ! hv )
		return;

	/* Decide whether the connection needs to be reestablished.
	 *
	 * Experimentation shows that the "enlightened" portions of
	 * Windows Server 2016 will break our Hyper-V connection at
	 * some point during a SAN boot.  Surprisingly, the guest OS
	 * ID MSR is left unchanged, but the SynIC message page is
	 * left disabled.
	 *
	 * Our own explicit quiescing procedure also disables the
	 * SynIC message page, so the message page enable bit serves
	 * as a heuristic covering both cases.
	 */
	message_page = rdmsr ( HV_X64_MSR_SIMP );
	if ( message_page & HV_SIMP_ENABLE )
		return;

	/* Restore the hypercall page mapping first, then the SynIC */
	hv_map_hypercall ( hv );
	hv_map_synic ( hv );

	/* Reset Hyper-V devices; on failure there is nothing further
	 * we can do beyond reporting the error.
	 */
	rc = vmbus_reset ( hv, &hv_root_device.dev );
	if ( rc != 0 ) {
		DBGC ( hv, "HV %p could not unquiesce: %s\n",
		       hv, strerror ( rc ) );
	}
}

/** Hyper-V quiescer
 *
 * Supplies hv_quiesce() and hv_unquiesce() as the quiesce and
 * unquiesce handlers for the Hyper-V connection.
 */
struct quiescer hv_quiescer __quiescer = {
.quiesce = hv_quiesce,
.unquiesce = hv_unquiesce,
};

/**
* Probe timer
*
@@ -259,6 +259,15 @@ static int netvsc_revoke_buffer ( struct netvsc_device *netvsc,
struct netvsc_revoke_buffer_message msg;
int rc;

/* If the buffer's GPADL is obsolete (i.e. was created before
* the most recent Hyper-V reset), then we will never receive
* a response to the revoke message. Since the GPADL is
* already destroyed as far as the hypervisor is concerned, no
* further action is required.
*/
if ( netvsc_is_obsolete ( netvsc ) )
return 0;

/* Construct message */
memset ( &msg, 0, sizeof ( msg ) );
msg.header.type = cpu_to_le32 ( buffer->revoke_type );
@@ -474,6 +483,14 @@ static int netvsc_transmit ( struct rndis_device *rndis,
uint64_t xid;
int rc;

/* If the device is obsolete (i.e. was opened before the most
* recent Hyper-V reset), then we will never receive transmit
* completions. Fail transmissions immediately to minimise
* the delay in closing and reopening the device.
*/
if ( netvsc_is_obsolete ( netvsc ) )
return -EPIPE;

/* Sanity check */
assert ( iob_len ( iobuf ) >= sizeof ( *header ) );
assert ( iob_len ( iobuf ) == le32_to_cpu ( header->len ) );
@@ -823,6 +840,35 @@ static int netvsc_probe ( struct vmbus_device *vmdev ) {
return rc;
}

/**
 * Reset device
 *
 * @v vmdev		VMBus device
 * @ret rc		Return status code
 *
 * If the underlying network device is open, it is closed and
 * reopened in order to discard stale state; a closed device is left
 * alone.
 */
static int netvsc_reset ( struct vmbus_device *vmdev ) {
	struct rndis_device *rndis = vmbus_get_drvdata ( vmdev );
	struct net_device *netdev = rndis->netdev;
	struct netvsc_device *netvsc = rndis->priv;
	int rc = 0;

	/* Only an open device holds NetVSC (or RNDIS) state that
	 * could have become stale.
	 */
	if ( netdev_is_open ( netdev ) ) {

		/* Cycle the device through close and open */
		netdev_close ( netdev );
		rc = netdev_open ( netdev );
		if ( rc != 0 ) {
			DBGC ( netvsc, "NETVSC %s could not reopen: %s\n",
			       netvsc->name, strerror ( rc ) );
		}
	}

	return rc;
}

/**
* Remove device
*
@@ -844,5 +890,6 @@ struct vmbus_driver netvsc_driver __vmbus_driver = {
.type = VMBUS_TYPE ( 0xf8615163, 0xdf3e, 0x46c5, 0x913f,
0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e ),
.probe = netvsc_probe,
.reset = netvsc_reset,
.remove = netvsc_remove,
};
@@ -362,4 +362,19 @@ struct netvsc_device {
int wait_rc;
};

/**
 * Check if NetVSC device is obsolete
 *
 * @v netvsc		NetVSC device
 * @ret is_obsolete	NetVSC device is obsolete
 *
 * Check if NetVSC device is obsolete (i.e. was opened before the most
 * recent Hyper-V reset).  The check is made against the GPADL of the
 * device's receive buffer.
 */
static inline __attribute__ (( always_inline )) int
netvsc_is_obsolete ( struct netvsc_device *netvsc ) {

	return vmbus_gpadl_is_obsolete ( netvsc->rx.gpadl );
}

#endif /* _NETVSC_H */
@@ -61,6 +61,9 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
/** Synthetic interrupt vector mask */
#define HV_SINT_VECTOR_MASK HV_SINT_VECTOR ( 0xff )

/** Maximum synthetic interrupt number */
#define HV_SINT_MAX 15

/** Post message */
#define HV_POST_MESSAGE 0x005c

@@ -479,6 +479,8 @@ struct vmbus_device {
/** Hyper-V hypervisor */
struct hv_hypervisor *hv;

/** Channel instance */
union uuid instance;
/** Channel ID */
unsigned int channel;
/** Monitor ID */
@@ -527,6 +529,12 @@ struct vmbus_driver {
* @ret rc Return status code
*/
int ( * probe ) ( struct vmbus_device *vmdev );
/** Reset device
*
* @v vmdev VMBus device
* @ret rc Return status code
*/
int ( * reset ) ( struct vmbus_device *vmdev );
/** Remove device
*
* @v vmdev VMBus device
@@ -609,6 +617,23 @@ vmbus_unregister_pages ( struct vmbus_device *vmdev,
list_del ( &pages->list );
}

extern unsigned int vmbus_obsolete_gpadl;

/**
 * Check if GPADL is obsolete
 *
 * @v gpadl		GPADL ID
 * @ret is_obsolete	GPADL ID is obsolete
 *
 * Check if GPADL is obsolete (i.e. was created before the most recent
 * Hyper-V reset).  A GPADL ID is treated as obsolete when it is less
 * than or equal to vmbus_obsolete_gpadl.
 */
static inline __attribute__ (( always_inline )) int
vmbus_gpadl_is_obsolete ( unsigned int gpadl ) {

	return ( gpadl <= vmbus_obsolete_gpadl );
}

extern int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
size_t len );
extern int vmbus_gpadl_teardown ( struct vmbus_device *vmdev,
@@ -629,6 +654,7 @@ extern int vmbus_poll ( struct vmbus_device *vmdev );
extern void vmbus_dump_channel ( struct vmbus_device *vmdev );

extern int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent );
extern int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent );
extern void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent );

#endif /* _IPXE_VMBUS_H */

0 comments on commit b91cc98

Please sign in to comment.