diff --git a/core/emulate.c b/core/emulate.c
index f4084414..576fdef4 100644
--- a/core/emulate.c
+++ b/core/emulate.c
@@ -586,28 +586,48 @@ static void register_add(struct em_context_t *ctxt,
 
 static uint8_t insn_fetch_u8(struct em_context_t *ctxt)
 {
-    uint8_t result = *(uint8_t *)(&ctxt->insn[ctxt->len]);
+    uint8_t result;
+
+    if (ctxt->len >= INSTR_MAX_LEN)
+        return 0;
+
+    result = *(uint8_t *)(&ctxt->insn[ctxt->len]);
     ctxt->len += 1;
     return result;
 }
 
 static uint16_t insn_fetch_u16(struct em_context_t *ctxt)
 {
-    uint16_t result = *(uint16_t *)(&ctxt->insn[ctxt->len]);
+    uint16_t result;
+
+    if (ctxt->len >= INSTR_MAX_LEN)
+        return 0;
+
+    result = *(uint16_t *)(&ctxt->insn[ctxt->len]);
     ctxt->len += 2;
     return result;
 }
 
 static uint32_t insn_fetch_u32(struct em_context_t *ctxt)
 {
-    uint32_t result = *(uint32_t *)(&ctxt->insn[ctxt->len]);
+    uint32_t result;
+
+    if (ctxt->len >= INSTR_MAX_LEN)
+        return 0;
+
+    result = *(uint32_t *)(&ctxt->insn[ctxt->len]);
     ctxt->len += 4;
     return result;
 }
 
 static uint64_t insn_fetch_u64(struct em_context_t *ctxt)
 {
-    uint64_t result = *(uint64_t *)(&ctxt->insn[ctxt->len]);
+    uint64_t result;
+
+    if (ctxt->len >= INSTR_MAX_LEN)
+        return 0;
+
+    result = *(uint64_t *)(&ctxt->insn[ctxt->len]);
     ctxt->len += 8;
     return result;
 }
diff --git a/core/hax.c b/core/hax.c
index 9bc5ea79..0f85c14c 100644
--- a/core/hax.c
+++ b/core/hax.c
@@ -267,6 +267,44 @@ static int hax_vmx_enable_check(void)
     return 0;
 }
 
+/*
+ * Allows the guest to read from and/or write to the specified MSRs without
+ * causing a VM exit.
+ * |start| is the start MSR address, |count| the number of MSRs. Together they
+ * specify a range of consecutive MSR addresses.
+ * |read| and |write| determine if each MSR can be read or written freely by the
+ * guest, respectively.
+ */
+static void set_msr_access(uint32_t start, uint32_t count, bool read, bool write)
+{
+    uint32_t end = start + count - 1;
+    uint32_t read_base, write_base, bit;
+    uint8_t *msr_bitmap = hax_page_va(msr_bitmap_page);
+
+    hax_assert(((start ^ (start << 1)) & 0x80000000) == 0);
+    hax_assert((start & 0x3fffe000) == 0);
+    hax_assert(((start ^ end) & 0xffffe000) == 0);
+    hax_assert(msr_bitmap);
+
+    // See IA SDM Vol. 3C 24.6.9 for the layout of the MSR bitmaps page
+    read_base = start & 0x80000000 ? 1024 : 0;
+    write_base = read_base + 2048;
+    for (bit = (start & 0x1fff); bit <= (end & 0x1fff); bit++) {
+        // Bit clear means allowed
+        if (read) {
+            btr(msr_bitmap + read_base, bit);
+        } else {
+            bts(msr_bitmap + read_base, bit);
+        }
+
+        if (write) {
+            btr(msr_bitmap + write_base, bit);
+        } else {
+            bts(msr_bitmap + write_base, bit);
+        }
+    }
+}
+
 static int hax_vmx_init(void)
 {
     int ret = -ENOMEM;
@@ -297,6 +335,15 @@ static int hax_vmx_init(void)
     if ((ret = hax_vmx_enable_check()) < 0)
         goto out_5;
 
+    // Set MSRs loaded on VM entries/exits to pass-through
+    // See Intel SDM Vol. 3C 24.6.9 (MSR-Bitmap Address)
+
+    // 4 consecutive MSRs starting from IA32_STAR:
+    // IA32_STAR, IA32_LSTAR, IA32_CSTAR and IA32_SF_MASK
+    set_msr_access(IA32_STAR, 4, true, true);
+    set_msr_access(IA32_KERNEL_GS_BASE, 1, true, true);
+    set_msr_access(IA32_TSC_AUX, 1, true, true);
+
     return 0;
 out_5:
     hax_disable_vmx();
@@ -393,44 +440,6 @@ int hax_get_capability(void *buf, int bufLeng, int *outLength)
     return 0;
 }
 
-/*
- * Allows the guest to read from and/or write to the specified MSRs without
- * causing a VM exit.
- * |start| is the start MSR address, |count| the number of MSRs. Together they
- * specify a range of consecutive MSR addresses.
- * |read| and |write| determine if each MSR can be read or written freely by the
- * guest, respectively.
- */
-static void set_msr_access(uint32_t start, uint32_t count, bool read, bool write)
-{
-    uint32_t end = start + count - 1;
-    uint32_t read_base, write_base, bit;
-    uint8_t *msr_bitmap = hax_page_va(msr_bitmap_page);
-
-    hax_assert(((start ^ (start << 1)) & 0x80000000) == 0);
-    hax_assert((start & 0x3fffe000) == 0);
-    hax_assert(((start ^ end) & 0xffffe000) == 0);
-    hax_assert(msr_bitmap);
-
-    // See IA SDM Vol. 3C 24.6.9 for the layout of the MSR bitmaps page
-    read_base = start & 0x80000000 ? 1024 : 0;
-    write_base = read_base + 2048;
-    for (bit = (start & 0x1fff); bit <= (end & 0x1fff); bit++) {
-        // Bit clear means allowed
-        if (read) {
-            btr(msr_bitmap + read_base, bit);
-        } else {
-            bts(msr_bitmap + read_base, bit);
-        }
-
-        if (write) {
-            btr(msr_bitmap + write_base, bit);
-        } else {
-            bts(msr_bitmap + write_base, bit);
-        }
-    }
-}
-
 /*
  * Probes the host CPU to determine its performance monitoring capabilities.
  */
diff --git a/core/include/cpu.h b/core/include/cpu.h
index bc91cd58..80b89682 100644
--- a/core/include/cpu.h
+++ b/core/include/cpu.h
@@ -45,6 +45,12 @@ struct vcpu_t;
 struct vcpu_state_t;
 
 #define NR_HMSR 6
+// The number of MSRs to be loaded on VM exits
+// Currently the MSR list only supports automatic loading of the MSRs below,
+// whose total count is 8:
+// * IA32_PMCx
+// * IA32_PERFEVTSELx
+#define NR_HMSR_AUTOLOAD 8
 
 struct hstate {
     /* ldt is not covered by host vmcs area */
@@ -65,6 +71,7 @@ struct hstate {
     uint64_t fs_base;
     uint64_t hcr2;
     struct vmx_msr hmsr[NR_HMSR];
+    vmx_msr_entry hmsr_autoload[NR_HMSR_AUTOLOAD];
     // IA32_PMCx, since APM v1
     uint64_t apm_pmc_msrs[APM_MAX_GENERAL_COUNT];
     // IA32_PERFEVTSELx, since APM v1
diff --git a/core/include/emulate.h b/core/include/emulate.h
index ea15f209..8582ede0 100644
--- a/core/include/emulate.h
+++ b/core/include/emulate.h
@@ -99,6 +99,10 @@ struct em_operand_t;
 /* Emulator interface flags */
 #define EM_OPS_NO_TRANSLATION (1 << 0)
 
+// Instructions are never longer than 15 bytes:
+// http://wiki.osdev.org/X86-64_Instruction_Encoding
+#define INSTR_MAX_LEN 15
+
 typedef struct em_vcpu_ops_t {
     uint64_t (*read_gpr)(void *vcpu, uint32_t reg_index);
     void (*write_gpr)(void *vcpu, uint32_t reg_index, uint64_t value);
diff --git a/core/include/vcpu.h b/core/include/vcpu.h
index 3cc8fa3b..ddfcfb10 100644
--- a/core/include/vcpu.h
+++ b/core/include/vcpu.h
@@ -42,9 +42,18 @@
 
 #define NR_GMSR 5
 #define NR_EMT64MSR 6
+// The number of MSRs to be loaded on VM entries
+// Currently the MSR list only supports automatic loading of the MSRs below,
+// whose total count is 14:
+// * IA32_PMCx
+// * IA32_PERFEVTSELx
+// * IA32_TSC_AUX
+// * all MSRs defined in gmsr_list[]
+#define NR_GMSR_AUTOLOAD 14
 
 struct gstate {
     struct vmx_msr gmsr[NR_GMSR];
+    vmx_msr_entry gmsr_autoload[NR_GMSR_AUTOLOAD];
     // IA32_PMCx, since APM v1
     uint64_t apm_pmc_msrs[APM_MAX_GENERAL_COUNT];
     // IA32_PERFEVTSELx, since APM v1
diff --git a/core/include/vmx.h b/core/include/vmx.h
index 22ad4e75..216f63a6 100644
--- a/core/include/vmx.h
+++ b/core/include/vmx.h
@@ -639,6 +639,12 @@ struct invept_desc {
     uint64_t rsvd;
 };
 
+// Intel SDM Vol. 3C: Table 24-12. Format of an MSR Entry
+typedef struct ALIGNED(16) vmx_msr_entry {
+    uint64_t index;
+    uint64_t data;
+} vmx_msr_entry;
+
 struct vcpu_state_t;
 struct vcpu_t;
 
diff --git a/core/vcpu.c b/core/vcpu.c
index 21561f8d..e6c7fac3 100644
--- a/core/vcpu.c
+++ b/core/vcpu.c
@@ -47,29 +47,31 @@
 #include "include/hax_core_interface.h"
 #include "include/hax_driver.h"
 
+// The explicit casts prevent the upper 32 bits of the array elements from
+// being filled with 1s due to sign extension of the enum type.
 uint64_t gmsr_list[NR_GMSR] = {
-    IA32_STAR,
-    IA32_LSTAR,
-    IA32_CSTAR,
-    IA32_SF_MASK,
-    IA32_KERNEL_GS_BASE
+    (uint32_t)IA32_STAR,
+    (uint32_t)IA32_LSTAR,
+    (uint32_t)IA32_CSTAR,
+    (uint32_t)IA32_SF_MASK,
+    (uint32_t)IA32_KERNEL_GS_BASE
 };
 
 uint64_t hmsr_list[NR_HMSR] = {
-    IA32_EFER,
-    IA32_STAR,
-    IA32_LSTAR,
-    IA32_CSTAR,
-    IA32_SF_MASK,
-    IA32_KERNEL_GS_BASE
+    (uint32_t)IA32_EFER,
+    (uint32_t)IA32_STAR,
+    (uint32_t)IA32_LSTAR,
+    (uint32_t)IA32_CSTAR,
+    (uint32_t)IA32_SF_MASK,
+    (uint32_t)IA32_KERNEL_GS_BASE
 };
 
 uint64_t emt64_msr[NR_EMT64MSR] = {
-    IA32_STAR,
-    IA32_LSTAR,
-    IA32_CSTAR,
-    IA32_SF_MASK,
-    IA32_KERNEL_GS_BASE
+    (uint32_t)IA32_STAR,
+    (uint32_t)IA32_LSTAR,
+    (uint32_t)IA32_CSTAR,
+    (uint32_t)IA32_SF_MASK,
+    (uint32_t)IA32_KERNEL_GS_BASE
 };
 
 static void vcpu_init(struct vcpu_t *vcpu);
@@ -985,27 +987,35 @@ void load_guest_msr(struct vcpu_t *vcpu)
     int i;
     struct gstate *gstate = &vcpu->gstate;
     bool em64t_support = cpu_has_feature(X86_FEATURE_EM64T);
+    uint32_t count = 0;
 
-    for (i = 0; i < NR_GMSR; i++) {
+    for (i = 0; i < NR_GMSR; ++i) {
         if (em64t_support || !is_emt64_msr(gstate->gmsr[i].entry)) {
-            ia32_wrmsr(gstate->gmsr[i].entry, gstate->gmsr[i].value);
+            gstate->gmsr_autoload[count].index = gstate->gmsr[i].entry;
+            gstate->gmsr_autoload[count++].data = gstate->gmsr[i].value;
         }
     }
 
     if (cpu_has_feature(X86_FEATURE_RDTSCP)) {
-        ia32_wrmsr(IA32_TSC_AUX, gstate->tsc_aux);
+        gstate->gmsr_autoload[count].index = (uint32_t)IA32_TSC_AUX;
+        gstate->gmsr_autoload[count++].data = gstate->tsc_aux;
     }
 
     if (!hax->apm_version)
         return;
 
     // APM v1: restore IA32_PMCx and IA32_PERFEVTSELx
-    for (i = 0; i < (int)hax->apm_general_count; i++) {
-        uint32_t msr = (uint32_t)(IA32_PMC0 + i);
-        ia32_wrmsr(msr, gstate->apm_pmc_msrs[i]);
-        msr = (uint32_t)(IA32_PERFEVTSEL0 + i);
-        ia32_wrmsr(msr, gstate->apm_pes_msrs[i]);
+    for (i = 0; i < (int)hax->apm_general_count; ++i) {
+        gstate->gmsr_autoload[count].index = (uint32_t)(IA32_PMC0 + i);
+        gstate->gmsr_autoload[count++].data = gstate->apm_pmc_msrs[i];
     }
+
+    for (i = 0; i < (int)hax->apm_general_count; ++i) {
+        gstate->gmsr_autoload[count].index = (uint32_t)(IA32_PERFEVTSEL0 + i);
+        gstate->gmsr_autoload[count++].data = gstate->apm_pes_msrs[i];
+    }
+
+    vmwrite(vcpu, VMX_ENTRY_MSR_LOAD_COUNT, count);
 }
 
 static void save_host_msr(struct vcpu_t *vcpu)
@@ -1042,13 +1052,26 @@ static void load_host_msr(struct vcpu_t *vcpu)
     int i;
     struct hstate *hstate = &get_cpu_data(vcpu->cpu_id)->hstate;
     bool em64t_support = cpu_has_feature(X86_FEATURE_EM64T);
-
-    for (i = 0; i < NR_HMSR; i++) {
+    uint32_t count = 0;
+
+    // Load the MSR values below manually on VM exits.
+
+    // * IA32_STAR, IA32_LSTAR and IA32_SF_MASK
+    //   The host will crash immediately on automatic load. See IA SDM Vol. 3C
+    //   31.10.4.3 (Handling the SYSCALL and SYSRET Instructions).
+    // * IA32_EFER and IA32_CSTAR
+    //   See the same section as above.
+    // * IA32_KERNEL_GS_BASE
+    //   See IA SDM Vol. 3C 31.10.4.4 (Handling the SWAPGS Instruction).
+    for (i = 0; i < NR_HMSR; ++i) {
         if (em64t_support || !is_emt64_msr(hstate->hmsr[i].entry)) {
             ia32_wrmsr(hstate->hmsr[i].entry, hstate->hmsr[i].value);
         }
     }
 
+    // * IA32_TSC_AUX
+    //   Automatic loading causes a BSOD on the host after a while, sometimes
+    //   even after the VM has been shut down.
     if (cpu_has_feature(X86_FEATURE_RDTSCP)) {
         ia32_wrmsr(IA32_TSC_AUX, hstate->tsc_aux);
     }
@@ -1056,13 +1079,23 @@ static void load_host_msr(struct vcpu_t *vcpu)
     if (!hax->apm_version)
         return;
 
+    // Load the MSR values below automatically on VM exits.
+
+    // TODO: Trap guest writes to the IA32_PERFEVTSELx MSRs and automatically
+    // load the host values below only when the IA32_PERFEVTSELx MSRs have been
+    // changed during guest runtime.
     // APM v1: restore IA32_PMCx and IA32_PERFEVTSELx
-    for (i = 0; i < (int)hax->apm_general_count; i++) {
-        uint32_t msr = (uint32_t)(IA32_PMC0 + i);
-        ia32_wrmsr(msr, hstate->apm_pmc_msrs[i]);
-        msr = (uint32_t)(IA32_PERFEVTSEL0 + i);
-        ia32_wrmsr(msr, hstate->apm_pes_msrs[i]);
+    for (i = 0; i < (int)hax->apm_general_count; ++i) {
+        hstate->hmsr_autoload[count].index = (uint32_t)(IA32_PMC0 + i);
+        hstate->hmsr_autoload[count++].data = hstate->apm_pmc_msrs[i];
     }
+
+    for (i = 0; i < (int)hax->apm_general_count; ++i) {
+        hstate->hmsr_autoload[count].index = (uint32_t)(IA32_PERFEVTSEL0 + i);
+        hstate->hmsr_autoload[count++].data = hstate->apm_pes_msrs[i];
+    }
+
+    vmwrite(vcpu, VMX_EXIT_MSR_LOAD_COUNT, count);
 }
 
 static inline bool is_host_debug_enabled(struct vcpu_t *vcpu)
@@ -1172,6 +1205,14 @@ static void load_guest_dr(struct vcpu_t *vcpu)
     if (!(is_guest_dr_dirty(vcpu) || is_host_debug_enabled(vcpu)))
         return;
 
+    // Reset DR7 to zero before setting DR0.
+    // If the host has enabled guest debugging, setting a kernel address in
+    // DR0 could otherwise trigger spurious exceptions in the host.
+    // Spurious exceptions encountered in unexpected conditions (such as with
+    // the user GS loaded, though this particular case does not seem to be
+    // triggerable here) can lead to privilege escalation.
+    set_dr7(0);
+
     set_dr0(state->_dr0);
     set_dr1(state->_dr1);
     set_dr2(state->_dr2);
@@ -1495,12 +1536,14 @@ static void fill_common_vmcs(struct vcpu_t *vcpu)
 
     vmwrite(vcpu, VMX_EXIT_MSR_STORE_ADDRESS, 0);
     vmwrite(vcpu, VMX_EXIT_MSR_LOAD_COUNT, 0);
-    vmwrite(vcpu, VMX_EXIT_MSR_LOAD_ADDRESS, 0);
+    vmwrite(vcpu, VMX_EXIT_MSR_LOAD_ADDRESS,
+            (uint64_t)hax_pa(cpu_data->hstate.hmsr_autoload));
 
     vmwrite(vcpu, VMX_ENTRY_INTERRUPT_INFO, 0);
     // vmwrite(NULL, VMX_ENTRY_EXCEPTION_ERROR_CODE, 0);
     vmwrite(vcpu, VMX_ENTRY_MSR_LOAD_COUNT, 0);
-    vmwrite(vcpu, VMX_ENTRY_MSR_LOAD_ADDRESS, 0);
+    vmwrite(vcpu, VMX_ENTRY_MSR_LOAD_ADDRESS,
+            (uint64_t)hax_pa(vcpu->gstate.gmsr_autoload));
     vmwrite(vcpu, VMX_ENTRY_INSTRUCTION_LENGTH, 0);
     // vmwrite(NULL, VMX_TPR_THRESHOLD, 0);
 
@@ -2109,10 +2152,6 @@ static void vcpu_exit_fpu_state(struct vcpu_t *vcpu)
     }
 }
 
-// Instructions are never longer than 15 bytes:
-// http://wiki.osdev.org/X86-64_Instruction_Encoding
-#define INSTR_MAX_LEN 15
-
 static bool qemu_support_fastmmio(struct vcpu_t *vcpu)
 {
     struct vm_t *vm = vcpu->vm;
@@ -3092,7 +3131,7 @@ static int handle_msr_read(struct vcpu_t *vcpu, uint32_t msr, uint64_t *val)
         case IA32_SF_MASK:
         case IA32_KERNEL_GS_BASE: {
             for (index = 0; index < NR_GMSR; index++) {
-                if ((uint32_t)gstate->gmsr[index].entry == msr) {
+                if (gstate->gmsr[index].entry == msr) {
                     *val = gstate->gmsr[index].value;
                     break;
                 }
@@ -3426,7 +3465,7 @@ static int handle_msr_write(struct vcpu_t *vcpu, uint32_t msr, uint64_t val,
         case IA32_SF_MASK:
         case IA32_KERNEL_GS_BASE: {
             for (index = 0; index < NR_GMSR; index++) {
-                if ((uint32_t)gmsr_list[index] == msr) {
+                if (gmsr_list[index] == msr) {
                     gstate->gmsr[index].value = val;
                     gstate->gmsr[index].entry = msr;
                     break;
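
Note (illustrative, not part of the patch): set_msr_access() above relies on the MSR-bitmap layout described in Intel SDM Vol. 3C 24.6.9: the read bitmap for low MSRs starts at byte offset 0, the read bitmap for high MSRs (0xC0000000..0xC0001FFF) at 1024, the two write bitmaps at 2048 and 3072, and the bit index comes from the low 13 bits of the MSR address. The standalone sketch below recomputes that mapping; the helper name and the values printed in main() are hypothetical and exist only for illustration. Clearing the located bit lets the access pass through, while setting it forces a VM exit, which is exactly what the patch toggles with btr()/bts().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Returns the byte offset (within the 4 KiB MSR bitmaps page) and the bit
// position that control interception of |msr| for the given access type.
static void msr_bitmap_locate(uint32_t msr, bool write,
                              uint32_t *byte_offset, uint32_t *bit_in_byte)
{
    // Low MSRs (0x00000000..0x00001FFF) use the first 1 KiB of each half;
    // high MSRs (0xC0000000..0xC0001FFF) use the second 1 KiB.
    uint32_t base = (msr & 0x80000000) ? 1024 : 0;
    uint32_t bit = msr & 0x1fff;

    if (write)
        base += 2048;  // the write bitmaps follow the two read bitmaps

    *byte_offset = base + bit / 8;
    *bit_in_byte = bit % 8;
}

int main(void)
{
    uint32_t off, bit;

    // IA32_LSTAR is 0xC0000082: expect the read-high block, byte 1024 + 0x82 / 8.
    msr_bitmap_locate(0xC0000082, false, &off, &bit);
    printf("IA32_LSTAR read bit:  byte %u, bit %u\n", off, bit);

    msr_bitmap_locate(0xC0000082, true, &off, &bit);
    printf("IA32_LSTAR write bit: byte %u, bit %u\n", off, bit);
    return 0;
}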