Skip to content

Commit

Permalink
arch-x86: Add XCR0 register and add to X86KvmCPU
Browse files Browse the repository at this point in the history
The extended control registers were not being updated in the KVM thread
context nor updated in the KVM state. This was causing issues when
checkpointing since the XCR0 value was reverting to the default value
rather than what it was previously before the checkpoint. THis was
causing multiple applications to crash due to executing instructions
which are now illegal instructions due to XCR0 being incorrect.

This commit adds the XCR0 as a misc register similar to the exiting x86
control registers and adds all of the helper functions to access and set
the register value. It also adds support for updating the KVM CPU's
state with the register value and updating the thread context's misc reg
value so that it is checkpointed along with the other misc regs.

Note that this does *not* add support for XSAVE of the AVX state (i.e.,
the upper 128 bits of YMM registers). It does however fix the immediate
problem in issue gem5#958 .

A checkpoint upgrader is also provided to add the default value of XCR0
if the checkpoint tag is missing.

Change-Id: I97456c8b57cbc7b381bd4be94944ce6567a43c76
  • Loading branch information
abmerop authored and hnpl committed Jun 9, 2024
1 parent f435438 commit 4a5160a
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/arch/x86/fs_workload.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,11 @@ FsWorkload::initState()
// Point to the page tables.
tc->setMiscReg(misc_reg::Cr3, PageMapLevel4);

// Only used if cr4.osxsave is set
XCR0 xcr0 = tc->readMiscRegNoEffect(misc_reg::Xcr0);
xcr0.x87 = 1; // Must be 1 according to x86 specification
tc->setMiscReg(misc_reg::Xcr0, xcr0);

Efer efer = tc->readMiscRegNoEffect(misc_reg::Efer);
// Enable long mode.
efer.lme = 1;
Expand Down
2 changes: 2 additions & 0 deletions src/arch/x86/isa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
break;
case misc_reg::Cr8:
break;
case misc_reg::Xcr0:
break;
case misc_reg::Rflags:
{
RFLAGS rflags = val;
Expand Down
30 changes: 30 additions & 0 deletions src/arch/x86/kvm/x86_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ X86KvmCPU::updateKvmState()
updateKvmStateSRegs();
updateKvmStateFPU();
updateKvmStateMSRs();
updateKvmStateXCRs();

DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
if (debug::KvmContext)
Expand Down Expand Up @@ -996,6 +997,22 @@ X86KvmCPU::updateKvmStateMSRs()
setMSRs(msrs);
}

void
X86KvmCPU::updateKvmStateXCRs()
{
struct kvm_xcrs xcrs;

xcrs.nr_xcrs = NumXCRegs;
xcrs.flags = 0;

for (int i = 0; i < xcrs.nr_xcrs; ++i) {
xcrs.xcrs[i].xcr = i;
xcrs.xcrs[i].value = tc->readMiscReg(misc_reg::xcr(i));
}

setXCRs(xcrs);
}

void
X86KvmCPU::updateThreadContext()
{
Expand Down Expand Up @@ -1023,6 +1040,7 @@ X86KvmCPU::updateThreadContext()
updateThreadContextFPU(fpu);
}
updateThreadContextMSRs();
updateThreadContextXCRs();

// The M5 misc reg caches some values from other
// registers. Writing to it with side effects causes it to be
Expand Down Expand Up @@ -1189,6 +1207,18 @@ X86KvmCPU::updateThreadContextMSRs()
}
}

void
X86KvmCPU::updateThreadContextXCRs()
{
struct kvm_xcrs xcrs;

getXCRs(xcrs);

for (int i = 0; i < xcrs.nr_xcrs; ++i) {
tc->setMiscReg(misc_reg::xcr(xcrs.xcrs[i].xcr), xcrs.xcrs[i].value);
}
}

void
X86KvmCPU::deliverInterrupts()
{
Expand Down
4 changes: 4 additions & 0 deletions src/arch/x86/kvm/x86_cpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ class X86KvmCPU : public BaseKvmCPU
void updateKvmStateFPUXSave();
/** Update MSR registers */
void updateKvmStateMSRs();
/** Update XCR registers */
void updateKvmStateXCRs();
/** @} */

/**
Expand All @@ -236,6 +238,8 @@ class X86KvmCPU : public BaseKvmCPU
void updateThreadContextXSave(const struct kvm_xsave &kxsave);
/** Update MSR registers */
void updateThreadContextMSRs();
/** Update XCR registers */
void updateThreadContextXCRs();
/** @} */

/** Transfer gem5's CPUID values into the virtual CPU. */
Expand Down
28 changes: 28 additions & 0 deletions src/arch/x86/regs/misc.hh
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ enum : RegIndex
// "Fake" MSRs for internally implemented devices
PciConfigAddress,

XcrBase,
Xcr0 = XcrBase,

NumRegs
};

Expand All @@ -424,6 +427,13 @@ cr(int index)
return CrBase + index;
}

static inline RegIndex
xcr(int index)
{
assert(index >= 0 && index < NumXCRegs);
return XcrBase + index;
}

static inline RegIndex
dr(int index)
{
Expand Down Expand Up @@ -649,6 +659,24 @@ BitUnion64(CR8)
Bitfield<3, 0> tpr; // Task Priority Register
EndBitUnion(CR8)

BitUnion64(XCR0)
Bitfield<0> x87; // x87 FPU/MMX support (must be 1)
Bitfield<1> sse; // XSAVE support for MXCSR and XMM registers
Bitfield<2> avx; // AVX enabled and XSAVE support for upper halves of YMM
// registers
Bitfield<3> bndreg; // MPX enabled and XSAVE support for BND0-BND3
// registers
Bitfield<4> bndsrc; // MPX enabled and XSAVE support for BNDCFGU and
// BNDSTATUS registers
Bitfield<5> opmask; // AVX-512 enabled and XSAVE support for opmask
// registers k0-k7
Bitfield<6> zmm_hi256; // AVX-512 enabled and XSAVE support for upper
// halves of lower ZMM registers
Bitfield<7> hi16_zmm; // AVX-512 enabled and XSAVE support for upper ZMM
// registers
Bitfield<9> pkru; // XSAVE support for PKRU register
EndBitUnion(XCR0)

BitUnion64(DR6)
Bitfield<0> b0;
Bitfield<1> b1;
Expand Down
1 change: 1 addition & 0 deletions src/arch/x86/x86_traits.hh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace X86ISA

const int NumCRegs = 16;
const int NumDRegs = 8;
const int NumXCRegs = 1;

const int NumSegments = 6;
const int NumSysSegments = 4;
Expand Down
46 changes: 46 additions & 0 deletions util/cpt_upgraders/x86-add-xcr0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


def upgrader(cpt):
"""
Update the checkpoint to include the XCR0 register if X86 checkpoint.
The value is set to the default of 1.
"""

import re

for sec in cpt.sections():
if re.search(r".*sys.*\.cpu.*\.isa$", sec):
if cpt.get(sec, "isaName") == "x86":
regVals = cpt.get(sec, "regVal")

# Add the default value of XCR0 (1) if missing
regVals = f"{regVals} 1"
cpt.set(sec, "regVal", regVals)

0 comments on commit 4a5160a

Please sign in to comment.