Skip to content

Commit

Permalink
vfio: fix vfio device fail to initialize issue for 64k page size
Browse files Browse the repository at this point in the history
Currently, vfio device fails to initialize as the msix-cap region in BAR
is mapped as RW region.

To resolve the initialization issue, this commit avoids mapping the
msix-cap region in the BAR. However, this solution introduces another
problem where aligning the msix table offset in the BAR to the page
size may cause overlap with the MMIO RW region, leading to reduced
performance. By enlarging the entire region in the BAR and relocating
the msix table to achieve page size alignment, this problem can be
overcome effectively.

Fixes: #5292
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
  • Loading branch information
jongwu authored and michael2012z committed Jun 19, 2023
1 parent 5a9dd74 commit a718716
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 23 deletions.
10 changes: 10 additions & 0 deletions pci/src/msix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,16 @@ impl MsixCap {
self.pba & 0xffff_fff8
}

/// Set the MSI-X table offset within its BAR, preserving the BIR field.
///
/// In the MSI-X capability, the Table Offset/BIR register keeps the BAR
/// Indicator Register (BIR) in bits 0-2 and the table offset in bits 3-31,
/// so `addr` must have its low three bits clear (8-byte aligned).
pub fn table_set_offset(&mut self, addr: u32) {
    // An unaligned address added with `+` could carry into the BIR bits;
    // OR-ing an aligned offset composes the register fields safely.
    debug_assert_eq!(addr & 0x7, 0, "MSI-X table offset must be 8-byte aligned");
    self.table &= 0x7;
    self.table |= addr;
}

/// Set the MSI-X Pending Bit Array (PBA) offset within its BAR, preserving
/// the BIR field.
///
/// The PBA Offset/BIR register keeps the BIR in bits 0-2 and the PBA offset
/// in bits 3-31, so `addr` must have its low three bits clear (8-byte aligned).
pub fn pba_set_offset(&mut self, addr: u32) {
    // An unaligned address added with `+` could carry into the BIR bits;
    // OR-ing an aligned offset composes the register fields safely.
    debug_assert_eq!(addr & 0x7, 0, "MSI-X PBA offset must be 8-byte aligned");
    self.pba &= 0x7;
    self.pba |= addr;
}

/// BAR Indicator Register (BIR) for the MSI-X table: bits 0-2 of the
/// Table Offset/BIR register select which BAR holds the table.
pub fn table_bir(&self) -> u32 {
    self.table & 0b111
}
Expand Down
98 changes: 75 additions & 23 deletions pci/src/vfio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ use vfio_bindings::bindings::vfio::*;
use vfio_ioctls::{
VfioContainer, VfioDevice, VfioIrq, VfioRegionInfoCap, VfioRegionSparseMmapArea,
};
use vm_allocator::page_size::{
align_page_size_down, align_page_size_up, is_4k_aligned, is_4k_multiple, is_page_size_aligned,
};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::interrupt::{
InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
Expand Down Expand Up @@ -499,6 +502,29 @@ impl VfioCommon {
Ok(vfio_common)
}

/// If the MSI-X table offset inside BAR `bar_id` is not page-size aligned,
/// enlarge the BAR and relocate the MSI-X table/PBA so that the trapped
/// (MSI-X) range and the mmap'd MMIO RW range never share a page.
///
/// Returns the (possibly enlarged) region size to allocate for the BAR.
/// When no fixup is needed, `region_size` is returned unchanged.
fn fixup_msix_region(&mut self, bar_id: u32, region_size: u64) -> u64 {
    let msix = self.interrupt.msix.as_mut().unwrap();
    let msix_cap = &mut msix.cap;

    // NOTE(review): this assumes the PBA lives in the same BAR as the table
    // (table_bir == pba_bir) — only table_bir is checked below, yet the PBA
    // offset is rewritten too. Confirm against devices that split them.
    let (table_offset, table_size) = msix_cap.table_range();
    // Nothing to do when the table is already page aligned, or when the
    // MSI-X structures do not live in this BAR at all.
    if is_page_size_aligned(table_offset) || msix_cap.table_bir() != bar_id {
        return region_size;
    }

    let (pba_offset, pba_size) = msix_cap.pba_range();
    let msix_sz = align_page_size_up(table_size + pba_size);
    // Expand region to hold RW and trap region which both page size aligned.
    // BAR sizes are powers of two, so doubling keeps natural alignment;
    // take whichever half is large enough to hold the MSI-X structures.
    let size = std::cmp::max(region_size * 2, msix_sz * 2);
    // Place the table at the middle of the enlarged region: the midpoint of
    // a power-of-two-sized region is page aligned.
    msix_cap.table_set_offset((size / 2) as u32);
    // Keep the PBA at the same distance from the table as before.
    // NOTE(review): this assumes pba_offset >= table_offset; an underflow
    // here would wrap — verify the PBA always follows the table.
    msix_cap.pba_set_offset((size / 2 + pba_offset - table_offset) as u32);

    size
}

pub(crate) fn allocate_bars(
&mut self,
allocator: &Arc<Mutex<SystemAllocator>>,
Expand Down Expand Up @@ -662,7 +688,9 @@ impl VfioCommon {
.ok_or(PciDeviceError::IoAllocationFailed(region_size))?
}
PciBarRegionType::Memory64BitRegion => {
// BAR allocation must be naturally aligned
// We need do some fixup to keep MMIO RW region and msix cap region page size
// aligned.
region_size = self.fixup_msix_region(bar_id, region_size);
mmio_allocator
.allocate(
restored_bar_addr,
Expand Down Expand Up @@ -806,6 +834,23 @@ impl VfioCommon {
});
}

/// Walk the device's PCI capability list and return the config-space offset
/// of the MSI-X capability, or `None` when the device does not expose one.
pub(crate) fn get_msix_cap_idx(&self) -> Option<usize> {
    // The head of the capability list lives at the standard capability
    // pointer in config space.
    let mut offset = self
        .vfio_wrapper
        .read_config_byte(PCI_CONFIG_CAPABILITY_OFFSET);

    loop {
        // A next-pointer of zero terminates the list.
        if offset == 0 {
            return None;
        }
        // Byte 0 of a capability header is the capability ID.
        let id = self.vfio_wrapper.read_config_byte(offset.into());
        if PciCapabilityId::from(id) == PciCapabilityId::MsiX {
            return Some(offset as usize);
        }
        // Byte 1 of a capability header is the next-capability pointer.
        offset = self.vfio_wrapper.read_config_byte((offset + 1).into());
    }
}

pub(crate) fn parse_capabilities(&mut self, bdf: PciBdf) {
let mut cap_next = self
.vfio_wrapper
Expand Down Expand Up @@ -1160,6 +1205,15 @@ impl VfioCommon {
return self.configuration.read_reg(reg_idx);
}

if let Some(id) = self.get_msix_cap_idx() {
let msix = self.interrupt.msix.as_mut().unwrap();
if reg_idx * 4 == id + 4 {
return msix.cap.table;
} else if reg_idx * 4 == id + 8 {
return msix.cap.pba;
}
}

// Since we don't support passing multi-functions devices, we should
// mask the multi-function bit, bit 7 of the Header Type byte on the
// register 3.
Expand Down Expand Up @@ -1322,20 +1376,6 @@ impl VfioPciDevice {
self.iommu_attached
}

fn align_page_size(address: u64) -> u64 {
// SAFETY: FFI call. Trivially safe.
let page_size = unsafe { sysconf(_SC_PAGESIZE) as u64 };
(address + page_size - 1) & !(page_size - 1)
}

fn is_4k_aligned(address: u64) -> bool {
(address & 0xfff) == 0
}

fn is_4k_multiple(size: u64) -> bool {
(size & 0xfff) == 0
}

fn generate_sparse_areas(
caps: &[VfioRegionInfoCap],
region_index: u32,
Expand All @@ -1347,14 +1387,14 @@ impl VfioPciDevice {
match cap {
VfioRegionInfoCap::SparseMmap(sparse_mmap) => return Ok(sparse_mmap.areas.clone()),
VfioRegionInfoCap::MsixMappable => {
if !Self::is_4k_aligned(region_start) {
if !is_4k_aligned(region_start) {
error!(
"Region start address 0x{:x} must be at least aligned on 4KiB",
region_start
);
return Err(VfioPciError::RegionAlignment);
}
if !Self::is_4k_multiple(region_size) {
if !is_4k_multiple(region_size) {
error!(
"Region size 0x{:x} must be at least a multiple of 4KiB",
region_size
Expand All @@ -1366,40 +1406,43 @@ impl VfioPciDevice {
// the MSI-X PBA table, we must calculate the subregions
// around them, leading to a list of sparse areas.
// We want to make sure we will still trap MMIO accesses
// to these MSI-X specific ranges.
// to these MSI-X specific ranges. If these region don't align
// with pagesize, we can achive it by enlarging its range.
//
// Using a BtreeMap as the list provided through the iterator is sorted
// by key. This ensures proper split of the whole region.
let mut inter_ranges = BTreeMap::new();
if let Some(msix) = vfio_msix {
if region_index == msix.cap.table_bir() {
let (offset, size) = msix.cap.table_range();
let offset = align_page_size_down(offset);
let size = align_page_size_up(size);
inter_ranges.insert(offset, size);
}
if region_index == msix.cap.pba_bir() {
let (offset, size) = msix.cap.pba_range();
let offset = align_page_size_down(offset);
let size = align_page_size_up(size);
inter_ranges.insert(offset, size);
}
}

let mut sparse_areas = Vec::new();
let mut current_offset = 0;
for (range_offset, range_size) in inter_ranges {
let range_offset = Self::align_page_size(range_offset);
if range_offset > current_offset {
sparse_areas.push(VfioRegionSparseMmapArea {
offset: current_offset,
size: range_offset - current_offset,
});
}

current_offset = Self::align_page_size(range_offset + range_size);
current_offset = align_page_size_down(range_offset + range_size);
}

if region_size > current_offset {
sparse_areas.push(VfioRegionSparseMmapArea {
offset: Self::align_page_size(current_offset),
size: Self::align_page_size(region_size - current_offset),
offset: current_offset,
size: region_size - current_offset,
});
}

Expand Down Expand Up @@ -1491,6 +1534,15 @@ impl VfioPciDevice {
return Err(VfioPciError::MmapArea);
}

if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) {
warn!(
"Could not mmap sparse area that is not page size aligned (offset = 0x{:x}, size = 0x{:x})",
area.offset,
area.size,
);
return Ok(());
}

let user_memory_region = UserMemoryRegion {
slot: (self.memory_slot)(),
start: region.start.0 + area.offset,
Expand Down

0 comments on commit a718716

Please sign in to comment.