From bc0160ea6c60d56416622b247a4577194a324035 Mon Sep 17 00:00:00 2001
From: Alexandra Bara
Date: Wed, 27 May 2026 16:30:14 -0500
Subject: [PATCH] enhancements

---
Reviewer notes (this region is ignored by `git am` / `git apply`):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Hunk line counts were reconciled against the `@@` headers, but the
  leading indentation of context lines is reconstructed; run
  `git apply --check` against the target tree before applying.
* NOTE(review): `_is_amd_gpu_pcie_endpoint` binds `t0.class_code.val` to
  `base_class` and compares it with `_PCI_BASE_CLASS_BRIDGE` (0x06). If
  `class_code` holds the full 24-bit class code rather than only the base
  class byte, the comparison needs `(val >> 16) & 0xFF` -- confirm against
  the register definition.
* `check_gpu_count` filters on the vendor ID before calling the helper, so
  the helper's own vendor check is redundant on that path and every device
  counted in `bridge_count` matched either the bridge device-ID set or the
  bridge class comparison.
* With `grep -i`, the `Accelerator` alternative already matches
  `Processing accelerators`; the longer alternative is redundant but harmless.

 .../device_enumeration_collector.py           |  6 +++-
 .../plugins/inband/pcie/pcie_analyzer.py      | 28 +++++++++++++++++--
 .../test_device_enumeration_collector.py      | 12 ++++++--
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py b/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py
index 82a82f91..9b0dc295 100644
--- a/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py
+++ b/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py
@@ -38,7 +38,11 @@ class DeviceEnumerationCollector(InBandDataCollector[DeviceEnumerationDataModel,
 
     DATA_MODEL = DeviceEnumerationDataModel
 
-    CMD_GPU_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'VGA\\|Display\\|3D' | wc -l"
+    CMD_GPU_COUNT_LINUX = (
+        "lspci -d {vendorid_ep}: | grep -iE "
+        "'VGA|Display|3D|Processing accelerators|Co-processor|Accelerator' | "
+        "grep -vi 'Virtual Function' | wc -l"
+    )
     CMD_VF_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'Virtual Function' | wc -l"
     CMD_LSCPU_LINUX = "lscpu"
     CMD_LSHW_LINUX = "lshw"
diff --git a/nodescraper/plugins/inband/pcie/pcie_analyzer.py b/nodescraper/plugins/inband/pcie/pcie_analyzer.py
index 7d9a7e58..43bf0213 100755
--- a/nodescraper/plugins/inband/pcie/pcie_analyzer.py
+++ b/nodescraper/plugins/inband/pcie/pcie_analyzer.py
@@ -53,6 +53,9 @@
 
 T_CAP = TypeVar("T_CAP", bound=PcieCapStructure)
 
+_AMD_PCIE_BRIDGE_DEVICE_IDS = frozenset({0x1500, 0x1501})
+_PCI_BASE_CLASS_BRIDGE = 0x06
+
 
 class PcieAnalyzerInputModel(BaseModel):
     """
@@ -870,6 +873,20 @@ def filter_pcie_data_by_device_id(
                 new_cfg_space_dict[bdf] = pcie_data
         return new_cfg_space_dict
 
+    @staticmethod
+    def _is_amd_gpu_pcie_endpoint(cfg_space: PcieCfgSpace, vendorid_ep: int) -> bool:
+        """True if this config space is an AMD GPU/accelerator endpoint, not a bridge."""
+        t0 = cfg_space.type_0_configuration
+        if t0.vendor_id.val != vendorid_ep:
+            return False
+        device_id = t0.device_id.val
+        if device_id in _AMD_PCIE_BRIDGE_DEVICE_IDS:
+            return False
+        base_class = t0.class_code.val
+        if base_class == _PCI_BASE_CLASS_BRIDGE:
+            return False
+        return True
+
     def check_gpu_count(
         self,
         pcie_data: PcieDataModel,
@@ -888,10 +905,15 @@ def check_gpu_count(
             return
 
         gpu_count_from_pcie = 0
+        bridge_count = 0
         for cfg_space in pcie_data.pcie_cfg_space.values():
-            vendor_id = cfg_space.type_0_configuration.vendor_id.val
-            if vendor_id == self.system_info.vendorid_ep:
+            t0 = cfg_space.type_0_configuration
+            if t0.vendor_id.val != self.system_info.vendorid_ep:
+                continue
+            if self._is_amd_gpu_pcie_endpoint(cfg_space, self.system_info.vendorid_ep):
                 gpu_count_from_pcie += 1
+            else:
+                bridge_count += 1
 
         if gpu_count_from_pcie != expected_gpu_count:
             self._log_event(
@@ -900,6 +922,7 @@ def check_gpu_count(
                 priority=EventPriority.ERROR,
                 data={
                     "gpu_count_from_pcie": gpu_count_from_pcie,
+                    "amd_pcie_bridge_count_excluded": bridge_count,
                     "expected_gpu_count": expected_gpu_count,
                 },
             )
@@ -910,6 +933,7 @@ def check_gpu_count(
                 priority=EventPriority.INFO,
                 data={
                     "gpu_count": gpu_count_from_pcie,
+                    "amd_pcie_bridge_count_excluded": bridge_count,
                 },
             )
 
diff --git a/test/unit/plugin/test_device_enumeration_collector.py b/test/unit/plugin/test_device_enumeration_collector.py
index 795611a6..50335f1f 100644
--- a/test/unit/plugin/test_device_enumeration_collector.py
+++ b/test/unit/plugin/test_device_enumeration_collector.py
@@ -66,7 +66,11 @@ def test_collect_linux(system_info, device_enumeration_collector):
                 exit_code=0,
                 stdout="8",
                 stderr="",
-                command="lspci -d 1002: | grep -i 'VGA\\|Display\\|3D' | wc -l",
+                command=(
+                    "lspci -d 1002: | grep -iE "
+                    "'VGA|Display|3D|Processing accelerators|Co-processor|Accelerator' | "
+                    "grep -vi 'Virtual Function' | wc -l"
+                ),
             ),
             MagicMock(
                 exit_code=0,
@@ -142,7 +146,11 @@ def test_collect_error(system_info, device_enumeration_collector):
                 exit_code=1,
                 stdout="some output",
                 stderr="command failed",
-                command="lspci -d 1002: | grep -i 'VGA\\|Display\\|3D' | wc -l",
+                command=(
+                    "lspci -d 1002: | grep -iE "
+                    "'VGA|Display|3D|Processing accelerators|Co-processor|Accelerator' | "
+                    "grep -vi 'Virtual Function' | wc -l"
+                ),
             ),
             MagicMock(
                 exit_code=1,