Skip to content

Commit

Permalink
lib: drop DeviceHandle if "amdgpu_top" is the only GPU process running
Browse files Browse the repository at this point in the history
  • Loading branch information
Umio-Yasuno committed Jul 14, 2024
1 parent 4ab12b9 commit ea2ade3
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions crates/amdgpu_top_json/src/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub fn gpu_metrics_json(_title: &str, device_path_list: &[DevicePath]) {
pub fn dump_json(device_path_list: &[DevicePath]) {
let vec_json_info: Vec<Value> = device_path_list.iter().filter_map(|device_path| {
let amdgpu_dev = device_path.init().ok()?;
let app = AppAmdgpuTop::new(amdgpu_dev, device_path.clone(), &Default::default())?;
let mut app = AppAmdgpuTop::new(amdgpu_dev, device_path.clone(), &Default::default())?;

let mut m = Map::new();
let mut info = app.json_info();
Expand All @@ -57,12 +57,12 @@ pub fn dump_json(device_path_list: &[DevicePath]) {
}

pub trait JsonInfo {
fn json_info(&self) -> Value;
fn json_info(&mut self) -> Value;
fn stat(&self) -> Value;
}

impl JsonInfo for AppAmdgpuTop {
fn json_info(&self) -> Value {
fn json_info(&mut self) -> Value {
let gpu_clk = json!({
"min": self.device_info.min_gpu_clk,
"max": self.device_info.max_gpu_clk,
Expand All @@ -71,7 +71,7 @@ impl JsonInfo for AppAmdgpuTop {
"min": self.device_info.min_mem_clk,
"max": self.device_info.max_mem_clk,
});
let drm = self.amdgpu_dev.get_drm_version_struct().map_or(Value::Null, |drm| json!({
let drm = self.get_drm_version_struct().map_or(Value::Null, |drm| json!({
"major": drm.version_major,
"minor": drm.version_minor,
"patchlevel": drm.version_patchlevel,
Expand Down
2 changes: 1 addition & 1 deletion crates/amdgpu_top_json/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ impl JsonDeviceInfo {
pub fn from_device_path_list(device_path_list: &[DevicePath]) -> Vec<Self> {
let vec_json_device: Vec<Self> = device_path_list.iter().filter_map(|device_path| {
let amdgpu_dev = device_path.init().ok()?;
let app = AppAmdgpuTop::new(amdgpu_dev, device_path.clone(), &Default::default())?;
let mut app = AppAmdgpuTop::new(amdgpu_dev, device_path.clone(), &Default::default())?;
let info = app.json_info();

Some(Self { app, info })
Expand Down
93 changes: 75 additions & 18 deletions crates/libamdgpu_top/src/app.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use crate::AMDGPU::{DeviceHandle, GPU_INFO, GpuMetrics, RasBlock, RasErrorCount};
use crate::{DevicePath, stat, VramUsage, has_vcn, has_vcn_unified, has_vpe, Sampling};
use stat::{FdInfoStat, GpuActivity, Sensors, PcieBw, PerfCounter, ProcInfo};
use std::mem::ManuallyDrop;
use std::time::Duration;
use std::sync::{Arc, Mutex};
use crate::AppDeviceInfo;
use crate::drmVersion;

pub struct AppAmdgpuTop {
pub amdgpu_dev: DeviceHandle,
pub amdgpu_dev: ManuallyDrop<DeviceHandle>,
is_dropped: bool,
pub device_info: AppDeviceInfo,
pub device_path: DevicePath,
pub stat: AppAmdgpuTopStat,
Expand Down Expand Up @@ -117,7 +120,8 @@ impl AppAmdgpuTop {
}

Some(Self {
amdgpu_dev,
amdgpu_dev: ManuallyDrop::new(amdgpu_dev),
is_dropped: false,
device_info,
device_path,
stat: AppAmdgpuTopStat {
Expand All @@ -137,11 +141,51 @@ impl AppAmdgpuTop {
}

pub fn update(&mut self, interval: Duration) {
self.stat.vram_usage.update_usage(&self.amdgpu_dev);
self.stat.vram_usage.update_usable_heap_size(&self.amdgpu_dev);
{
let lock = self.stat.arc_proc_index.try_lock();
if let Ok(proc_index) = lock {
self.stat.fdinfo.interval = interval + self.buf_interval;
self.stat.fdinfo.get_all_proc_usage(&proc_index);
self.buf_interval = Duration::ZERO;

} else {
self.buf_interval += interval;
}
}
{
// "amdgpu_top" itself is the only running GPU process
if self.stat.fdinfo.proc_usage.len() == 1 && !self.is_dropped {
unsafe { ManuallyDrop::drop(&mut self.amdgpu_dev); }
self.is_dropped = true;
} else if self.stat.fdinfo.proc_usage.len() != 1 && self.is_dropped {
self.amdgpu_dev = ManuallyDrop::new(self.device_path.init().unwrap());
self.is_dropped = false;
}
}

let amdgpu_dev = if self.is_dropped {
if let Some(ref mut sensors) = self.stat.sensors {
sensors.update_without_device_handle();
sensors.sclk = None;
sensors.mclk = None;
sensors.vddnb = None;
sensors.vddgfx = None;
}

if self.stat.metrics.is_some() {
self.stat.metrics = GpuMetrics::get_from_sysfs_path(&self.device_info.sysfs_path).ok();
}

return;
} else {
unsafe { ManuallyDrop::take(&mut self.amdgpu_dev) }
};

self.stat.vram_usage.update_usage(&amdgpu_dev);
self.stat.vram_usage.update_usable_heap_size(&amdgpu_dev);

if let Some(ref mut sensors) = self.stat.sensors {
sensors.update(&self.amdgpu_dev);
sensors.update(&amdgpu_dev);
}

if self.stat.metrics.is_some() {
Expand All @@ -161,25 +205,24 @@ impl AppAmdgpuTop {
&self.stat.metrics,
);

{
let lock = self.stat.arc_proc_index.try_lock();
if let Ok(proc_index) = lock {
self.stat.fdinfo.interval = interval + self.buf_interval;
self.stat.fdinfo.get_all_proc_usage(&proc_index);
self.buf_interval = Duration::ZERO;
} else {
self.buf_interval += interval;
}
}

if self.stat.activity.media.is_none() || self.stat.activity.media == Some(0) {
self.stat.activity.media = self.stat.fdinfo.fold_fdinfo_usage().media.try_into().ok();
}

self.amdgpu_dev = ManuallyDrop::new(amdgpu_dev);
}

pub fn update_pc(&mut self) {
self.stat.grbm.read_reg(&self.amdgpu_dev);
self.stat.grbm2.read_reg(&self.amdgpu_dev);
let amdgpu_dev = if self.is_dropped {
return;
} else {
unsafe { ManuallyDrop::take(&mut self.amdgpu_dev) }
};

self.stat.grbm.read_reg(&amdgpu_dev);
self.stat.grbm2.read_reg(&amdgpu_dev);

self.amdgpu_dev = ManuallyDrop::new(amdgpu_dev);
}

pub fn update_pc_with_sampling(&mut self, sample: &Sampling) {
Expand All @@ -195,4 +238,18 @@ impl AppAmdgpuTop {
self.stat.grbm.bits.clear();
self.stat.grbm2.bits.clear();
}

pub fn get_drm_version_struct(&mut self) -> Option<drmVersion> {
let amdgpu_dev = if self.is_dropped {
return None;
} else {
unsafe { ManuallyDrop::take(&mut self.amdgpu_dev) }
};

let drm_ver = amdgpu_dev.get_drm_version_struct().ok();

self.amdgpu_dev = ManuallyDrop::new(amdgpu_dev);

drm_ver
}
}
14 changes: 9 additions & 5 deletions crates/libamdgpu_top/src/stat/sensors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,18 +143,14 @@ impl Sensors {
})
}

pub fn update(&mut self, amdgpu_dev: &DeviceHandle) {
pub fn update_without_device_handle(&mut self) {
self.current_link = if self.is_apu {
None
} else if self.vega10_and_later {
self.bus_info.get_current_link_info_from_dpm()
} else {
self.bus_info.get_current_link_info()
};
self.sclk = amdgpu_dev.sensor_info(SENSOR_TYPE::GFX_SCLK).ok();
self.mclk = amdgpu_dev.sensor_info(SENSOR_TYPE::GFX_MCLK).ok();
self.vddnb = amdgpu_dev.sensor_info(SENSOR_TYPE::VDDNB).ok();
self.vddgfx = amdgpu_dev.sensor_info(SENSOR_TYPE::VDDGFX).ok();

for temp in [&mut self.edge_temp, &mut self.junction_temp, &mut self.memory_temp] {
let Some(temp) = temp else { continue };
Expand Down Expand Up @@ -184,6 +180,14 @@ impl Sensors {
self.power_profile = PowerProfile::get_current_profile_from_sysfs(&self.sysfs_path);
}

pub fn update(&mut self, amdgpu_dev: &DeviceHandle) {
self.update_without_device_handle();
self.sclk = amdgpu_dev.sensor_info(SENSOR_TYPE::GFX_SCLK).ok();
self.mclk = amdgpu_dev.sensor_info(SENSOR_TYPE::GFX_MCLK).ok();
self.vddnb = amdgpu_dev.sensor_info(SENSOR_TYPE::VDDNB).ok();
self.vddgfx = amdgpu_dev.sensor_info(SENSOR_TYPE::VDDGFX).ok();
}

/// Returns a copy of `average_power` if it is set, otherwise a copy of `input_power`.
///
/// `None` is returned only when both fields are `None`.
pub fn any_hwmon_power(&self) -> Option<HwmonPower> {
    // `or_else` clones the fallback lazily; the former `.or(..)` cloned `input_power`
    // on every call, even when `average_power` was already `Some`.
    self.average_power.clone().or_else(|| self.input_power.clone())
}
Expand Down

0 comments on commit ea2ade3

Please sign in to comment.