Skip to content

Commit

Permalink
fix(api/device): use recent timestamp for GPU process utilization query (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
XuehaiPan committed Aug 4, 2023
1 parent ec53de7 commit ef77b8b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 7 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

-
- Use recent timestamp for GPU process utilization query for more accurate per-process GPU usage by [@XuehaiPan](https://github.com/XuehaiPan) in [#85](https://github.com/XuehaiPan/nvitop/pull/85). We extend our heartfelt gratitude to [@2581543189](https://github.com/2581543189) for their invaluable assistance. Their timely comments and comprehensive feedback have greatly contributed to the improvement of this project.

### Fixed

Expand Down
11 changes: 6 additions & 5 deletions nvitop/api/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
import sys
import textwrap
import threading
import time
from collections import OrderedDict
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -683,7 +684,6 @@ def __init__(
self._nvml_index = libnvml.nvmlQuery('nvmlDeviceGetIndex', self._handle)

self._max_clock_infos: ClockInfos = ClockInfos(graphics=NA, sm=NA, memory=NA, video=NA)
self._timestamp: int = 0
self._lock: threading.RLock = threading.RLock()

self._ident: tuple[Hashable, str] = (self.index, self.uuid())
Expand Down Expand Up @@ -1700,11 +1700,13 @@ def processes(self) -> dict[int, GpuProcess]:
samples = libnvml.nvmlQuery(
'nvmlDeviceGetProcessUtilization',
self.handle,
self._timestamp,
# Only utilization samples that were recorded after this timestamp will be returned.
# The CPU timestamp, i.e. absolute Unix epoch timestamp (in microseconds), is used.
# Here we use the timestamp 1/4 second ago to ensure the record buffer is not empty.
time.time_ns() // 1000 - 250_000,
default=(),
)
self._timestamp = max(min((s.timeStamp for s in samples), default=0) - 2_000_000, 0)
for s in samples:
for s in sorted(samples, key=lambda s: s.timeStamp):
try:
processes[s.pid].set_gpu_utilization(s.smUtil, s.memUtil, s.encUtil, s.decUtil)
except KeyError:
Expand Down Expand Up @@ -2019,7 +2021,6 @@ def __init__(
raise libnvml.NVMLError_NotFound

self._max_clock_infos = ClockInfos(graphics=NA, sm=NA, memory=NA, video=NA)
self._timestamp = 0
self._lock = threading.RLock()

self._ident = (self.index, self.uuid())
Expand Down
14 changes: 13 additions & 1 deletion nvitop/api/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,19 @@ def update_gpu_status(self) -> int | NaType:
self.set_gpu_memory(NA)
self.set_gpu_cc_protected_memory(NA)
self.set_gpu_utilization(NA, NA, NA, NA)
self.device.processes()
processes = self.device.processes()
process = processes.get(self.pid, self)
if process is not self:
# The current process is gone and the instance has been removed from the cache.
# Update GPU status from the new instance.
self.set_gpu_memory(process.gpu_memory())
self.set_gpu_cc_protected_memory(process.gpu_cc_protected_memory())
self.set_gpu_utilization(
process.gpu_sm_utilization(),
process.gpu_memory_utilization(),
process.gpu_encoder_utilization(),
process.gpu_decoder_utilization(),
)
return self.gpu_memory()

@property
Expand Down

0 comments on commit ef77b8b

Please sign in to comment.