<a href="https://colab.research.google.com/github/hihunjin/Code-snippet-for-everything/blob/main/save_csv_gpu_usage_every_second.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Shell out to nvidia-smi to show the GPU and driver status before recording.
!nvidia-smi

Sat May  6 02:20:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P8    12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
%%capture
!pip install pynvml

In [3]:
from typing import List, Tuple
import time
import pynvml
from datetime import datetime

import pandas as pd

In [9]:
# Path of the CSV file that the sampling loop writes and that is read back afterwards.
csv_loc = "record.csv"
# How long to keep sampling, in seconds (compared against elapsed time in the sampling loop).
duration = 3

In [10]:
def get_gpu_memory() -> List[Tuple]:
    """Read the used GPU memory of every device, in bytes, through NVML.

    NVML is initialised on entry and shut down again before returning, so
    repeated calls (e.g. from a polling loop) do not leave NVML
    initialisations unreleased.

    Returns:
        List[Tuple]: One ``(device_id, used_bytes)`` tuple per device, in
        device-index order. A device whose reported ``used`` value is
        ``None`` is recorded as 0.

    Note:
        Use --pid=host flag with Docker.
    """
    pynvml.nvmlInit()
    try:
        memories = []
        for device_id in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
            # Fall back to 0 when NVML gives no usage figure for the device.
            memories.append((device_id, 0 if mem_used is None else mem_used))
        return memories
    finally:
        # Balance the nvmlInit() above, even if a query raised part-way through.
        pynvml.nvmlShutdown()


In [11]:
# Start the CSV with a header-only frame; every sample below is appended to it.
record_columns = ["time", "device_id", "GPU_memory_usage"]
df = pd.DataFrame(columns=record_columns)
df.to_csv(csv_loc, index=False)

# Measure elapsed time on the monotonic clock so that a wall-clock adjustment
# during the run cannot shorten or extend the recording window.
st = time.monotonic()
while time.monotonic() - st < duration:
    memories = get_gpu_memory()
    # All devices were read by the single call above, so they share one timestamp.
    sampled_at = datetime.now().strftime("%H:%M:%S.%f")
    records = [
        {
            "time": sampled_at,
            "device_id": int(device_id),
            # Bytes -> MiB.
            "GPU_memory_usage": round(memory / 1024 ** 2, 5),
        }
        for device_id, memory in memories
    ]
    # Explicit columns keep the appended rows aligned with the header written
    # above, and make an empty sample append nothing.
    _df = pd.DataFrame(records, columns=record_columns)
    _df.to_csv(csv_loc, mode="a", index=False, header=False)
    # Pause between samples.
    time.sleep(0.2)


In [12]:
# Read the recorded samples back from the CSV at csv_loc.
df_record = pd.read_csv(csv_loc)

In [13]:
# Preview the first 10 recorded rows.
df_record.head(10)

Unnamed: 0,time,device_id,GPU_memory_usage
0,02:20:45.576565,0,258.3125
1,02:20:45.778599,0,258.3125
2,02:20:45.980249,0,258.3125
3,02:20:46.181920,0,258.3125
4,02:20:46.384650,0,258.3125
5,02:20:46.586404,0,258.3125
6,02:20:46.788254,0,258.3125
7,02:20:46.989915,0,258.3125
8,02:20:47.191707,0,258.3125
9,02:20:47.393467,0,258.3125
