In [30]:
from fastcore.all import *
from nbdev.showdoc import *

In [33]:
import pynvml
# Path lives in the standard-library `pathlib` module; there is no module named `Path`.
from pathlib import Path

In [56]:
class NVIDIAGPUs():
    """Hands out exclusive access to NVIDIA GPUs using lock files on disk.

    A GPU is considered available when it has no lock file under
    ``lock_path_root`` and the used memory NVML reports for it is at most
    ``max_allowed_used_memory``.
    """

    def __init__(self, max_allowed_used_memory=0):
        """Vends locked access to NVIDIA GPUs.

        A GPU whose used memory is greater than max_allowed_used_memory is
        regarded as unavailable; one at or below the threshold (and not
        already locked) is regarded as available.
        """
        pynvml.nvmlInit()
        self.device_count = pynvml.nvmlDeviceGetCount()
        # Lock files live in a `gpulocks` directory relative to the current
        # working directory.
        self.lock_path_root = Path('gpulocks')
        self.lock_path_root.mkdir(exist_ok=True)
        self.max_allowed_used_memory = max_allowed_used_memory

    def _lockpath(self, gpu_id):
        """Return the path of the lock file for `gpu_id`."""
        return self.lock_path_root/f'{gpu_id}.gpulock'

    def _lock(self, gpu_id):
        """Try to create the lock file for `gpu_id`.

        Returns True if this call created the lock file, False if a lock file
        already existed. Mode 'x' (exclusive creation) makes check-and-create a
        single step, so two callers cannot both believe they hold the lock.
        """
        try:
            with self._lockpath(gpu_id).open('x') as lock_file:
                lock_file.write("locked")
        except FileExistsError:
            return False
        return True

    def unlock(self, gpu_id):
        """Remove the lock file for `gpu_id`; a missing lock file is not an error."""
        try: self._lockpath(gpu_id).unlink()
        except FileNotFoundError: pass

    def _is_locked(self, gpu_id):
        """Return True if a lock file exists for `gpu_id`."""
        return self._lockpath(gpu_id).exists()

    def _used_memory(self, gpu_id):
        """Return the `used` field of NVML's memory info for `gpu_id`."""
        device = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        return pynvml.nvmlDeviceGetMemoryInfo(device).used

    def _is_available(self, gpu_id):
        """Return True if `gpu_id` is unlocked and within the memory threshold."""
        if self._is_locked(gpu_id): return False
        return self._used_memory(gpu_id) <= self.max_allowed_used_memory

    def lock_next_available(self):
        """Locks an available GPU and returns its gpu_id, or None"""
        for gpu_id in range(self.device_count):
            # `_lock` can still fail if another caller grabbed this GPU between
            # the availability check and the lock attempt; move on in that case.
            if self._is_available(gpu_id) and self._lock(gpu_id):
                return gpu_id
        return None



In [57]:
# Create the GPU lock manager with the default max_allowed_used_memory=0.
g = NVIDIAGPUs()

In [58]:
# GPU 0 is available when it has no lock file and its used memory is within the threshold.
g._is_available(0)

True

In [59]:
# Lock the first available GPU and keep its id so it can be unlocked later.
lockedID = g.lock_next_available()

In [60]:
# Ask again while the first lock is held; lock_next_available returns None when no GPU is free.
nextLockedID = g.lock_next_available()

In [61]:
lockedID

0

In [62]:
nextLockedID

In [63]:
# Release the lock taken above by removing its lock file.
g.unlock(lockedID)

In [64]:
# Confirm the lock file for GPU 0 no longer exists.
g._is_locked(0)

False

In [23]:
def is_available(i, p):
    """Return True when directory `p` contains no `<i>.lock` file."""
    lock_file = p / f'{i}.lock'
    return not lock_file.exists()

def find_next_workerid(worker_ids, p):
    """Return the first id in `worker_ids` that has no lock file in `p`.

    `worker_ids` may be any iterable of ids; they are checked in order and the
    search stops at the first available one. Returns None when every id is
    locked or `worker_ids` is empty.
    """
    # Use the `p` argument: the previous code referenced an undefined global `path`.
    return next((wid for wid in worker_ids if is_available(wid, p)), None)

def lock_worker(p, wid, details=""):
    """Write `str(details)` to the `<wid>.lock` file in directory `p`.

    An existing lock file for the same id is overwritten.
    """
    lock_file = p / f'{wid}.lock'
    contents = str(details)
    lock_file.write_text(contents)

def unlock_worker(p, wid):
    """Delete the `<wid>.lock` file in directory `p`, ignoring a missing file."""
    lock_file = p / f'{wid}.lock'
    try:
        lock_file.unlink()
    except FileNotFoundError:
        # Already unlocked: nothing to remove.
        return None
    

In [2]:
# NOTE: raises NameError because only the `pynvml` module is imported, so the
# call has to be qualified as pynvml.nvmlInit().
nvmlInit()

NameError: name 'nvmlInit' is not defined

In [3]:
# Initialise NVML through pynvml before running the queries in the cells below.
pynvml.nvmlInit()

In [4]:
pynvml.nvmlSystemGetDriverVersion()

b'435.21'

In [5]:
pynvml.nvmlDeviceGetCount()

1

In [6]:
# Get the handle for device index 0; the name and memory queries below take this handle.
device = pynvml.nvmlDeviceGetHandleByIndex(0)

In [7]:
pynvml.nvmlDeviceGetName(device)

b'Tesla M60'

In [8]:
?? pynvml.nvmlDeviceGetMemoryInfo

In [12]:
# Fetch the memory info object for the device; its .total, .free and .used fields are read below.
meminfo = pynvml.nvmlDeviceGetMemoryInfo(device)

In [10]:
from collections import namedtuple

In [11]:
??namedtuple

meminfo

In [13]:
meminfo

<pynvml.c_nvmlMemory_t at 0x7f06128d8d40>

In [17]:
meminfo.total

7988903936

In [18]:
meminfo.free

7988903936

In [19]:
meminfo.used

0

In [None]:
import pynvml