diff --git a/backend/src/gpu.py b/backend/src/gpu.py
index fb5491ee5..5afbe9321 100644
--- a/backend/src/gpu.py
+++ b/backend/src/gpu.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Tuple
+from typing import List, Tuple, Union
 
 import pynvml as nv
 from sanic.log import logger
@@ -22,6 +22,29 @@ class _GPU:
     uuid: str
     index: int
     handle: int
+    arch: int
+
+
+FP16_ARCH_ABILITY_MAP = {
+    nv.NVML_DEVICE_ARCH_KEPLER: False,
+    nv.NVML_DEVICE_ARCH_MAXWELL: False,
+    nv.NVML_DEVICE_ARCH_PASCAL: False,
+    nv.NVML_DEVICE_ARCH_VOLTA: True,
+    nv.NVML_DEVICE_ARCH_TURING: True,
+    nv.NVML_DEVICE_ARCH_AMPERE: True,
+    nv.NVML_DEVICE_ARCH_ADA: True,
+    nv.NVML_DEVICE_ARCH_HOPPER: True,
+    nv.NVML_DEVICE_ARCH_UNKNOWN: False,
+}
+
+
+def supports_fp16(gpu: _GPU):
+    # This generation also contains the GTX 1600 cards, which do not support FP16.
+    if gpu.arch == nv.NVML_DEVICE_ARCH_TURING:
+        # There may be a more robust way to check this, but for now I think this will do.
+        return "RTX" in gpu.name
+    # Future proofing. We can be reasonably sure that future architectures will support FP16.
+    return FP16_ARCH_ABILITY_MAP.get(gpu.arch, gpu.arch > nv.NVML_DEVICE_ARCH_HOPPER)
 
 
 class NvidiaHelper:
@@ -39,6 +62,7 @@ def __init__(self):
                     uuid=nv.nvmlDeviceGetUUID(handle),
                     index=i,
                     handle=handle,
+                    arch=nv.nvmlDeviceGetArchitecture(handle),
                 )
             )
 
@@ -57,6 +81,12 @@ def get_current_vram_usage(self, gpu_index=0) -> Tuple[int, int, int]:
 
         return info.total, info.used, info.free
 
+    def supports_fp16(self, gpu_index: Union[int, None] = None) -> bool:
+        if gpu_index is None:
+            return all(supports_fp16(gpu) for gpu in self.__gpus)
+        gpu = self.__gpus[gpu_index]
+        return supports_fp16(gpu)
+
 
 _cachedNvidiaHelper = None
diff --git a/backend/src/packages/chaiNNer_onnx/settings.py b/backend/src/packages/chaiNNer_onnx/settings.py
index 1389bb8cf..5e09ad7e7 100644
--- a/backend/src/packages/chaiNNer_onnx/settings.py
+++ b/backend/src/packages/chaiNNer_onnx/settings.py
@@ -13,8 +13,9 @@
 
 from . import package
 
+nv = get_nvidia_helper()
+
 if not is_arm_mac:
-    nv = get_nvidia_helper()
     gpu_list = nv.list_gpus() if nv is not None else []
 
     package.add_setting(
@@ -66,12 +67,16 @@ def get_providers():
         )
     )
 
+    should_fp16 = False
+    if nv is not None:
+        should_fp16 = nv.supports_fp16()
+
     package.add_setting(
         ToggleSetting(
             label="Use TensorRT FP16 Mode",
             key="tensorrt_fp16_mode",
             description="Runs TensorRT in half-precision (FP16) mode for less VRAM usage. RTX GPUs also get a speedup.",
-            default=False,
+            default=should_fp16,
             disabled="TensorrtExecutionProvider" not in execution_providers,
         )
     )
diff --git a/backend/src/packages/chaiNNer_pytorch/settings.py b/backend/src/packages/chaiNNer_pytorch/settings.py
index f2895b7b4..78a45f78c 100644
--- a/backend/src/packages/chaiNNer_pytorch/settings.py
+++ b/backend/src/packages/chaiNNer_pytorch/settings.py
@@ -3,10 +3,13 @@
 import torch
 
 from api import DropdownSetting, ToggleSetting
+from gpu import get_nvidia_helper
 from system import is_arm_mac
 
 from . import package
 
+nv = get_nvidia_helper()
+
 if not is_arm_mac:
     gpu_list = []
     for i in range(torch.cuda.device_count()):
@@ -32,6 +35,12 @@
         ),
     )
 
+should_fp16 = False
+if nv is not None:
+    should_fp16 = nv.supports_fp16()
+else:
+    should_fp16 = is_arm_mac
+
 package.add_setting(
     ToggleSetting(
         label="Use FP16 Mode",
@@ -41,7 +50,7 @@
             if is_arm_mac
             else "Runs PyTorch in half-precision (FP16) mode for less VRAM usage. RTX GPUs also get a speedup."
         ),
-        default=False,
+        default=should_fp16,
     ),
 )
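
A minimal usage sketch of the new `NvidiaHelper.supports_fp16` API, mirroring how the settings modules above consume it. It assumes the backend's `gpu` module is importable and that `get_nvidia_helper()` may return `None` when no NVIDIA GPU/driver is available, as the `nv is not None` checks in the diff suggest:

```python
from gpu import get_nvidia_helper

nv = get_nvidia_helper()  # may be None when NVML/an NVIDIA GPU is unavailable

if nv is None:
    print("No NVIDIA GPU detected; FP16 stays off by default (except on ARM macOS).")
else:
    # Without an index, this is True only if every detected GPU supports FP16.
    print("All GPUs support FP16:", nv.supports_fp16())
    # With an index, it checks a single GPU (index 0 here).
    print("GPU 0 supports FP16:", nv.supports_fp16(0))
```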