chaiNNer-org · joeyballentine · Sep 13, 2023 · Sep 11, 2023 · Sep 11, 2023 · Sep 11, 2023
diff --git a/backend/src/gpu.py b/backend/src/gpu.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Tuple
+from typing import List, Tuple, Union
 
 import pynvml as nv
 from sanic.log import logger
@@ -22,6 +22,31 @@ class _GPU:
     uuid: str
     index: int
     handle: int
+    arch: int
+
+
+FP16_ARCH_ABILITY_MAP = {  # NVML device architecture constant -> FP16 flag looked up by can_gpu_fp16
+    nv.NVML_DEVICE_ARCH_KEPLER: False,
+    nv.NVML_DEVICE_ARCH_MAXWELL: False,
+    nv.NVML_DEVICE_ARCH_PASCAL: False,
+    nv.NVML_DEVICE_ARCH_VOLTA: True,
+    nv.NVML_DEVICE_ARCH_TURING: True,  # can_gpu_fp16 returns early for Turing and checks the GPU name instead
+    nv.NVML_DEVICE_ARCH_AMPERE: True,
+    nv.NVML_DEVICE_ARCH_ADA: True,
+    nv.NVML_DEVICE_ARCH_HOPPER: True,
+    nv.NVML_DEVICE_ARCH_UNKNOWN: False,  # an unidentified architecture is treated as not FP16-capable
+}
+
+
+def can_gpu_fp16(gpu: _GPU) -> bool:
+    """Return whether `gpu` should use FP16, judged by its NVML architecture (and its name for Turing)."""
+    if gpu.arch == nv.NVML_DEVICE_ARCH_TURING:
+        # Turing also contains the GTX 1600 cards, which do not support FP16, so only RTX-named cards pass.
+        return "RTX" in gpu.name
+    if gpu.arch not in FP16_ARCH_ABILITY_MAP and gpu.arch > nv.NVML_DEVICE_ARCH_HOPPER:
+        # Future proofing. We can be reasonably sure that future architectures will support FP16.
+        return True
+    return FP16_ARCH_ABILITY_MAP.get(gpu.arch, False)  # unmapped older ids: False instead of KeyError
 
 
 class NvidiaHelper:
@@ -39,6 +64,7 @@ def __init__(self):
                     uuid=nv.nvmlDeviceGetUUID(handle),
                     index=i,
                     handle=handle,
+                    arch=nv.nvmlDeviceGetArchitecture(handle),
                 )
             )
 
@@ -57,6 +83,12 @@ def get_current_vram_usage(self, gpu_index=0) -> Tuple[int, int, int]:
 
         return info.total, info.used, info.free
 
+    def get_can_fp16(self, gpu_index: Union[int, None] = None) -> bool:
+        """Return FP16 support for GPU `gpu_index`; when it is None, for all GPUs (False if there are none)."""
+        if gpu_index is None:
+            return bool(self.__gpus) and all(can_gpu_fp16(gpu) for gpu in self.__gpus)
+        return can_gpu_fp16(self.__gpus[gpu_index])
+
 
 _cachedNvidiaHelper = None
 

diff --git a/backend/src/packages/chaiNNer_onnx/settings.py b/backend/src/packages/chaiNNer_onnx/settings.py
@@ -13,8 +13,9 @@
 
 from . import package
 
+nv = get_nvidia_helper()
+
 if not is_arm_mac:
-    nv = get_nvidia_helper()
     gpu_list = nv.list_gpus() if nv is not None else []
 
     package.add_setting(
@@ -66,12 +67,16 @@ def get_providers():
         )
     )
 
+    should_fp16 = False
+    if nv is not None:
+        should_fp16 = nv.get_can_fp16()
+
     package.add_setting(
         ToggleSetting(
             label="Use TensorRT FP16 Mode",
             key="tensorrt_fp16_mode",
             description="Runs TensorRT in half-precision (FP16) mode for less VRAM usage. RTX GPUs also get a speedup.",
-            default=False,
+            default=should_fp16,
             disabled="TensorrtExecutionProvider" not in execution_providers,
         )
     )

diff --git a/backend/src/packages/chaiNNer_pytorch/settings.py b/backend/src/packages/chaiNNer_pytorch/settings.py
@@ -3,10 +3,13 @@
 import torch
 
 from api import DropdownSetting, ToggleSetting
+from gpu import get_nvidia_helper
 from system import is_arm_mac
 
 from . import package
 
+nv = get_nvidia_helper()
+
 if not is_arm_mac:
     gpu_list = []
     for i in range(torch.cuda.device_count()):
@@ -32,6 +35,10 @@
     ),
 )
 
+should_fp16 = False
+if nv is not None:
+    should_fp16 = nv.get_can_fp16()
+
 package.add_setting(
     ToggleSetting(
         label="Use FP16 Mode",
@@ -41,7 +48,7 @@
             if is_arm_mac
             else "Runs PyTorch in half-precision (FP16) mode for less VRAM usage. RTX GPUs also get a speedup."
         ),
-        default=False,
+        default=should_fp16 or is_arm_mac,
     ),
 )