GPU selector settings for PyTorch and NCNN (#919)
* make nvidia smi return all gpu names

* Frontend & Settings for GPU selector

* PyTorch GPU selection

* NCNN GPU selection

* Add index numbers to GPU labels

* use gpu index for vram estimation

* fix ncnn missing index

* Move settings around
joeyballentine committed Sep 9, 2022
1 parent 29f9b67 commit 64f75b9
Showing 14 changed files with 234 additions and 75 deletions.
8 changes: 6 additions & 2 deletions backend/src/nodes/ncnn_nodes.py
@@ -18,6 +18,7 @@
from .utils.ncnn_auto_split import ncnn_auto_split_process
from .utils.ncnn_model import NcnnModel
from .utils.utils import get_h_w_c, convenient_upscale
from .utils.exec_options import get_execution_options

# NCNN Save Model node
# pylint: disable=unused-import
@@ -71,9 +72,10 @@ def upscale(
output_name: str,
tile_mode: int,
):
exec_options = get_execution_options()
# Try/except block to catch errors
try:
vkdev = ncnn.get_gpu_device(ncnn.get_default_gpu_index())
vkdev = ncnn.get_gpu_device(exec_options.ncnn_gpu_index)
blob_vkallocator = ncnn.VkBlobAllocator(vkdev)
staging_vkallocator = ncnn.VkStagingAllocator(vkdev)
output, _ = ncnn_auto_split_process(
@@ -97,13 +99,15 @@ def upscale(
raise RuntimeError("An unexpected error occurred during NCNN processing.")

def run(self, model: NcnnModel, img: np.ndarray, tile_mode: int) -> np.ndarray:
exec_options = get_execution_options()

model_c = model.get_model_in_nc()

net = ncnn.Net()

# Use vulkan compute
net.opt.use_vulkan_compute = True
net.set_vulkan_device(ncnn.get_default_gpu_index())
net.set_vulkan_device(exec_options.ncnn_gpu_index)

# Load model param and bin
net.load_param_mem(model.write_param())
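The net effect of the NCNN changes: the Vulkan device is chosen from the user's settings instead of ncnn's default index. A minimal sketch of that pattern, assuming the pyncnn (`ncnn`) Python bindings; `gpu_index` stands in for `exec_options.ncnn_gpu_index`, and the fallback to the default device is an added safety assumption, not part of this commit:

```python
import ncnn

def make_vulkan_net(gpu_index: int) -> ncnn.Net:
    """Create an ncnn Net bound to the requested Vulkan device (sketch)."""
    # Fall back to ncnn's default device if the index is out of range (assumption).
    if gpu_index < 0 or gpu_index >= ncnn.get_gpu_count():
        gpu_index = ncnn.get_default_gpu_index()

    net = ncnn.Net()
    net.opt.use_vulkan_compute = True
    net.set_vulkan_device(gpu_index)
    return net

# Allocators are built from the same device, mirroring upscale() above.
vkdev = ncnn.get_gpu_device(0)  # 0 is an illustrative index
blob_vkallocator = ncnn.VkBlobAllocator(vkdev)
staging_vkallocator = ncnn.VkStagingAllocator(vkdev)
```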
20 changes: 11 additions & 9 deletions backend/src/nodes/pytorch_nodes.py
@@ -32,8 +32,12 @@

def to_pytorch_execution_options(options: ExecutionOptions):
return ExecutionOptions(
"cuda" if torch.cuda.is_available() and options.device != "cpu" else "cpu",
options.fp16,
device="cuda"
if torch.cuda.is_available() and options.device != "cpu"
else "cpu",
fp16=options.fp16,
pytorch_gpu_index=options.pytorch_gpu_index,
ncnn_gpu_index=options.ncnn_gpu_index,
)


@@ -135,9 +139,9 @@ def upscale(
logger.debug("Upscaling image")

split_estimation = 1
if options.device == "cuda":
if "cuda" in options.device:
GB_AMT = 1024**3
free, total = torch.cuda.mem_get_info(0) # type: ignore
free, total = torch.cuda.mem_get_info(options.pytorch_gpu_index) # type: ignore
img_bytes = img_tensor.numel() * img_tensor.element_size()
model_bytes = sum(
p.numel() * (p.element_size() / (2 if should_use_fp16 else 1))
@@ -167,7 +171,7 @@
scale,
max_depth=tile_mode if tile_mode > 0 else split_estimation,
)
if options.device == "cuda":
if "cuda" in options.device:
logger.info(f"Actual Split depth: {depth}")
del img_tensor
logger.debug("Converting tensor to image")
@@ -434,16 +438,14 @@ def run(self, model: torch.nn.Module) -> bytes:
exec_options = to_pytorch_execution_options(get_execution_options())

model = model.eval()
if exec_options.device == "cuda":
model = model.cuda()
model = model.to(torch.device(exec_options.device))
# https://github.com/onnx/onnx/issues/654
dynamic_axes = {
"data": {0: "batch_size", 2: "width", 3: "height"},
"output": {0: "batch_size", 2: "width", 3: "height"},
}
dummy_input = torch.rand(1, model.in_nc, 64, 64) # type: ignore
if exec_options.device == "cuda":
dummy_input = dummy_input.cuda()
dummy_input = dummy_input.to(torch.device(exec_options.device))

with BytesIO() as f:
torch.onnx.export(
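Two details worth noting in the PyTorch hunks: the device string now carries the index (`cuda:N`), and the VRAM estimate queries that same index instead of hard-coding GPU 0. A rough sketch of the idea, not chaiNNer's exact code:

```python
import torch

def pick_device(use_cpu: bool, gpu_index: int) -> torch.device:
    """Build a torch.device from the user's settings (sketch)."""
    if not use_cpu and torch.cuda.is_available():
        return torch.device(f"cuda:{gpu_index}")
    return torch.device("cpu")

device = pick_device(use_cpu=False, gpu_index=0)
if device.type == "cuda":
    # Free/total VRAM of the selected card, not necessarily GPU 0.
    free, total = torch.cuda.mem_get_info(device.index)
    print(f"{free / 1024**3:.1f} GiB free of {total / 1024**3:.1f} GiB")
```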
30 changes: 22 additions & 8 deletions backend/src/nodes/utils/exec_options.py
@@ -1,25 +1,39 @@
from typing import Literal
from sanic.log import logger


DeviceType = Literal["cpu", "cuda"]


class ExecutionOptions:
def __init__(self, device: DeviceType, fp16: bool) -> None:
self.__device: DeviceType = device
def __init__(
self,
device: str,
fp16: bool,
pytorch_gpu_index: int,
ncnn_gpu_index: int,
) -> None:
self.__device = device
self.__fp16 = fp16
self.__pytorch_gpu_index = pytorch_gpu_index
self.__ncnn_gpu_index = ncnn_gpu_index

@property
def device(self) -> DeviceType:
def device(self) -> str:
if self.__device == "cuda":
return f"cuda:{self.__pytorch_gpu_index}"
return self.__device

@property
def fp16(self):
return self.__fp16

@property
def pytorch_gpu_index(self):
return self.__pytorch_gpu_index

@property
def ncnn_gpu_index(self):
return self.__ncnn_gpu_index


__global_exec_options = ExecutionOptions("cpu", False)
__global_exec_options = ExecutionOptions("cpu", False, 0, 0)


def get_execution_options() -> ExecutionOptions:
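A quick usage sketch against the refactored class, assuming backend/src is importable and that set_execution_options lives alongside get_execution_options, as run.py's usage suggests:

```python
# Illustrative import path; adjust to wherever backend/src sits on sys.path.
from nodes.utils.exec_options import (
    ExecutionOptions,
    get_execution_options,
    set_execution_options,
)

opts = ExecutionOptions(device="cuda", fp16=True, pytorch_gpu_index=1, ncnn_gpu_index=0)
set_execution_options(opts)

current = get_execution_options()
print(current.device)          # "cuda:1" -- the PyTorch index is baked into the string
print(current.ncnn_gpu_index)  # 0
```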
4 changes: 2 additions & 2 deletions backend/src/nodes/utils/pytorch_auto_split.py
@@ -61,9 +61,9 @@ def auto_split_process(
exec_options.fp16 and model.supports_fp16
) # TODO: use bfloat16 if RTX
# cpu does not support autocast
if exec_options.device == "cuda":
if "cuda" in exec_options.device:
with torch.autocast( # type: ignore
device_type=exec_options.device,
device_type=device.type,
dtype=torch.float16 if should_use_fp16 else torch.float32,
):
result = model(d_img)
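The switch to `device_type=device.type` matters because torch.autocast accepts only a bare device type ("cuda" or "cpu"), while `exec_options.device` can now be "cuda:1". A small sketch of the pattern:

```python
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
x = torch.rand(1, 3, 64, 64, device=device)

if device.type == "cuda":
    # Pass "cuda", not "cuda:0"; the device index already lives on the tensors.
    with torch.autocast(device_type=device.type, dtype=torch.float16):
        y = x * 2  # stand-in for model(d_img)
else:
    y = x * 2
```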
8 changes: 8 additions & 0 deletions backend/src/run.py
@@ -169,6 +169,8 @@ class RunRequest(TypedDict):
data: Dict[str, UsableData]
isCpu: bool
isFp16: bool
pytorchGPU: int
ncnnGPU: int


@app.route("/run", methods=["POST"])
@@ -194,6 +196,8 @@ async def run(request: Request):
exec_opts = ExecutionOptions(
device="cpu" if full_data["isCpu"] else "cuda",
fp16=full_data["isFp16"],
pytorch_gpu_index=full_data["pytorchGPU"],
ncnn_gpu_index=full_data["ncnnGPU"],
)
set_execution_options(exec_opts)
logger.info(f"Using device: {exec_opts.device}")
@@ -242,6 +246,8 @@ class RunIndividualRequest(TypedDict):
inputs: List[Any]
isCpu: bool
isFp16: bool
pytorchGPU: int
ncnnGPU: int
schemaId: str


@@ -257,6 +263,8 @@ async def run_individual(request: Request):
exec_opts = ExecutionOptions(
device="cpu" if full_data["isCpu"] else "cuda",
fp16=full_data["isFp16"],
pytorch_gpu_index=full_data["pytorchGPU"],
ncnn_gpu_index=full_data["ncnnGPU"],
)
set_execution_options(exec_opts)
logger.info(f"Using device: {exec_opts.device}")
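For clients of the backend, the two new fields ride along in the existing JSON body. A hypothetical request against the /run route; the host and port are placeholders (chaiNNer picks the port at runtime) and the node-graph `data` is elided:

```python
import requests

payload = {
    "data": {},        # node graph payload (elided)
    "isCpu": False,
    "isFp16": True,
    "pytorchGPU": 1,   # CUDA device index for PyTorch nodes
    "ncnnGPU": 0,      # Vulkan device index for NCNN nodes
}
resp = requests.post("http://127.0.0.1:8000/run", json=payload)
print(resp.status_code)
```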
4 changes: 4 additions & 0 deletions src/common/Backend.ts
@@ -28,12 +28,16 @@ export interface BackendRunRequest {
data: Record<string, UsableData>;
isCpu: boolean;
isFp16: boolean;
pytorchGPU: number;
ncnnGPU: number;
}
export interface BackendRunIndividualRequest {
id: string;
inputs: (InputValue | null)[];
isCpu: boolean;
isFp16: boolean;
pytorchGPU: number;
ncnnGPU: number;
schemaId: SchemaId;
}

1 change: 1 addition & 0 deletions src/common/safeIpc.ts
@@ -21,6 +21,7 @@ type SendChannelInfo<Args extends unknown[] = []> = ChannelInfo<void, Args>;

export interface InvokeChannels {
'get-nvidia-gpu-name': ChannelInfo<string | null>;
'get-nvidia-gpus': ChannelInfo<string[] | null>;
'get-gpu-info': ChannelInfo<Systeminformation.GraphicsData>;
'get-python': ChannelInfo<PythonInfo>;
'get-port': ChannelInfo<number>;
8 changes: 5 additions & 3 deletions src/main/main.ts
@@ -21,7 +21,7 @@ import { checkFileExists, lazy } from '../common/util';
import { getArguments } from './arguments';
import { registerDiscordRPC, toggleDiscordRPC, updateDiscordRPC } from './discordRPC';
import { setMainMenu } from './menu';
import { createNvidiaSmiVRamChecker, getNvidiaGpuName, getNvidiaSmi } from './nvidiaSmi';
import { createNvidiaSmiVRamChecker, getNvidiaGpuNames, getNvidiaSmi } from './nvidiaSmi';
import { downloadPython, extractPython } from './setupIntegratedPython';
import { getGpuInfo } from './systemInfo';
import { hasUpdate } from './update';
@@ -366,12 +366,14 @@ const checkPythonDeps = async (splashWindow: BrowserWindowWithSafeIpc) => {
const checkNvidiaSmi = async () => {
const registerEmptyGpuEvents = () => {
ipcMain.handle('get-nvidia-gpu-name', () => null);
ipcMain.handle('get-nvidia-gpus', () => null);
ipcMain.handle('get-vram-usage', () => null);
};

const registerNvidiaSmiEvents = async (nvidiaSmi: string) => {
const nvidiaGpu = await getNvidiaGpuName(nvidiaSmi);
ipcMain.handle('get-nvidia-gpu-name', () => nvidiaGpu.trim());
const nvidiaGpus = await getNvidiaGpuNames(nvidiaSmi);
ipcMain.handle('get-nvidia-gpu-name', () => nvidiaGpus[0].trim());
ipcMain.handle('get-nvidia-gpus', () => nvidiaGpus.map((gpu) => gpu.trim()));

let vramChecker: ChildProcessWithoutNullStreams | undefined;
let lastVRam: number | null = null;
6 changes: 3 additions & 3 deletions src/main/nvidiaSmi.ts
@@ -85,15 +85,15 @@ export const getNvidiaSmi = lazy(async (): Promise<string | undefined> => {
}
});

export const getNvidiaGpuName = async (nvidiaSmi: string): Promise<string> => {
const [nvidiaGpu] = (
export const getNvidiaGpuNames = async (nvidiaSmi: string): Promise<string[]> => {
const nvidiaGpus = (
await exec(
`"${nvidiaSmi}" --query-gpu=name --format=csv,noheader,nounits ${
process.platform === 'linux' ? ' 2>/dev/null' : ''
}`
)
).stdout.split('\n');
return nvidiaGpu;
return nvidiaGpus.slice(0, nvidiaGpus.length - 1);
};

export const createNvidiaSmiVRamChecker = (
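getNvidiaGpuNames now returns one entry per installed card, dropping the empty string left by the trailing newline of the nvidia-smi output. The same query sketched in Python, assuming nvidia-smi is on PATH:

```python
import subprocess

def get_nvidia_gpu_names(nvidia_smi: str = "nvidia-smi") -> list[str]:
    """Return the name of every NVIDIA GPU that nvidia-smi reports (sketch)."""
    out = subprocess.run(
        [nvidia_smi, "--query-gpu=name", "--format=csv,noheader,nounits"],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    # splitlines() produces no trailing empty entry, so no manual slice is needed.
    return [name.strip() for name in out.splitlines() if name.strip()]

print(get_nvidia_gpu_names())  # e.g. ['NVIDIA GeForce RTX 3080']
```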
2 changes: 1 addition & 1 deletion src/renderer/components/CustomIcons.tsx
@@ -94,7 +94,7 @@ export const IconFactory = memo(
boxSize = 4,
}: {
icon?: string;
accentColor: string;
accentColor?: string;
boxSize?: number;
}) => {
const unknownIcon = (