Set the amount of memory to use for upscaling
CPU upscaling & upscaling on Apple Silicon (CPU & GPU)

A value between 20% and 80% of the freely available memory can be chosen for upscaling. If desired, this percentage can be applied to the total system RAM instead of the freely available memory. If a user chooses to do so, a warning is presented, and the chosen settings and the amount of RAM used are logged during upscaling.

For GPU upscaling, the fraction of freely available GPU memory to use can be set as well. This setting is only available on Windows and Linux.

A MaxTileSize estimate for NCNN on Apple Silicon has been added.
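The budget rule shared by the CPU and GPU code paths in this commit can be summed up in one expression; a minimal sketch, where memory_budget, available_memory, and fraction are illustrative names rather than identifiers from the changed files:

def memory_budget(available_memory: int, fraction: float) -> int:
    # Clamp the user's chosen fraction to the 20%-80% window of available memory.
    return int(
        max(
            available_memory * 0.2,
            min(available_memory * fraction, available_memory * 0.8),
        )
    )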
stonerl committed Aug 18, 2023
1 parent 8f90283 commit 3ab5f42
Showing 11 changed files with 377 additions and 41 deletions.
3 changes: 3 additions & 0 deletions backend/src/nodes/impl/pytorch/utils.py
@@ -37,6 +37,9 @@ def to_pytorch_execution_options(options: ExecutionOptions):
         onnx_should_tensorrt_cache=options.onnx_should_tensorrt_cache,
         onnx_tensorrt_cache_path=options.onnx_tensorrt_cache_path,
         onnx_should_tensorrt_fp16=options.onnx_should_tensorrt_fp16,
+        memory_for_upscaling=options.memory_for_upscaling,
+        is_system_memory=options.is_system_memory,
+        memory_for_upscaling_gpu=options.memory_for_upscaling_gpu,
     )


2 changes: 1 addition & 1 deletion backend/src/nodes/impl/upscale/auto_split_tiles.py
@@ -25,7 +25,7 @@ def estimate_tile_size(
     required_mem = f"{mem_required_estimation/GB_AMT:.2f}"
     budget_mem = f"{budget/GB_AMT:.2f}"
     logger.info(
-        f"Estimating memory required: {required_mem} GB, {budget_mem} GB free."
+        f"Estimating memory required: {required_mem} GB, {budget_mem} GB available."
         f" Estimated tile size: {tile_size}"
     )

51 changes: 49 additions & 2 deletions backend/src/nodes/utils/exec_options.py
@@ -3,6 +3,8 @@
 
 from sanic.log import logger
 
+from system import is_arm_mac
+
 
 class ExecutionOptions:
     def __init__(
@@ -16,6 +18,9 @@ def __init__(
         onnx_should_tensorrt_cache: bool,
         onnx_tensorrt_cache_path: str,
         onnx_should_tensorrt_fp16: bool,
+        memory_for_upscaling: float,
+        is_system_memory: bool,
+        memory_for_upscaling_gpu: float,
     ) -> None:
         self.__device = device
         self.__fp16 = fp16
@@ -26,6 +31,9 @@ def __init__(
         self.__onnx_should_tensorrt_cache = onnx_should_tensorrt_cache
         self.__onnx_tensorrt_cache_path = onnx_tensorrt_cache_path
         self.__onnx_should_tensorrt_fp16 = onnx_should_tensorrt_fp16
+        self.__memory_for_upscaling = memory_for_upscaling
+        self.__is_system_memory = is_system_memory
+        self.__memory_for_upscaling_gpu = memory_for_upscaling_gpu
 
         if (
             not os.path.exists(onnx_tensorrt_cache_path)
@@ -34,7 +42,15 @@ def __init__(
             os.makedirs(onnx_tensorrt_cache_path)
 
         logger.debug(
-            f"PyTorch execution options: fp16: {fp16}, device: {self.full_device} | NCNN execution options: gpu_index: {ncnn_gpu_index} | ONNX execution options: gpu_index: {onnx_gpu_index}, execution_provider: {onnx_execution_provider}, should_tensorrt_cache: {onnx_should_tensorrt_cache}, tensorrt_cache_path: {onnx_tensorrt_cache_path}, should_tensorrt_fp16: {onnx_should_tensorrt_fp16}"
+            f"PyTorch execution options: fp16: {fp16}, device: {self.full_device} |"
+            f" NCNN execution options: gpu_index: {ncnn_gpu_index} | ONNX execution"
+            f" options: gpu_index: {onnx_gpu_index}, execution_provider:"
+            f" {onnx_execution_provider}, should_tensorrt_cache:"
+            f" {onnx_should_tensorrt_cache}, tensorrt_cache_path:"
+            f" {onnx_tensorrt_cache_path}, should_tensorrt_fp16:"
+            f" {onnx_should_tensorrt_fp16}, memory_for_upscaling:"
+            f" {memory_for_upscaling}, is_system_memory {is_system_memory},"
+            f" memory_for_upscaling_gpu {memory_for_upscaling_gpu}"
         )

@@ -75,9 +91,34 @@ def onnx_tensorrt_cache_path(self):
     @property
     def onnx_should_tensorrt_fp16(self):
         return self.__onnx_should_tensorrt_fp16
 
+    @property
+    def memory_for_upscaling(self):
+        return self.__memory_for_upscaling
+
+    @property
+    def is_system_memory(self):
+        return self.__is_system_memory
+
+    @property
+    def memory_for_upscaling_gpu(self):
+        return self.__memory_for_upscaling_gpu
+
 
+defaultRamValue = 0.8 if is_arm_mac else 0.5
+
 __global_exec_options = ExecutionOptions(
-    "cpu", False, 0, 0, 0, "CPUExecutionProvider", False, "", False
+    "cpu",
+    False,
+    0,
+    0,
+    0,
+    "CPUExecutionProvider",
+    False,
+    "",
+    False,
+    defaultRamValue,
+    False,
+    0.8,
 )


@@ -102,6 +143,9 @@ class JsonExecutionOptions(TypedDict):
     onnxShouldTensorRtCache: bool
     onnxTensorRtCachePath: str
     onnxShouldTensorRtFp16: bool
+    memoryForUpscaling: float
+    isSystemMemory: bool
+    memoryForUpscalingGPU: float


@@ -115,4 +159,7 @@ def parse_execution_options(json: JsonExecutionOptions) -> ExecutionOptions:
         onnx_should_tensorrt_cache=json["onnxShouldTensorRtCache"],
         onnx_tensorrt_cache_path=json["onnxTensorRtCachePath"],
         onnx_should_tensorrt_fp16=json["onnxShouldTensorRtFp16"],
+        memory_for_upscaling=json["memoryForUpscaling"],
+        is_system_memory=json["isSystemMemory"],
+        memory_for_upscaling_gpu=json["memoryForUpscalingGPU"],
     )
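The three new fields travel from the frontend payload (JsonExecutionOptions) through parse_execution_options and surface as read-only properties; a minimal sketch of reading them back, assuming options was produced by parse_execution_options:

# `options` is assumed to be the result of parse_execution_options(payload).
fraction = options.memory_for_upscaling          # CPU / Apple Silicon slider
use_total_ram = options.is_system_memory         # apply fraction to total RAM
gpu_fraction = options.memory_for_upscaling_gpu  # Windows/Linux GPU slider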
56 changes: 46 additions & 10 deletions backend/src/packages/chaiNNer_ncnn/ncnn/processing/upscale_image.py
@@ -4,6 +4,7 @@
 
 import cv2
 import numpy as np
+import psutil
 
 try:
     from ncnn_vulkan import ncnn
@@ -29,7 +30,7 @@
 from nodes.properties.outputs import ImageOutput
 from nodes.utils.exec_options import get_execution_options
 from nodes.utils.utils import get_h_w_c
-from system import is_mac
+from system import is_arm_mac, is_mac
 
 from .. import processing_group

@@ -74,13 +75,39 @@ def upscale_impl(
     vkdev = ncnn.get_gpu_device(exec_options.ncnn_gpu_index)
 
     def estimate_gpu():
-        if is_mac:
+        if is_arm_mac:
+            memory_for_upscaling = exec_options.memory_for_upscaling
+
+            if exec_options.is_system_memory:
+                available_memory = psutil.virtual_memory().total
+
+                logger.info(
+                    f"Memory limit set to {memory_for_upscaling * 100}% of"
+                    " total system memory."
+                    f" ({available_memory / (1024 ** 3)} GB)"
+                )
+            else:
+                available_memory = psutil.virtual_memory().available
+
+        elif is_mac:
             # the actual estimate frequently crashes on mac, so we just use 256
             return MaxTileSize(256)
+        else:
+            available_memory = vkdev.get_heap_budget() * 1024 * 1024
+            memory_for_upscaling = exec_options.memory_for_upscaling_gpu
+
+        budget = int(
+            max(
+                available_memory * 0.2,
+                min(
+                    available_memory * memory_for_upscaling,
+                    available_memory * 0.8,
+                ),
+            )
+        )
 
-        heap_budget = vkdev.get_heap_budget() * 1024 * 1024 * 0.8
         return MaxTileSize(
-            estimate_tile_size(heap_budget, model.model.bin_length, img, 4)
+            estimate_tile_size(budget, model.model.bin_length, img, 4)
         )
 
     with ncnn_allocators(vkdev) as (
@@ -124,19 +151,28 @@ def estimate_cpu():
 @processing_group.register(
     schema_id="chainner:ncnn:upscale_image",
     name="Upscale Image",
-    description="Upscale an image with NCNN. Unlike PyTorch, NCNN has GPU support on all devices, assuming your drivers support Vulkan. \
-Select a manual number of tiles if you are having issues with the automatic mode.",
+    description=(
+        "Upscale an image with NCNN. Unlike PyTorch, NCNN has GPU support on all"
+        " devices, assuming your drivers support Vulkan. Select a manual"
+        " number of tiles if you are having issues with the automatic mode."
+    ),
     icon="NCNN",
     inputs=[
         ImageInput().with_id(1),
         NcnnModelInput().with_id(0),
         TileSizeDropdown()
         .with_id(2)
         .with_docs(
-            "Tiled upscaling is used to allow large images to be upscaled without hitting memory limits.",
-            "This works by splitting the image into tiles (with overlap), upscaling each tile individually, and seamlessly recombining them.",
-            "Generally it's recommended to use the largest tile size possible for best performance (with the ideal scenario being no tiling at all), but depending on the model and image size, this may not be possible.",
-            "If you are having issues with the automatic mode, you can manually select a tile size. On certain machines, a very small tile size such as 256 or 128 might be required for it to work at all.",
+            "Tiled upscaling is used to allow large images to be upscaled without"
+            " hitting memory limits.",
+            "This works by splitting the image into tiles (with overlap), upscaling"
+            " each tile individually, and seamlessly recombining them.",
+            "Generally it's recommended to use the largest tile size possible for best"
+            " performance (with the ideal scenario being no tiling at all), but"
+            " depending on the model and image size, this may not be possible.",
+            "If you are having issues with the automatic mode, you can manually select"
+            " a tile size. On certain machines, a very small tile size such as 256 or"
+            " 128 might be required for it to work at all.",
         ),
     ],
     outputs=[
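To make the clamping in estimate_gpu concrete, a worked example with hypothetical numbers (16 GiB of available memory, three slider positions):

GiB = 1024**3
available_memory = 16 * GiB  # hypothetical value for illustration
for fraction in (0.1, 0.5, 0.9):  # below, inside, and above the 20%-80% window
    budget = int(
        max(
            available_memory * 0.2,
            min(available_memory * fraction, available_memory * 0.8),
        )
    )
    print(f"{fraction:.0%} -> {budget / GiB:.1f} GiB")
# prints: 10% -> 3.2 GiB, 50% -> 8.0 GiB, 90% -> 12.8 GiB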
backend/src/packages/chaiNNer_pytorch/pytorch/processing/upscale_image.py
@@ -3,6 +3,7 @@
 from typing import Tuple
 
 import numpy as np
+import psutil
 import torch
 from sanic.log import logger
@@ -41,22 +42,44 @@ def upscale(
     device = torch.device(options.full_device)
 
     def estimate():
+        element_size = 2 if use_fp16 else 4
+        model_bytes = sum(p.numel() * element_size for p in model.parameters())
+
         if "cuda" in options.full_device:
             mem_info: Tuple[int, int] = torch.cuda.mem_get_info(device)  # type: ignore
-            free, _total = mem_info
-            element_size = 2 if use_fp16 else 4
-            model_bytes = sum(p.numel() * element_size for p in model.parameters())
-            budget = int(free * 0.8)
-
-            return MaxTileSize(
-                estimate_tile_size(
-                    budget,
-                    model_bytes,
-                    img,
-                    element_size,
-                )
-            )
-        return MaxTileSize()
+            available_memory, _total = mem_info
+            memory_for_upscaling = options.memory_for_upscaling_gpu
+        else:
+            memory_for_upscaling = options.memory_for_upscaling
+
+            if options.is_system_memory:
+                available_memory = psutil.virtual_memory().total
+
+                logger.info(
+                    f"Memory limit set to {memory_for_upscaling * 100}% of total"
+                    f" system memory. ({available_memory / (1024 ** 3)} GB)"
+                )
+            else:
+                available_memory = psutil.virtual_memory().available
+
+        budget = int(
+            max(
+                available_memory * 0.2,
+                min(
+                    available_memory * memory_for_upscaling,
+                    available_memory * 0.8,
+                ),
+            )
+        )
+
+        return MaxTileSize(
+            estimate_tile_size(
+                budget,
+                model_bytes,
+                img,
+                element_size,
+            )
+        )
 
     # Disable tiling for SCUNet
     upscale_tile_size = tile_size
@@ -78,8 +101,10 @@ def estimate():
 @processing_group.register(
     schema_id="chainner:pytorch:upscale_image",
     name="Upscale Image",
-    description="Upscales an image using a PyTorch Super-Resolution model. \
-Select a manual number of tiles if you are having issues with the automatic mode. ",
+    description=(
+        "Upscales an image using a PyTorch Super-Resolution model. Select a manual"
+        " number of tiles if you are having issues with the automatic mode."
+    ),
     icon="PyTorch",
     inputs=[
         ImageInput().with_id(1),
@@ -92,10 +117,16 @@ def estimate():
         TileSizeDropdown()
         .with_id(2)
         .with_docs(
-            "Tiled upscaling is used to allow large images to be upscaled without hitting memory limits.",
-            "This works by splitting the image into tiles (with overlap), upscaling each tile individually, and seamlessly recombining them.",
-            "Generally it's recommended to use the largest tile size possible for best performance (with the ideal scenario being no tiling at all), but depending on the model and image size, this may not be possible.",
-            "If you are having issues with the automatic mode, you can manually select a tile size. Sometimes, a manually selected tile size may be faster than what the automatic mode picks.",
+            "Tiled upscaling is used to allow large images to be upscaled without"
+            " hitting memory limits.",
+            "This works by splitting the image into tiles (with overlap), upscaling"
+            " each tile individually, and seamlessly recombining them.",
+            "Generally it's recommended to use the largest tile size possible for"
+            " best performance (with the ideal scenario being no tiling at all),"
+            " but depending on the model and image size, this may not be possible.",
+            "If you are having issues with the automatic mode, you can manually"
+            " select a tile size. Sometimes, a manually selected tile size may be"
+            " faster than what the automatic mode picks.",
             hint=True,
         )
     ),
@@ -124,7 +155,8 @@ def upscale_image_node(
     scale = model.scale
     h, w, c = get_h_w_c(img)
     logger.debug(
-        f"Upscaling a {h}x{w}x{c} image with a {scale}x model (in_nc: {in_nc}, out_nc: {out_nc})"
+        f"Upscaling a {h}x{w}x{c} image with a {scale}x model (in_nc: {in_nc}, out_nc:"
+        f" {out_nc})"
     )
 
     return convenient_upscale(
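The PyTorch estimate() now sizes the model before choosing a budget; a toy illustration (not code from this commit) of that per-parameter accounting:

import torch

# Stand-in module for illustration; chaiNNer would load a real SR model here.
model = torch.nn.Conv2d(3, 64, kernel_size=3)

for use_fp16 in (False, True):
    element_size = 2 if use_fp16 else 4  # bytes per parameter
    model_bytes = sum(p.numel() * element_size for p in model.parameters())
    print(f"fp16={use_fp16}: parameters occupy {model_bytes} bytes")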
3 changes: 3 additions & 0 deletions src/common/Backend.ts
@@ -77,6 +77,9 @@ export interface BackendExecutionOptions {
     onnxShouldTensorRtCache: boolean;
     onnxTensorRtCachePath: string;
     onnxShouldTensorRtFp16: boolean;
+    memoryForUpscaling: number;
+    isSystemMemory: boolean;
+    memoryForUpscalingGPU: number;
 }
 export interface BackendRunRequest {
     data: BackendJsonNode[];
2 changes: 2 additions & 0 deletions src/common/env.ts
@@ -31,3 +31,5 @@ export const sanitizedEnv = env;
 export const getOnnxTensorRtCacheLocation = (userDataPath: string) => {
     return path.join(userDataPath, 'onnx-tensorrt-cache');
 };
+
+export const totalMemory = os.totalmem();
7 changes: 6 additions & 1 deletion src/main/cli/run.ts
@@ -7,7 +7,7 @@ import {
     getBackend,
 } from '../../common/Backend';
 import { EdgeData, NodeData, NodeSchema, SchemaId } from '../../common/common-types';
-import { getOnnxTensorRtCacheLocation } from '../../common/env';
+import { getOnnxTensorRtCacheLocation, isArmMac } from '../../common/env';
 import { formatExecutionErrorMessage } from '../../common/formatExecutionErrorMessage';
 import { applyOverrides, readOverrideFile } from '../../common/input-override';
 import { log } from '../../common/log';
@@ -121,6 +121,8 @@ const connectToBackend = async (backendProcess: BackendProcess): Promise<ReadyBa
     return { backend, schemata, functionDefinitions, eventSource };
 };
 
+const defaultRamValue = isArmMac ? 0.8 : 0.5;
+
 const getExecutionOptions = (): BackendExecutionOptions => {
     const getSetting = <T>(key: string, defaultValue: T): T => {
         const value = settingStorage.getItem(key);
@@ -138,6 +140,9 @@ const getExecutionOptions = (): BackendExecutionOptions => {
         onnxShouldTensorRtCache: getSetting('onnx-should-tensorrt-cache', false),
         onnxTensorRtCachePath: getOnnxTensorRtCacheLocation(app.getPath('userData')),
         onnxShouldTensorRtFp16: getSetting('onnx-should-tensorrt-fp16', false),
+        memoryForUpscaling: getSetting('memory-for-upscaling', defaultRamValue),
+        isSystemMemory: getSetting('use-system-memory', false),
+        memoryForUpscalingGPU: getSetting('memory-for-upscaling-gpu', 0.8),
     };
 };
