Set the amount of memory to use for upscaling
CPU upscaling & upscaling on Apple Silicon (CPU & GPU)

A value between 20% and 80% of the freely available memory can be chosen for upscaling. If desired, this percentage can be applied to the total system RAM instead of the freely available memory. If a user chooses to do so, a warning is presented, and the chosen settings and the amount of RAM used are logged during upscaling.

For GPU upscaling, the fraction of freely available GPU memory to use can be set as well. This setting is only available on Windows and Linux.

A MaxTileSize estimate for NCNN on Apple Silicon has been added.
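The budget rule shared by the CPU and GPU code paths in this commit can be summed up in one expression; a minimal sketch, where memory_budget, available_memory, and fraction are illustrative names rather than identifiers from the changed files:

def memory_budget(available_memory: int, fraction: float) -> int:
    # Clamp the user's chosen fraction to the 20%-80% window of available memory.
    return int(
        max(
            available_memory * 0.2,
            min(available_memory * fraction, available_memory * 0.8),
        )
    )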
stonerl committed Aug 18, 2023
1 parent 8f90283 commit 3ab5f42
Showing 11 changed files with 377 additions and 41 deletions.
3 changes: 3 additions & 0 deletions backend/src/nodes/impl/pytorch/utils.py
@@ -37,6 +37,9 @@ def to_pytorch_execution_options(options: ExecutionOptions):
         onnx_should_tensorrt_cache=options.onnx_should_tensorrt_cache,
         onnx_tensorrt_cache_path=options.onnx_tensorrt_cache_path,
         onnx_should_tensorrt_fp16=options.onnx_should_tensorrt_fp16,
+        memory_for_upscaling=options.memory_for_upscaling,
+        is_system_memory=options.is_system_memory,
+        memory_for_upscaling_gpu=options.memory_for_upscaling_gpu,
     )


2 changes: 1 addition & 1 deletion backend/src/nodes/impl/upscale/auto_split_tiles.py
@@ -25,7 +25,7 @@ def estimate_tile_size(
     required_mem = f"{mem_required_estimation/GB_AMT:.2f}"
     budget_mem = f"{budget/GB_AMT:.2f}"
     logger.info(
-        f"Estimating memory required: {required_mem} GB, {budget_mem} GB free."
+        f"Estimating memory required: {required_mem} GB, {budget_mem} GB available."
         f" Estimated tile size: {tile_size}"
     )

51 changes: 49 additions & 2 deletions backend/src/nodes/utils/exec_options.py
@@ -3,6 +3,8 @@
 
 from sanic.log import logger
 
+from system import is_arm_mac
+
 
 class ExecutionOptions:
     def __init__(
@@ -16,6 +18,9 @@ def __init__(
         onnx_should_tensorrt_cache: bool,
         onnx_tensorrt_cache_path: str,
         onnx_should_tensorrt_fp16: bool,
+        memory_for_upscaling: float,
+        is_system_memory: bool,
+        memory_for_upscaling_gpu: float,
     ) -> None:
         self.__device = device
         self.__fp16 = fp16
@@ -26,6 +31,9 @@ def __init__(
         self.__onnx_should_tensorrt_cache = onnx_should_tensorrt_cache
         self.__onnx_tensorrt_cache_path = onnx_tensorrt_cache_path
         self.__onnx_should_tensorrt_fp16 = onnx_should_tensorrt_fp16
+        self.__memory_for_upscaling = memory_for_upscaling
+        self.__is_system_memory = is_system_memory
+        self.__memory_for_upscaling_gpu = memory_for_upscaling_gpu
 
         if (
             not os.path.exists(onnx_tensorrt_cache_path)
@@ -34,7 +42,15 @@ def __init__(
             os.makedirs(onnx_tensorrt_cache_path)
 
         logger.debug(
-            f"PyTorch execution options: fp16: {fp16}, device: {self.full_device} | NCNN execution options: gpu_index: {ncnn_gpu_index} | ONNX execution options: gpu_index: {onnx_gpu_index}, execution_provider: {onnx_execution_provider}, should_tensorrt_cache: {onnx_should_tensorrt_cache}, tensorrt_cache_path: {onnx_tensorrt_cache_path}, should_tensorrt_fp16: {onnx_should_tensorrt_fp16}"
+            f"PyTorch execution options: fp16: {fp16}, device: {self.full_device} |"
+            f" NCNN execution options: gpu_index: {ncnn_gpu_index} | ONNX execution"
+            f" options: gpu_index: {onnx_gpu_index}, execution_provider:"
+            f" {onnx_execution_provider}, should_tensorrt_cache:"
+            f" {onnx_should_tensorrt_cache}, tensorrt_cache_path:"
+            f" {onnx_tensorrt_cache_path}, should_tensorrt_fp16:"
+            f" {onnx_should_tensorrt_fp16}, memory_for_upscaling:"
+            f" {memory_for_upscaling}, is_system_memory {is_system_memory},"
+            f" memory_for_upscaling_gpu {memory_for_upscaling_gpu}"
         )

@@ -75,9 +91,34 @@ def onnx_tensorrt_cache_path(self):
     @property
     def onnx_should_tensorrt_fp16(self):
         return self.__onnx_should_tensorrt_fp16
 
+    @property
+    def memory_for_upscaling(self):
+        return self.__memory_for_upscaling
+
+    @property
+    def is_system_memory(self):
+        return self.__is_system_memory
+
+    @property
+    def memory_for_upscaling_gpu(self):
+        return self.__memory_for_upscaling_gpu
+
 
+defaultRamValue = 0.8 if is_arm_mac else 0.5
+
 __global_exec_options = ExecutionOptions(
-    "cpu", False, 0, 0, 0, "CPUExecutionProvider", False, "", False
+    "cpu",
+    False,
+    0,
+    0,
+    0,
+    "CPUExecutionProvider",
+    False,
+    "",
+    False,
+    defaultRamValue,
+    False,
+    0.8,
 )


@@ -102,6 +143,9 @@ class JsonExecutionOptions(TypedDict):
     onnxShouldTensorRtCache: bool
     onnxTensorRtCachePath: str
     onnxShouldTensorRtFp16: bool
+    memoryForUpscaling: float
+    isSystemMemory: bool
+    memoryForUpscalingGPU: float


@@ -115,4 +159,7 @@ def parse_execution_options(json: JsonExecutionOptions) -> ExecutionOptions:
         onnx_should_tensorrt_cache=json["onnxShouldTensorRtCache"],
         onnx_tensorrt_cache_path=json["onnxTensorRtCachePath"],
         onnx_should_tensorrt_fp16=json["onnxShouldTensorRtFp16"],
+        memory_for_upscaling=json["memoryForUpscaling"],
+        is_system_memory=json["isSystemMemory"],
+        memory_for_upscaling_gpu=json["memoryForUpscalingGPU"],
     )
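The three new fields travel from the frontend payload (JsonExecutionOptions) through parse_execution_options and surface as read-only properties; a minimal sketch of reading them back, assuming options was produced by parse_execution_options:

# `options` is assumed to be the result of parse_execution_options(payload).
fraction = options.memory_for_upscaling          # CPU / Apple Silicon slider
use_total_ram = options.is_system_memory         # apply fraction to total RAM
gpu_fraction = options.memory_for_upscaling_gpu  # Windows/Linux GPU slider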
56 changes: 46 additions & 10 deletions backend/src/packages/chaiNNer_ncnn/ncnn/processing/upscale_image.py
@@ -4,6 +4,7 @@
 
 import cv2
 import numpy as np
+import psutil
 
 try:
     from ncnn_vulkan import ncnn
@@ -29,7 +30,7 @@
 from nodes.properties.outputs import ImageOutput
 from nodes.utils.exec_options import get_execution_options
 from nodes.utils.utils import get_h_w_c
-from system import is_mac
+from system import is_arm_mac, is_mac
 
 from .. import processing_group

@@ -74,13 +75,39 @@ def upscale_impl(
     vkdev = ncnn.get_gpu_device(exec_options.ncnn_gpu_index)
 
     def estimate_gpu():
-        if is_mac:
+        if is_arm_mac:
+            memory_for_upscaling = exec_options.memory_for_upscaling
+
+            if exec_options.is_system_memory:
+                available_memory = psutil.virtual_memory().total
+
+                logger.info(
+                    f"Memory limit set to {memory_for_upscaling * 100}% of"
+                    " total system memory."
+                    f" ({available_memory / (1024 ** 3)} GB)"
+                )
+            else:
+                available_memory = psutil.virtual_memory().available
+
+        elif is_mac:
             # the actual estimate frequently crashes on mac, so we just use 256
             return MaxTileSize(256)
+        else:
+            available_memory = vkdev.get_heap_budget() * 1024 * 1024
+            memory_for_upscaling = exec_options.memory_for_upscaling_gpu
+
+        budget = int(
+            max(
+                available_memory * 0.2,
+                min(
+                    available_memory * memory_for_upscaling,
+                    available_memory * 0.8,
+                ),
+            )
+        )
 
-        heap_budget = vkdev.get_heap_budget() * 1024 * 1024 * 0.8
         return MaxTileSize(
-            estimate_tile_size(heap_budget, model.model.bin_length, img, 4)
+            estimate_tile_size(budget, model.model.bin_length, img, 4)
         )
 
     with ncnn_allocators(vkdev) as (
@@ -124,19 +151,28 @@ def estimate_cpu():
 @processing_group.register(
     schema_id="chainner:ncnn:upscale_image",
     name="Upscale Image",
-    description="Upscale an image with NCNN. Unlike PyTorch, NCNN has GPU support on all devices, assuming your drivers support Vulkan. \
-Select a manual number of tiles if you are having issues with the automatic mode.",
+    description=(
+        "Upscale an image with NCNN. Unlike PyTorch, NCNN has GPU support on all"
+        " devices, assuming your drivers support Vulkan. Select a manual"
+        " number of tiles if you are having issues with the automatic mode."
+    ),
     icon="NCNN",
     inputs=[
         ImageInput().with_id(1),
         NcnnModelInput().with_id(0),
         TileSizeDropdown()
         .with_id(2)
         .with_docs(
-            "Tiled upscaling is used to allow large images to be upscaled without hitting memory limits.",
-            "This works by splitting the image into tiles (with overlap), upscaling each tile individually, and seamlessly recombining them.",
-            "Generally it's recommended to use the largest tile size possible for best performance (with the ideal scenario being no tiling at all), but depending on the model and image size, this may not be possible.",
-            "If you are having issues with the automatic mode, you can manually select a tile size. On certain machines, a very small tile size such as 256 or 128 might be required for it to work at all.",
+            "Tiled upscaling is used to allow large images to be upscaled without"
+            " hitting memory limits.",
+            "This works by splitting the image into tiles (with overlap), upscaling"
+            " each tile individually, and seamlessly recombining them.",
+            "Generally it's recommended to use the largest tile size possible for best"
+            " performance (with the ideal scenario being no tiling at all), but"
+            " depending on the model and image size, this may not be possible.",
+            "If you are having issues with the automatic mode, you can manually select"
+            " a tile size. On certain machines, a very small tile size such as 256 or"
+            " 128 might be required for it to work at all.",
         ),
     ],
     outputs=[
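To make the clamping in estimate_gpu concrete, a worked example with hypothetical numbers (16 GiB of available memory, three slider positions):

GiB = 1024**3
available_memory = 16 * GiB  # hypothetical value for illustration
for fraction in (0.1, 0.5, 0.9):  # below, inside, and above the 20%-80% window
    budget = int(
        max(
            available_memory * 0.2,
            min(available_memory * fraction, available_memory * 0.8),
        )
    )
    print(f"{fraction:.0%} -> {budget / GiB:.1f} GiB")
# prints: 10% -> 3.2 GiB, 50% -> 8.0 GiB, 90% -> 12.8 GiB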
backend/src/packages/chaiNNer_pytorch/pytorch/processing/upscale_image.py
@@ -3,6 +3,7 @@
 from typing import Tuple
 
 import numpy as np
+import psutil
 import torch
 from sanic.log import logger
@@ -41,22 +42,44 @@ def upscale(
     device = torch.device(options.full_device)
 
     def estimate():
+        element_size = 2 if use_fp16 else 4
+        model_bytes = sum(p.numel() * element_size for p in model.parameters())
+
         if "cuda" in options.full_device:
             mem_info: Tuple[int, int] = torch.cuda.mem_get_info(device)  # type: ignore
-            free, _total = mem_info
-            element_size = 2 if use_fp16 else 4
-            model_bytes = sum(p.numel() * element_size for p in model.parameters())
-            budget = int(free * 0.8)
-
-            return MaxTileSize(
-                estimate_tile_size(
-                    budget,
-                    model_bytes,
-                    img,
-                    element_size,
-                )
-            )
-        return MaxTileSize()
+            available_memory, _total = mem_info
+            memory_for_upscaling = options.memory_for_upscaling_gpu
+        else:
+            memory_for_upscaling = options.memory_for_upscaling
+
+            if options.is_system_memory:
+                available_memory = psutil.virtual_memory().total
+
+                logger.info(
+                    f"Memory limit set to {memory_for_upscaling * 100}% of total"
+                    f" system memory. ({available_memory / (1024 ** 3)} GB)"
+                )
+            else:
+                available_memory = psutil.virtual_memory().available
+
+        budget = int(
+            max(
+                available_memory * 0.2,
+                min(
+                    available_memory * memory_for_upscaling,
+                    available_memory * 0.8,
+                ),
+            )
+        )
+
+        return MaxTileSize(
+            estimate_tile_size(
+                budget,
+                model_bytes,
+                img,
+                element_size,
+            )
+        )
 
     # Disable tiling for SCUNet
     upscale_tile_size = tile_size
@@ -78,8 +101,10 @@ def estimate():
 @processing_group.register(
     schema_id="chainner:pytorch:upscale_image",
     name="Upscale Image",
-    description="Upscales an image using a PyTorch Super-Resolution model. \
-Select a manual number of tiles if you are having issues with the automatic mode. ",
+    description=(
+        "Upscales an image using a PyTorch Super-Resolution model. Select a manual"
+        " number of tiles if you are having issues with the automatic mode."
+    ),
     icon="PyTorch",
     inputs=[
         ImageInput().with_id(1),
@@ -92,10 +117,16 @@ def estimate():
         TileSizeDropdown()
         .with_id(2)
         .with_docs(
-            "Tiled upscaling is used to allow large images to be upscaled without hitting memory limits.",
-            "This works by splitting the image into tiles (with overlap), upscaling each tile individually, and seamlessly recombining them.",
-            "Generally it's recommended to use the largest tile size possible for best performance (with the ideal scenario being no tiling at all), but depending on the model and image size, this may not be possible.",
-            "If you are having issues with the automatic mode, you can manually select a tile size. Sometimes, a manually selected tile size may be faster than what the automatic mode picks.",
+            "Tiled upscaling is used to allow large images to be upscaled without"
+            " hitting memory limits.",
+            "This works by splitting the image into tiles (with overlap), upscaling"
+            " each tile individually, and seamlessly recombining them.",
+            "Generally it's recommended to use the largest tile size possible for"
+            " best performance (with the ideal scenario being no tiling at all),"
+            " but depending on the model and image size, this may not be possible.",
+            "If you are having issues with the automatic mode, you can manually"
+            " select a tile size. Sometimes, a manually selected tile size may be"
+            " faster than what the automatic mode picks.",
             hint=True,
         )
     ),
@@ -124,7 +155,8 @@ def upscale_image_node(
     scale = model.scale
     h, w, c = get_h_w_c(img)
     logger.debug(
-        f"Upscaling a {h}x{w}x{c} image with a {scale}x model (in_nc: {in_nc}, out_nc: {out_nc})"
+        f"Upscaling a {h}x{w}x{c} image with a {scale}x model (in_nc: {in_nc}, out_nc:"
+        f" {out_nc})"
     )
 
     return convenient_upscale(
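The PyTorch estimate() now sizes the model before choosing a budget; a toy illustration (not code from this commit) of that per-parameter accounting:

import torch

# Stand-in module for illustration; chaiNNer would load a real SR model here.
model = torch.nn.Conv2d(3, 64, kernel_size=3)

for use_fp16 in (False, True):
    element_size = 2 if use_fp16 else 4  # bytes per parameter
    model_bytes = sum(p.numel() * element_size for p in model.parameters())
    print(f"fp16={use_fp16}: parameters occupy {model_bytes} bytes")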
3 changes: 3 additions & 0 deletions src/common/Backend.ts
@@ -77,6 +77,9 @@ export interface BackendExecutionOptions {
     onnxShouldTensorRtCache: boolean;
     onnxTensorRtCachePath: string;
     onnxShouldTensorRtFp16: boolean;
+    memoryForUpscaling: number;
+    isSystemMemory: boolean;
+    memoryForUpscalingGPU: number;
 }
 export interface BackendRunRequest {
     data: BackendJsonNode[];
2 changes: 2 additions & 0 deletions src/common/env.ts
@@ -31,3 +31,5 @@ export const sanitizedEnv = env;
 export const getOnnxTensorRtCacheLocation = (userDataPath: string) => {
     return path.join(userDataPath, 'onnx-tensorrt-cache');
 };
+
+export const totalMemory = os.totalmem();
7 changes: 6 additions & 1 deletion src/main/cli/run.ts
@@ -7,7 +7,7 @@ import {
     getBackend,
 } from '../../common/Backend';
 import { EdgeData, NodeData, NodeSchema, SchemaId } from '../../common/common-types';
-import { getOnnxTensorRtCacheLocation } from '../../common/env';
+import { getOnnxTensorRtCacheLocation, isArmMac } from '../../common/env';
 import { formatExecutionErrorMessage } from '../../common/formatExecutionErrorMessage';
 import { applyOverrides, readOverrideFile } from '../../common/input-override';
 import { log } from '../../common/log';
@@ -121,6 +121,8 @@ const connectToBackend = async (backendProcess: BackendProcess): Promise<ReadyBa
     return { backend, schemata, functionDefinitions, eventSource };
 };
 
+const defaultRamValue = isArmMac ? 0.8 : 0.5;
+
 const getExecutionOptions = (): BackendExecutionOptions => {
     const getSetting = <T>(key: string, defaultValue: T): T => {
         const value = settingStorage.getItem(key);
@@ -138,6 +140,9 @@ const getExecutionOptions = (): BackendExecutionOptions => {
         onnxShouldTensorRtCache: getSetting('onnx-should-tensorrt-cache', false),
         onnxTensorRtCachePath: getOnnxTensorRtCacheLocation(app.getPath('userData')),
         onnxShouldTensorRtFp16: getSetting('onnx-should-tensorrt-fp16', false),
+        memoryForUpscaling: getSetting('memory-for-upscaling', defaultRamValue),
+        isSystemMemory: getSetting('use-system-memory', false),
+        memoryForUpscalingGPU: getSetting('memory-for-upscaling-gpu', 0.8),
     };
 };
