Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# Admins
* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 @kijai

/CODEOWNERS @comfyanonymous
/.ci/ @comfyanonymous
/.github/ @comfyanonymous
7 changes: 4 additions & 3 deletions comfy/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,11 @@ def from_string(cls, value: str):

parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

CACHE_RAM_AUTO_GB = -1.0

cache_group = parser.add_mutually_exclusive_group()
cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
cache_group.add_argument("--cache-ram", nargs='?', const=CACHE_RAM_AUTO_GB, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold the cache removes large items to free RAM. Default (when no value is provided): 25%% of system RAM (min 4GB, max 32GB).")

attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
Expand Down Expand Up @@ -245,6 +243,9 @@ def is_valid_directory(path: str) -> str:
else:
args = parser.parse_args([])

if args.cache_ram is not None and len(args.cache_ram) > 2:
parser.error("--cache-ram accepts at most two values: active GB and inactive GB")

if args.windows_standalone_build:
args.auto_launch = True

Expand Down
19 changes: 13 additions & 6 deletions comfy/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,16 +484,23 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori

return weight

def prefetch_prepared_value(value, allocate_buffer, stream):
def prefetch_prepared_value(value, counter, destination, stream, copy):
if isinstance(value, torch.Tensor):
dest = allocate_buffer(comfy.memory_management.vram_aligned_size(value))
comfy.model_management.cast_to_gathered([value], dest, non_blocking=True, stream=stream)
size = comfy.memory_management.vram_aligned_size(value)
offset = counter[0]
counter[0] += size
if destination is None:
return value

dest = destination[offset:offset + size]
if copy:
comfy.model_management.cast_to_gathered([value], dest, non_blocking=True, stream=stream)
return comfy.memory_management.interpret_gathered_like([value], dest)[0]
elif isinstance(value, weight_adapter.WeightAdapterBase):
return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, allocate_buffer, stream))
return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, counter, destination, stream, copy))
elif isinstance(value, tuple):
return tuple(prefetch_prepared_value(item, allocate_buffer, stream) for item in value)
return tuple(prefetch_prepared_value(item, counter, destination, stream, copy) for item in value)
elif isinstance(value, list):
return [prefetch_prepared_value(item, allocate_buffer, stream) for item in value]
return [prefetch_prepared_value(item, counter, destination, stream, copy) for item in value]

return value
24 changes: 20 additions & 4 deletions comfy/memory_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,25 @@ class TensorFileSlice(NamedTuple):
size: int


def read_tensor_file_slice_into(tensor, destination):
def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=None):

if isinstance(tensor, QuantizedTensor):
if not isinstance(destination, QuantizedTensor):
return False
if tensor._layout_cls != destination._layout_cls:
return False

if not read_tensor_file_slice_into(tensor._qdata, destination._qdata):
if not read_tensor_file_slice_into(tensor._qdata, destination._qdata, stream=stream,
destination2=(destination2._qdata if destination2 is not None else None)):
return False

dst_orig_dtype = destination._params.orig_dtype
destination._params.copy_from(tensor._params, non_blocking=False)
destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype)
if destination2 is not None:
dst_orig_dtype = destination2._params.orig_dtype
destination2._params.copy_from(destination._params, non_blocking=True)
destination2._params = dataclasses.replace(destination2._params, orig_dtype=dst_orig_dtype)
return True

info = getattr(tensor.untyped_storage(), "_comfy_tensor_file_slice", None)
Expand All @@ -48,6 +53,17 @@ def read_tensor_file_slice_into(tensor, destination):
if info.size == 0:
return True

hostbuf = getattr(destination.untyped_storage(), "_comfy_hostbuf", None)
if hostbuf is not None:
stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0
device_ptr = destination2.data_ptr() if destination2 is not None else 0
hostbuf.read_file_slice(file_obj, info.offset, info.size,
offset=destination.data_ptr() - hostbuf.get_raw_address(),
stream=stream_ptr,
device_ptr=device_ptr,
device=None if destination2 is None else destination2.device.index)
return True

buf_type = ctypes.c_ubyte * info.size
view = memoryview(buf_type.from_address(destination.data_ptr()))

Expand Down Expand Up @@ -151,7 +167,7 @@ def set_ram_cache_release_state(callback, headroom):
extra_ram_release_callback = callback
RAM_CACHE_HEADROOM = max(0, int(headroom))

def extra_ram_release(target):
def extra_ram_release(target, free_active=False):
if extra_ram_release_callback is None:
return 0
return extra_ram_release_callback(target)
return extra_ram_release_callback(target, free_active=free_active)
Loading
Loading