diff --git a/comfy/ldm/lightricks/vocoders/vocoder.py b/comfy/ldm/lightricks/vocoders/vocoder.py index 6c4028aa89c2..2481d8bdd373 100644 --- a/comfy/ldm/lightricks/vocoders/vocoder.py +++ b/comfy/ldm/lightricks/vocoders/vocoder.py @@ -2,6 +2,7 @@ import torch.nn.functional as F import torch.nn as nn import comfy.ops +import comfy.model_management import numpy as np import math @@ -81,7 +82,7 @@ def forward(self, x): _, C, _ = x.shape if self.padding: x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode) - return F.conv1d(x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C) + return F.conv1d(x, comfy.model_management.cast_to(self.filter, dtype=x.dtype, device=x.device).expand(C, -1, -1), stride=self.stride, groups=C) class UpSample1d(nn.Module): @@ -125,7 +126,7 @@ def forward(self, x): _, C, _ = x.shape x = F.pad(x, (self.pad, self.pad), mode="replicate") x = self.ratio * F.conv_transpose1d( - x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C + x, comfy.model_management.cast_to(self.filter, dtype=x.dtype, device=x.device).expand(C, -1, -1), stride=self.stride, groups=C ) x = x[..., self.pad_left : -self.pad_right] return x @@ -190,7 +191,7 @@ def __init__( self.eps = 1e-9 def forward(self, x): - a = self.alpha.unsqueeze(0).unsqueeze(-1) + a = comfy.model_management.cast_to(self.alpha.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) if self.alpha_logscale: a = torch.exp(a) return x + (1.0 / (a + self.eps)) * torch.sin(x * a).pow(2) @@ -217,8 +218,8 @@ def __init__( self.eps = 1e-9 def forward(self, x): - a = self.alpha.unsqueeze(0).unsqueeze(-1) - b = self.beta.unsqueeze(0).unsqueeze(-1) + a = comfy.model_management.cast_to(self.alpha.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) + b = comfy.model_management.cast_to(self.beta.unsqueeze(0).unsqueeze(-1), dtype=x.dtype, device=x.device) if self.alpha_logscale: a = torch.exp(a) b = torch.exp(b) @@ -596,7 +597,7 @@ def forward(self, y: torch.Tensor) -> 
tuple[torch.Tensor, torch.Tensor]: y = y.unsqueeze(1) # (B, 1, T) left_pad = max(0, self.win_length - self.hop_length) # causal: left-only y = F.pad(y, (left_pad, 0)) - spec = F.conv1d(y, self.forward_basis, stride=self.hop_length, padding=0) + spec = F.conv1d(y, comfy.model_management.cast_to(self.forward_basis, dtype=y.dtype, device=y.device), stride=self.hop_length, padding=0) n_freqs = spec.shape[1] // 2 real, imag = spec[:, :n_freqs], spec[:, n_freqs:] magnitude = torch.sqrt(real ** 2 + imag ** 2) @@ -647,7 +648,7 @@ def mel_spectrogram( """ magnitude, phase = self.stft_fn(y) energy = torch.norm(magnitude, dim=1) - mel = torch.matmul(self.mel_basis.to(magnitude.dtype), magnitude) + mel = torch.matmul(comfy.model_management.cast_to(self.mel_basis, dtype=magnitude.dtype, device=magnitude.device), magnitude) log_mel = torch.log(torch.clamp(mel, min=1e-5)) return log_mel, magnitude, phase, energy diff --git a/comfy/ops.py b/comfy/ops.py index 3e19cd1b6842..87b36b5c5cb8 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -80,6 +80,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): + + # vbar doesn't support CPU weights, but some custom nodes take unusual paths + # that switch the layer to the CPU and still expect it to work. Conservatively + # clone the weights here: they are mmapped and some SFT files are packed misaligned. + # If you are a custom node author reading this, please move your layer to the GPU + # or declare your ModelPatcher as CPU in the first place. 
+ if comfy.model_management.is_device_cpu(device): + weight = s.weight.to(dtype=dtype, copy=True) + if isinstance(weight, QuantizedTensor): + weight = weight.dequantize() + bias = None + if s.bias is not None: + bias = s.bias.to(dtype=bias_dtype, copy=True) + return weight, bias, (None, None, None) + offload_stream = None xfer_dest = None diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index 32fe921ff40b..c055711438f8 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -253,10 +253,12 @@ def get_latent_index(cls, cond, latent_length, guide_length, frame_idx, scale_fa return frame_idx, latent_idx @classmethod - def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors, latent_downscale_factor=1): + def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors, latent_downscale_factor=1, causal_fix=None): keyframe_idxs, _ = get_keyframe_idxs(cond) _, latent_coords = cls.PATCHIFIER.patchify(guiding_latent) - pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0) # we need the causal fix only if we're placing the new latents at index 0 + if causal_fix is None: # default when the caller does not override: apply the causal fix if the new latents are placed at index 0 or guiding_latent.shape[2] == 1 (presumably a single-frame guide) + causal_fix = frame_idx == 0 or guiding_latent.shape[2] == 1 + pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=causal_fix) pixel_coords[:, 0] += frame_idx # The following adjusts keyframe end positions for small grid IC-LoRA. 
@@ -278,12 +280,12 @@ def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors, late return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs}) @classmethod - def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors, guide_mask=None, in_channels=128, latent_downscale_factor=1): + def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors, guide_mask=None, in_channels=128, latent_downscale_factor=1, causal_fix=None): if latent_image.shape[1] != in_channels or guiding_latent.shape[1] != in_channels: raise ValueError("Adding guide to a combined AV latent is not supported.") - positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors, latent_downscale_factor) - negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors, latent_downscale_factor) + positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors, latent_downscale_factor, causal_fix=causal_fix) + negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors, latent_downscale_factor, causal_fix=causal_fix) if guide_mask is not None: target_h = max(noise_mask.shape[3], guide_mask.shape[3]) diff --git a/comfyui_version.py b/comfyui_version.py index e58e0fb633d6..5da21150b702 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. 
-__version__ = "0.16.1" +__version__ = "0.16.3" diff --git a/pyproject.toml b/pyproject.toml index 199a90364020..6a83c5c6320b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.16.1" +version = "0.16.3" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" diff --git a/requirements.txt b/requirements.txt index 3fd44e0cf15d..9a674fac5a59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.39.19 -comfyui-workflow-templates==0.9.8 +comfyui-workflow-templates==0.9.10 comfyui-embedded-docs==0.4.3 torch torchsde @@ -22,7 +22,7 @@ alembic SQLAlchemy av>=14.2.0 comfy-kitchen>=0.2.7 -comfy-aimdo>=0.2.6 +comfy-aimdo>=0.2.7 requests #non essential dependencies: