From 344bdbd33b34fcf16ee533a4de42af50820ff1ea Mon Sep 17 00:00:00 2001 From: "lirui.926" Date: Sun, 30 Nov 2025 10:25:29 +0800 Subject: [PATCH] fix spatial compression ratio computation error for AutoencoderKLWan --- .../models/autoencoders/autoencoder_kl_wan.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_wan.py b/src/diffusers/models/autoencoders/autoencoder_kl_wan.py index b0b2960aaf18..dedf9724f10a 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_wan.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_wan.py @@ -1259,14 +1259,20 @@ def tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput: `torch.Tensor`: The latent representation of the encoded videos. """ + _, _, num_frames, height, width = x.shape - latent_height = height // self.spatial_compression_ratio - latent_width = width // self.spatial_compression_ratio + encode_spatial_compression_ratio = self.spatial_compression_ratio + if self.config.patch_size is not None: + assert encode_spatial_compression_ratio % self.config.patch_size == 0 + encode_spatial_compression_ratio = self.spatial_compression_ratio // self.config.patch_size - tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio - tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio - tile_latent_stride_height = self.tile_sample_stride_height // self.spatial_compression_ratio - tile_latent_stride_width = self.tile_sample_stride_width // self.spatial_compression_ratio + latent_height = height // encode_spatial_compression_ratio + latent_width = width // encode_spatial_compression_ratio + + tile_latent_min_height = self.tile_sample_min_height // encode_spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // encode_spatial_compression_ratio + tile_latent_stride_height = self.tile_sample_stride_height // encode_spatial_compression_ratio + 
tile_latent_stride_width = self.tile_sample_stride_width // encode_spatial_compression_ratio blend_height = tile_latent_min_height - tile_latent_stride_height blend_width = tile_latent_min_width - tile_latent_stride_width