From cea6b8044b1e2642ebd9fa3cf144dd73816051d9 Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Tue, 2 Sep 2025 04:32:21 +0000
Subject: [PATCH 1/7] fix hidream type hint

---
 src/diffusers/models/transformers/transformer_hidream_image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/models/transformers/transformer_hidream_image.py b/src/diffusers/models/transformers/transformer_hidream_image.py
index 77902dcf5852..d3d20613e2f1 100644
--- a/src/diffusers/models/transformers/transformer_hidream_image.py
+++ b/src/diffusers/models/transformers/transformer_hidream_image.py
@@ -592,7 +592,7 @@ def forward(
         encoder_hidden_states: Optional[torch.Tensor] = None,
         temb: Optional[torch.Tensor] = None,
         image_rotary_emb: torch.Tensor = None,
-    ) -> torch.Tensor:
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
         return self.block(
             hidden_states=hidden_states,
             hidden_states_masks=hidden_states_masks,

From a5b47b1daae9c12978884308148b799de7add0b2 Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Fri, 5 Sep 2025 05:17:18 +0000
Subject: [PATCH 2/7] fix hunyuan-video type hint

---
 src/diffusers/models/transformers/transformer_hunyuan_video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video.py b/src/diffusers/models/transformers/transformer_hunyuan_video.py
index 6944a6c536b5..3723156805d8 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video.py
@@ -684,7 +684,7 @@ def forward(
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         token_replace_emb: torch.Tensor = None,
         num_tokens: int = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_length = encoder_hidden_states.shape[1]
         hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1)

From 168dd7d20c9114068acbd378856450edc6f16887 Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Fri, 5 Sep 2025 05:30:42 +0000
Subject: [PATCH 3/7] fix many type hint

---
 src/diffusers/models/attention.py | 2 +-
 src/diffusers/models/transformers/auraflow_transformer_2d.py | 2 +-
 src/diffusers/models/transformers/cogvideox_transformer_3d.py | 2 +-
 src/diffusers/models/transformers/consisid_transformer_3d.py | 2 +-
 src/diffusers/models/transformers/lumina_nextdit2d.py | 2 +-
 src/diffusers/models/transformers/transformer_bria.py | 2 +-
 src/diffusers/models/transformers/transformer_cogview3plus.py | 4 ++--
 src/diffusers/models/transformers/transformer_cogview4.py | 2 +-
 .../models/transformers/transformer_hidream_image.py | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py
index c720b379551f..c99133f257a5 100644
--- a/src/diffusers/models/attention.py
+++ b/src/diffusers/models/attention.py
@@ -674,7 +674,7 @@ def forward(
         encoder_hidden_states: torch.FloatTensor,
         temb: torch.FloatTensor,
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-    ):
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         joint_attention_kwargs = joint_attention_kwargs or {}
         if self.use_dual_attention:
             norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp, norm_hidden_states2, gate_msa2 = self.norm1(
diff --git a/src/diffusers/models/transformers/auraflow_transformer_2d.py b/src/diffusers/models/transformers/auraflow_transformer_2d.py
index a8d275d14214..26d80add05dc 100644
--- a/src/diffusers/models/transformers/auraflow_transformer_2d.py
+++ b/src/diffusers/models/transformers/auraflow_transformer_2d.py
@@ -173,7 +173,7 @@ def forward(
         hidden_states: torch.FloatTensor,
         temb: torch.FloatTensor,
         attention_kwargs: Optional[Dict[str, Any]] = None,
-    ):
+    ) -> torch.Tensor:
         residual = hidden_states
         attention_kwargs = attention_kwargs or {}
diff --git a/src/diffusers/models/transformers/cogvideox_transformer_3d.py b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
index a8c98bccb86c..ebe230380d42 100644
--- a/src/diffusers/models/transformers/cogvideox_transformer_3d.py
+++ b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
@@ -122,7 +122,7 @@ def forward(
         temb: torch.Tensor,
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_length = encoder_hidden_states.size(1)
         attention_kwargs = attention_kwargs or {}
diff --git a/src/diffusers/models/transformers/consisid_transformer_3d.py b/src/diffusers/models/transformers/consisid_transformer_3d.py
index 41632dbd4751..ec5a7844885d 100644
--- a/src/diffusers/models/transformers/consisid_transformer_3d.py
+++ b/src/diffusers/models/transformers/consisid_transformer_3d.py
@@ -315,7 +315,7 @@ def forward(
         hidden_states: torch.Tensor,
         encoder_hidden_states: torch.Tensor,
         temb: torch.Tensor,
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_length = encoder_hidden_states.size(1)
         # norm & modulate
diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py b/src/diffusers/models/transformers/lumina_nextdit2d.py
index 84b1175386b0..0c79a57a0395 100644
--- a/src/diffusers/models/transformers/lumina_nextdit2d.py
+++ b/src/diffusers/models/transformers/lumina_nextdit2d.py
@@ -124,7 +124,7 @@ def forward(
         encoder_mask: torch.Tensor,
         temb: torch.Tensor,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
-    ):
+    ) -> torch.Tensor:
         """
         Perform a forward pass through the LuminaNextDiTBlock.
diff --git a/src/diffusers/models/transformers/transformer_bria.py b/src/diffusers/models/transformers/transformer_bria.py
index 27a9941501a1..a2fcf81f7ac8 100644
--- a/src/diffusers/models/transformers/transformer_bria.py
+++ b/src/diffusers/models/transformers/transformer_bria.py
@@ -472,7 +472,7 @@ def forward(
         temb: torch.Tensor,
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_len = encoder_hidden_states.shape[1]
         hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
diff --git a/src/diffusers/models/transformers/transformer_cogview3plus.py b/src/diffusers/models/transformers/transformer_cogview3plus.py
index 77f15f6ca6f1..81d16f5b59b6 100644
--- a/src/diffusers/models/transformers/transformer_cogview3plus.py
+++ b/src/diffusers/models/transformers/transformer_cogview3plus.py
@@ -13,7 +13,7 @@
 # limitations under the License.
-from typing import Dict, Union
+from typing import Dict, Union, Tuple
 import torch
 import torch.nn as nn
@@ -79,7 +79,7 @@ def forward(
         hidden_states: torch.Tensor,
         encoder_hidden_states: torch.Tensor,
         emb: torch.Tensor,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_length = encoder_hidden_states.size(1)
         # norm & modulate
diff --git a/src/diffusers/models/transformers/transformer_cogview4.py b/src/diffusers/models/transformers/transformer_cogview4.py
index 25dcfa14cc0b..58f80a72f6ee 100644
--- a/src/diffusers/models/transformers/transformer_cogview4.py
+++ b/src/diffusers/models/transformers/transformer_cogview4.py
@@ -494,7 +494,7 @@ def forward(
         ] = None,
         attention_mask: Optional[Dict[str, torch.Tensor]] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         # 1. Timestep conditioning
         (
             norm_hidden_states,
diff --git a/src/diffusers/models/transformers/transformer_hidream_image.py b/src/diffusers/models/transformers/transformer_hidream_image.py
index d3d20613e2f1..5578ef5ab20a 100644
--- a/src/diffusers/models/transformers/transformer_hidream_image.py
+++ b/src/diffusers/models/transformers/transformer_hidream_image.py
@@ -534,7 +534,7 @@ def forward(
         encoder_hidden_states: Optional[torch.Tensor] = None,
         temb: Optional[torch.Tensor] = None,
         image_rotary_emb: torch.Tensor = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         wtype = hidden_states.dtype
         (
             shift_msa_i,

From 68d7db3854afbc71d07d9a9c750bd216cb528a03 Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Fri, 12 Sep 2025 09:59:17 +0000
Subject: [PATCH 4/7] fix many type hint errors

---
 .../models/transformers/auraflow_transformer_2d.py | 8 ++++----
 .../models/transformers/cogvideox_transformer_3d.py | 2 +-
 .../models/transformers/consisid_transformer_3d.py | 2 +-
 src/diffusers/models/transformers/lumina_nextdit2d.py | 4 ++--
 .../models/transformers/transformer_hidream_image.py | 6 +++---
 .../models/transformers/transformer_hunyuan_video.py | 4 ++--
 .../transformers/transformer_hunyuan_video_framepack.py | 4 ++--
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/diffusers/models/transformers/auraflow_transformer_2d.py b/src/diffusers/models/transformers/auraflow_transformer_2d.py
index 26d80add05dc..897a80f49418 100644
--- a/src/diffusers/models/transformers/auraflow_transformer_2d.py
+++ b/src/diffusers/models/transformers/auraflow_transformer_2d.py
@@ -13,7 +13,7 @@
 # limitations under the License.
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, Tuple
 import torch
 import torch.nn as nn
@@ -92,7 +92,7 @@ def pe_selection_index_based_on_dim(self, h, w):
         return selected_indices
-    def forward(self, latent):
+    def forward(self, latent) -> torch.Tensor:
         batch_size, num_channels, height, width = latent.size()
         latent = latent.view(
             batch_size,
@@ -242,7 +242,7 @@ def forward(
         encoder_hidden_states: torch.FloatTensor,
         temb: torch.FloatTensor,
         attention_kwargs: Optional[Dict[str, Any]] = None,
-    ):
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         residual = hidden_states
         residual_context = encoder_hidden_states
         attention_kwargs = attention_kwargs or {}
@@ -472,7 +472,7 @@ def forward(
         timestep: torch.LongTensor = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/cogvideox_transformer_3d.py b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
index ebe230380d42..6753ecaaea3a 100644
--- a/src/diffusers/models/transformers/cogvideox_transformer_3d.py
+++ b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
@@ -441,7 +441,7 @@ def forward(
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ):
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/consisid_transformer_3d.py b/src/diffusers/models/transformers/consisid_transformer_3d.py
index ec5a7844885d..59853300e2ea 100644
--- a/src/diffusers/models/transformers/consisid_transformer_3d.py
+++ b/src/diffusers/models/transformers/consisid_transformer_3d.py
@@ -691,7 +691,7 @@ def forward(
         id_cond: Optional[torch.Tensor] = None,
         id_vit_hidden: Optional[torch.Tensor] = None,
         return_dict: bool = True,
-    ):
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py b/src/diffusers/models/transformers/lumina_nextdit2d.py
index 0c79a57a0395..aca8a2ff2911 100644
--- a/src/diffusers/models/transformers/lumina_nextdit2d.py
+++ b/src/diffusers/models/transformers/lumina_nextdit2d.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 import torch
 import torch.nn as nn
@@ -297,7 +297,7 @@ def forward(
         image_rotary_emb: torch.Tensor,
         cross_attention_kwargs: Dict[str, Any] = None,
         return_dict=True,
-    ) -> torch.Tensor:
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         """
         Forward pass of LuminaNextDiT.
diff --git a/src/diffusers/models/transformers/transformer_hidream_image.py b/src/diffusers/models/transformers/transformer_hidream_image.py
index 5578ef5ab20a..ce21e67898c2 100644
--- a/src/diffusers/models/transformers/transformer_hidream_image.py
+++ b/src/diffusers/models/transformers/transformer_hidream_image.py
@@ -55,7 +55,7 @@ def __init__(self, hidden_size, frequency_embedding_size=256):
         self.time_proj = Timesteps(num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0)
         self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=hidden_size)
-    def forward(self, timesteps: torch.Tensor, wdtype: Optional[torch.dtype] = None):
+    def forward(self, timesteps: torch.Tensor, wdtype: Optional[torch.dtype] = None) -> torch.Tensor:
         t_emb = self.time_proj(timesteps).to(dtype=wdtype)
         t_emb = self.timestep_embedder(t_emb)
         return t_emb
@@ -87,7 +87,7 @@ def __init__(
         self.out_channels = out_channels
         self.proj = nn.Linear(in_channels * patch_size * patch_size, out_channels, bias=True)
-    def forward(self, latent):
+    def forward(self, latent) -> torch.Tensor:
         latent = self.proj(latent)
         return latent
@@ -786,7 +786,7 @@ def forward(
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
         **kwargs,
-    ):
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         encoder_hidden_states = kwargs.get("encoder_hidden_states", None)
         if encoder_hidden_states is not None:
diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video.py b/src/diffusers/models/transformers/transformer_hunyuan_video.py
index 3723156805d8..a48b3b5dd195 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video.py
@@ -529,7 +529,7 @@ def forward(
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         *args,
         **kwargs,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         text_seq_length = encoder_hidden_states.shape[1]
         hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1)
@@ -1038,7 +1038,7 @@ def forward(
         guidance: torch.Tensor = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py b/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
index c2eb7fd2a705..51bc705f4f76 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 import torch
 import torch.nn as nn
@@ -216,7 +216,7 @@ def forward(
         indices_latents_history_4x: Optional[torch.Tensor] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ):
+    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)

From 3107fe09aa6d3d9ca0a107ca1d975cf460bdfd0d Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Fri, 12 Sep 2025 10:05:15 +0000
Subject: [PATCH 5/7] fix many type hint errors

---
 src/diffusers/models/transformers/auraflow_transformer_2d.py | 2 +-
 src/diffusers/models/transformers/cogvideox_transformer_3d.py | 2 +-
 src/diffusers/models/transformers/consisid_transformer_3d.py | 2 +-
 src/diffusers/models/transformers/lumina_nextdit2d.py | 4 ++--
 src/diffusers/models/transformers/transformer_bria.py | 2 +-
 src/diffusers/models/transformers/transformer_cogview3plus.py | 2 +-
 src/diffusers/models/transformers/transformer_cogview4.py | 2 +-
 .../models/transformers/transformer_hidream_image.py | 2 +-
 .../transformers/transformer_hunyuan_video_framepack.py | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/diffusers/models/transformers/auraflow_transformer_2d.py b/src/diffusers/models/transformers/auraflow_transformer_2d.py
index 897a80f49418..d13e6e0df065 100644
--- a/src/diffusers/models/transformers/auraflow_transformer_2d.py
+++ b/src/diffusers/models/transformers/auraflow_transformer_2d.py
@@ -472,7 +472,7 @@ def forward(
         timestep: torch.LongTensor = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/cogvideox_transformer_3d.py b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
index 6753ecaaea3a..50381096903c 100644
--- a/src/diffusers/models/transformers/cogvideox_transformer_3d.py
+++ b/src/diffusers/models/transformers/cogvideox_transformer_3d.py
@@ -441,7 +441,7 @@ def forward(
         image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/consisid_transformer_3d.py b/src/diffusers/models/transformers/consisid_transformer_3d.py
index 59853300e2ea..91fe811f0013 100644
--- a/src/diffusers/models/transformers/consisid_transformer_3d.py
+++ b/src/diffusers/models/transformers/consisid_transformer_3d.py
@@ -691,7 +691,7 @@ def forward(
         id_cond: Optional[torch.Tensor] = None,
         id_vit_hidden: Optional[torch.Tensor] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py b/src/diffusers/models/transformers/lumina_nextdit2d.py
index aca8a2ff2911..cabdf98505d3 100644
--- a/src/diffusers/models/transformers/lumina_nextdit2d.py
+++ b/src/diffusers/models/transformers/lumina_nextdit2d.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, Tuple
 import torch
 import torch.nn as nn
@@ -297,7 +297,7 @@ def forward(
         image_rotary_emb: torch.Tensor,
         cross_attention_kwargs: Dict[str, Any] = None,
         return_dict=True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         """
         Forward pass of LuminaNextDiT.
diff --git a/src/diffusers/models/transformers/transformer_bria.py b/src/diffusers/models/transformers/transformer_bria.py
index a2fcf81f7ac8..04a9c5645c81 100644
--- a/src/diffusers/models/transformers/transformer_bria.py
+++ b/src/diffusers/models/transformers/transformer_bria.py
@@ -588,7 +588,7 @@ def forward(
         return_dict: bool = True,
         controlnet_block_samples=None,
         controlnet_single_block_samples=None,
-    ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         """
         The [`BriaTransformer2DModel`] forward method.
diff --git a/src/diffusers/models/transformers/transformer_cogview3plus.py b/src/diffusers/models/transformers/transformer_cogview3plus.py
index 81d16f5b59b6..799cfcfa30d6 100644
--- a/src/diffusers/models/transformers/transformer_cogview3plus.py
+++ b/src/diffusers/models/transformers/transformer_cogview3plus.py
@@ -293,7 +293,7 @@ def forward(
         target_size: torch.Tensor,
         crop_coords: torch.Tensor,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         """
         The [`CogView3PlusTransformer2DModel`] forward method.
diff --git a/src/diffusers/models/transformers/transformer_cogview4.py b/src/diffusers/models/transformers/transformer_cogview4.py
index 58f80a72f6ee..64e9a538a7c2 100644
--- a/src/diffusers/models/transformers/transformer_cogview4.py
+++ b/src/diffusers/models/transformers/transformer_cogview4.py
@@ -717,7 +717,7 @@ def forward(
         image_rotary_emb: Optional[
             Union[Tuple[torch.Tensor, torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]]
         ] = None,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)
diff --git a/src/diffusers/models/transformers/transformer_hidream_image.py b/src/diffusers/models/transformers/transformer_hidream_image.py
index ce21e67898c2..4a5aee29abc4 100644
--- a/src/diffusers/models/transformers/transformer_hidream_image.py
+++ b/src/diffusers/models/transformers/transformer_hidream_image.py
@@ -786,7 +786,7 @@ def forward(
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
         **kwargs,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         encoder_hidden_states = kwargs.get("encoder_hidden_states", None)
         if encoder_hidden_states is not None:
diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py b/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
index 51bc705f4f76..60b40fff3cb8 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video_framepack.py
@@ -216,7 +216,7 @@ def forward(
         indices_latents_history_4x: Optional[torch.Tensor] = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)

From 9059c43862cc2212e0635e667946b7452b99232f Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Fri, 12 Sep 2025 10:06:45 +0000
Subject: [PATCH 6/7] fix many type hint errors

---
 src/diffusers/models/transformers/transformer_hunyuan_video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video.py b/src/diffusers/models/transformers/transformer_hunyuan_video.py
index a48b3b5dd195..bc857ccab463 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video.py
@@ -1038,7 +1038,7 @@ def forward(
         guidance: torch.Tensor = None,
         attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
-    ) -> Union[torch.Tensor, Transformer2DModelOutput]:
+    ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]:
         if attention_kwargs is not None:
             attention_kwargs = attention_kwargs.copy()
             lora_scale = attention_kwargs.pop("scale", 1.0)

From 5a2f6f7880b5c54afbd89b080e7197e9ea8011ce Mon Sep 17 00:00:00 2001
From: DefTruth
Date: Wed, 17 Sep 2025 03:05:31 +0000
Subject: [PATCH 7/7] make stype & make quality

---
 src/diffusers/models/transformers/auraflow_transformer_2d.py | 2 +-
 src/diffusers/models/transformers/lumina_nextdit2d.py | 2 +-
 src/diffusers/models/transformers/transformer_cogview3plus.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/diffusers/models/transformers/auraflow_transformer_2d.py b/src/diffusers/models/transformers/auraflow_transformer_2d.py
index d13e6e0df065..4d7d1ba40e92 100644
--- a/src/diffusers/models/transformers/auraflow_transformer_2d.py
+++ b/src/diffusers/models/transformers/auraflow_transformer_2d.py
@@ -13,7 +13,7 @@
 # limitations under the License.
-from typing import Any, Dict, Optional, Union, Tuple
+from typing import Any, Dict, Optional, Tuple, Union
 import torch
 import torch.nn as nn
diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py b/src/diffusers/models/transformers/lumina_nextdit2d.py
index cabdf98505d3..bed5e69c2d36 100644
--- a/src/diffusers/models/transformers/lumina_nextdit2d.py
+++ b/src/diffusers/models/transformers/lumina_nextdit2d.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional, Union, Tuple
+from typing import Any, Dict, Optional, Tuple, Union
 import torch
 import torch.nn as nn
diff --git a/src/diffusers/models/transformers/transformer_cogview3plus.py b/src/diffusers/models/transformers/transformer_cogview3plus.py
index 799cfcfa30d6..7356f4a606bb 100644
--- a/src/diffusers/models/transformers/transformer_cogview3plus.py
+++ b/src/diffusers/models/transformers/transformer_cogview3plus.py
@@ -13,7 +13,7 @@
 # limitations under the License.
-from typing import Dict, Union, Tuple
+from typing import Dict, Tuple, Union
 import torch
 import torch.nn as nn
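
A quick illustration of what the corrected annotations describe: a transformer block's forward now advertises a (hidden_states, encoder_hidden_states) pair rather than a bare tensor, so callers and static type checkers can unpack it explicitly. The sketch below is not part of the patches; the CogVideoXBlock constructor arguments and tensor shapes are assumptions chosen only to make the example runnable.

    import torch
    from diffusers.models.transformers.cogvideox_transformer_3d import CogVideoXBlock

    # Assumed, deliberately small configuration purely for illustration.
    block = CogVideoXBlock(dim=64, num_attention_heads=4, attention_head_dim=16, time_embed_dim=128)
    hidden_states = torch.randn(1, 226, 64)         # video tokens
    encoder_hidden_states = torch.randn(1, 16, 64)  # text tokens
    temb = torch.randn(1, 128)                      # timestep embedding

    # With the fixed hint, the tuple return is explicit instead of a single torch.Tensor.
    hidden_states, encoder_hidden_states = block(hidden_states, encoder_hidden_states, temb)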