diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py
index 2f149b662ec2..1b6557cf2f53 100644
--- a/src/transformers/models/detr/image_processing_detr.py
+++ b/src/transformers/models/detr/image_processing_detr.py
@@ -1724,7 +1724,7 @@ def get_ids_area(masks, scores, dedup=False):
 
     # inspired by https://github.com/facebookresearch/detr/blob/master/models/detr.py#L258
     def post_process_object_detection(
-        self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
+        self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
     ):
         """
         Converts the raw output of [`DetrForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py
index 190d01ab5590..f2be84ece9b3 100644
--- a/src/transformers/models/detr/image_processing_detr_fast.py
+++ b/src/transformers/models/detr/image_processing_detr_fast.py
@@ -948,7 +948,7 @@ def get_ids_area(masks, scores, dedup=False):
 
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_object_detection
     def post_process_object_detection(
-        self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
+        self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
     ):
         """
         Converts the raw output of [`DetrForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
diff --git a/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
index ac7e11ece28e..1bdc46fbba82 100644
--- a/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
+++ b/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
@@ -2673,9 +2673,9 @@ def replace_multimodal_special_tokens(
     def __call__(
         self,
         text: TextInput = None,
-        images: ImageInput = None,
-        videos: VideoInput = None,
-        audio: AudioInput = None,
+        images: Optional[ImageInput] = None,
+        videos: Optional[VideoInput] = None,
+        audio: Optional[AudioInput] = None,
         **kwargs,
     ):
         """
diff --git a/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
index d74554de6624..9f3a894c114d 100644
--- a/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
+++ b/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
@@ -20,7 +20,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
-from typing import Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -123,9 +123,9 @@ def __init__(
     def __call__(
         self,
         text: TextInput = None,
-        images: ImageInput = None,
-        videos: VideoInput = None,
-        audio: AudioInput = None,
+        images: Optional[ImageInput] = None,
+        videos: Optional[VideoInput] = None,
+        audio: Optional[AudioInput] = None,
         **kwargs,
     ) -> BatchFeature:
         """
diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py
index eeba9cbb1fc7..e207c55dc636 100644
--- a/src/transformers/models/rag/modeling_rag.py
+++ b/src/transformers/models/rag/modeling_rag.py
@@ -241,7 +241,7 @@ def from_pretrained_question_encoder_generator(
         cls,
         question_encoder_pretrained_model_name_or_path: Optional[str] = None,
         generator_pretrained_model_name_or_path: Optional[str] = None,
-        retriever: RagRetriever = None,
+        retriever: Optional[RagRetriever] = None,
         **kwargs,
     ) -> PreTrainedModel:
         r"""
diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr.py b/src/transformers/models/rt_detr/image_processing_rt_detr.py
index b366ca62fabf..14993a5a5c9a 100644
--- a/src/transformers/models/rt_detr/image_processing_rt_detr.py
+++ b/src/transformers/models/rt_detr/image_processing_rt_detr.py
@@ -978,7 +978,7 @@ def post_process_object_detection(
         self,
         outputs,
         threshold: float = 0.5,
-        target_sizes: Union[TensorType, list[tuple]] = None,
+        target_sizes: Optional[Union[TensorType, list[tuple]]] = None,
         use_focal_loss: bool = True,
     ):
         """
diff --git a/src/transformers/models/sam2_video/modular_sam2_video.py b/src/transformers/models/sam2_video/modular_sam2_video.py
index 0b3f7e245090..f6ed7a49fd43 100644
--- a/src/transformers/models/sam2_video/modular_sam2_video.py
+++ b/src/transformers/models/sam2_video/modular_sam2_video.py
@@ -635,9 +635,9 @@ def init_video_session(
         self,
         video: Optional[VideoInput] = None,
         inference_device: Union[str, "torch.device"] = "cpu",
-        inference_state_device: Union[str, "torch.device"] = None,
-        processing_device: Union[str, "torch.device"] = None,
-        video_storage_device: Union[str, "torch.device"] = None,
+        inference_state_device: Optional[Union[str, "torch.device"]] = None,
+        processing_device: Optional[Union[str, "torch.device"]] = None,
+        video_storage_device: Optional[Union[str, "torch.device"]] = None,
         max_vision_features_cache_size: int = 1,
         dtype: torch.dtype = torch.float32,
     ):
diff --git a/src/transformers/models/sam2_video/processing_sam2_video.py b/src/transformers/models/sam2_video/processing_sam2_video.py
index 0c0df9490152..8e09ee23b9a4 100644
--- a/src/transformers/models/sam2_video/processing_sam2_video.py
+++ b/src/transformers/models/sam2_video/processing_sam2_video.py
@@ -530,9 +530,9 @@ def init_video_session(
         self,
         video: Optional[VideoInput] = None,
         inference_device: Union[str, "torch.device"] = "cpu",
-        inference_state_device: Union[str, "torch.device"] = None,
-        processing_device: Union[str, "torch.device"] = None,
-        video_storage_device: Union[str, "torch.device"] = None,
+        inference_state_device: Optional[Union[str, "torch.device"]] = None,
+        processing_device: Optional[Union[str, "torch.device"]] = None,
+        video_storage_device: Optional[Union[str, "torch.device"]] = None,
         max_vision_features_cache_size: int = 1,
         dtype: torch.dtype = torch.float32,
     ):
diff --git a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py
index fd773316580c..403922eee93c 100644
--- a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py
+++ b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py
@@ -211,9 +211,9 @@ def vocab_size(self):
 
     def __call__(
         self,
-        text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
-        text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair_target: Optional[
             Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
         ] = None,
diff --git a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py
index 0318336332c3..081dcec7dd99 100644
--- a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py
+++ b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py
@@ -371,9 +371,9 @@ def _from_pretrained(
 
     def __call__(
         self,
-        text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
-        text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair_target: Optional[
             Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
         ] = None,
diff --git a/src/transformers/models/trocr/processing_trocr.py b/src/transformers/models/trocr/processing_trocr.py
index 037f708e4ee4..6f5a8b6cb0bc 100644
--- a/src/transformers/models/trocr/processing_trocr.py
+++ b/src/transformers/models/trocr/processing_trocr.py
@@ -53,7 +53,7 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs):
     def __call__(
         self,
         images: Optional[ImageInput] = None,
-        text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         **kwargs: Unpack[TrOCRProcessorKwargs],
     ) -> BatchFeature:
         """
diff --git a/src/transformers/models/udop/tokenization_udop.py b/src/transformers/models/udop/tokenization_udop.py
index a5833333e10a..26eb7fa82e7a 100644
--- a/src/transformers/models/udop/tokenization_udop.py
+++ b/src/transformers/models/udop/tokenization_udop.py
@@ -508,11 +508,11 @@ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] =
     @add_end_docstrings(UDOP_ENCODE_KWARGS_DOCSTRING)
     def __call__(
         self,
-        text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair: Optional[Union[PreTokenizedInput, list[PreTokenizedInput]]] = None,
         boxes: Optional[Union[list[list[int]], list[list[list[int]]]]] = None,
         word_labels: Optional[Union[list[int], list[list[int]]]] = None,
-        text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
+        text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
         text_pair_target: Optional[
             Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
         ] = None,
@@ -703,7 +703,7 @@ def batch_encode_plus_boxes(
         word_labels: Optional[list[list[int]]] = None,
         add_special_tokens: bool = True,
         padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
+        truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
         max_length: Optional[int] = None,
         stride: int = 0,
         is_split_into_words: bool = False,
@@ -771,7 +771,7 @@ def encode_boxes(
         word_labels: Optional[list[list[int]]] = None,
         add_special_tokens: bool = True,
         padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
+        truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
         max_length: Optional[int] = None,
         stride: int = 0,
         return_tensors: Optional[Union[str, TensorType]] = None,
@@ -814,7 +814,7 @@ def encode_plus_boxes(
         word_labels: Optional[list[list[int]]] = None,
         add_special_tokens: bool = True,
         padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
+        truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
         max_length: Optional[int] = None,
         stride: int = 0,
         is_split_into_words: bool = False,
diff --git a/src/transformers/models/video_llava/processing_video_llava.py b/src/transformers/models/video_llava/processing_video_llava.py
index a6f826fa72a3..927d662fb587 100644
--- a/src/transformers/models/video_llava/processing_video_llava.py
+++ b/src/transformers/models/video_llava/processing_video_llava.py
@@ -93,8 +93,8 @@ def __call__(
         images: Optional[ImageInput] = None,
         videos: Optional[ImageInput] = None,
         padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
-        max_length=None,
+        truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
+        max_length: Optional[int] = None,
         return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
     ) -> BatchFeature:
         """
diff --git a/src/transformers/models/vitpose/image_processing_vitpose.py b/src/transformers/models/vitpose/image_processing_vitpose.py
index 95f4dd8e5ded..beb197f987ef 100644
--- a/src/transformers/models/vitpose/image_processing_vitpose.py
+++ b/src/transformers/models/vitpose/image_processing_vitpose.py
@@ -595,7 +595,7 @@ def post_process_pose_estimation(
         boxes: Union[list[list[list[float]]], np.ndarray],
         kernel_size: int = 11,
         threshold: Optional[float] = None,
-        target_sizes: Union[TensorType, list[tuple]] = None,
+        target_sizes: Optional[Union[TensorType, list[tuple]]] = None,
     ):
         """
         Transform the heatmaps into keypoint predictions and transform them back to the image.
diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
index da25e36663d0..8f039f1574ad 100755
--- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -1474,7 +1474,7 @@ def compute_contrastive_logits(
         target_features: torch.FloatTensor,
         negative_features: torch.FloatTensor,
         predicted_features: torch.FloatTensor,
-        temperature: int = 0.1,
+        temperature: float = 0.1,
     ):
         """
         Compute logits for contrastive loss based using cosine similarity as the distance measure between
diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
index 62357c8e0dcb..4230a28f9a26 100644
--- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
+++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
@@ -1228,7 +1228,7 @@ def compute_contrastive_logits(
         target_features: torch.FloatTensor,
         negative_features: torch.FloatTensor,
         predicted_features: torch.FloatTensor,
-        temperature: int = 0.1,
+        temperature: float = 0.1,
     ):
         """
         Compute logits for contrastive loss based using cosine similarity as the distance measure between
diff --git a/src/transformers/models/xcodec/configuration_xcodec.py b/src/transformers/models/xcodec/configuration_xcodec.py
index bf91c02912ca..7281e1719033 100644
--- a/src/transformers/models/xcodec/configuration_xcodec.py
+++ b/src/transformers/models/xcodec/configuration_xcodec.py
@@ -98,8 +98,8 @@ def __init__(
         codebook_size: int = 1024,
         codebook_dim: Optional[int] = None,
         initializer_range: float = 0.02,
-        acoustic_model_config: Union[dict, DacConfig] = None,
-        semantic_model_config: Union[dict, HubertConfig] = None,
+        acoustic_model_config: Optional[Union[dict, DacConfig]] = None,
+        semantic_model_config: Optional[Union[dict, HubertConfig]] = None,
         **kwargs,
     ):
         if acoustic_model_config is None:
diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py
index b594c296707b..0d33b6c761bf 100644
--- a/src/transformers/models/yolos/image_processing_yolos.py
+++ b/src/transformers/models/yolos/image_processing_yolos.py
@@ -1471,7 +1471,7 @@ def post_process(self, outputs, target_sizes):
 
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_object_detection with Detr->Yolos
     def post_process_object_detection(
-        self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
+        self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
     ):
         """
         Converts the raw output of [`YolosForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py
index 2bdeb27514e3..f1bb9da8c202 100644
--- a/src/transformers/utils/quantization_config.py
+++ b/src/transformers/utils/quantization_config.py
@@ -1128,11 +1128,11 @@ def __init__(
         in_features: int = -1,
         indices_as_float: bool = False,
         is_indice_packed: bool = True,
-        num_centroids: tuple = [-1, -1],
-        num_res_centroids: tuple = [-1, -1],
+        num_centroids: list = [-1, -1],
+        num_res_centroids: list = [-1, -1],
         out_features: int = -1,
         outlier_size: int = 0,
-        vector_lens: tuple = [-1, -1],
+        vector_lens: list = [-1, -1],
         **kwargs,
     ):
         self.enable_norm = enable_norm
diff --git a/tests/models/tvp/test_image_processing_tvp.py b/tests/models/tvp/test_image_processing_tvp.py
index 6d454daf9e4b..40b72e7e1ec1 100644
--- a/tests/models/tvp/test_image_processing_tvp.py
+++ b/tests/models/tvp/test_image_processing_tvp.py
@@ -47,7 +47,7 @@ def __init__(
         do_pad: bool = True,
         pad_size: dict[str, int] = {"height": 80, "width": 80},
         fill: Optional[int] = None,
-        pad_mode: PaddingMode = None,
+        pad_mode: Optional[PaddingMode] = None,
         do_normalize: bool = True,
         image_mean: Optional[Union[float, list[float]]] = [0.48145466, 0.4578275, 0.40821073],
         image_std: Optional[Union[float, list[float]]] = [0.26862954, 0.26130258, 0.27577711],
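Most of the hunks above replace the implicit-Optional pattern `x: SomeType = None` with an explicit `Optional[...]` annotation. The sketch below is not part of the patch and uses a stand-in `TensorType` rather than the real `transformers.utils.TensorType`; it only illustrates why the explicit spelling is preferred: under PEP 484 a `None` default no longer implies an Optional type, so strict checkers such as mypy with `no_implicit_optional` enabled reject the first form while accepting the second.

# Minimal sketch, not from the patch: the implicit-Optional pattern the diff removes.
from typing import Optional, Union

TensorType = str  # stand-in for transformers.utils.TensorType in this sketch only


def post_process_before(outputs, target_sizes: Union[TensorType, list[tuple]] = None):
    # The `= None` default does not make the annotation Optional under PEP 484,
    # so a strict checker reports an incompatible default for `target_sizes` here.
    return outputs if target_sizes is None else (outputs, target_sizes)


def post_process_after(outputs, target_sizes: Optional[Union[TensorType, list[tuple]]] = None):
    # Explicit Optional: the annotation now matches the None default, as in the patch.
    return outputs if target_sizes is None else (outputs, target_sizes)


print(post_process_after({"logits": []}))                # omitting target_sizes (None) is fine
print(post_process_after({"logits": []}, [(480, 640)]))  # as is passing a list of size tuples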
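Two smaller groups of changes align annotations with their actual default values: `temperature: int = 0.1` becomes `float` in the wav2vec2 models, and the quantization-config fields whose defaults are the list `[-1, -1]` are annotated as `list` rather than `tuple`. The toy example below uses hypothetical names, not the library's API, to show the same two mismatches in isolation.

# Toy sketch, not from transformers: annotation/default mismatches of the kind the diff fixes.
def scale_logits(logits: list[float], temperature: float = 0.1) -> list[float]:
    # Declaring `temperature: int` would contradict the 0.1 default (a float)
    # and every fractional value callers actually pass.
    return [value / temperature for value in logits]


class SketchQuantConfig:
    # The default is a list literal, so `list` is the honest annotation; calling it
    # a `tuple` would mislead both readers and type checkers.
    def __init__(self, num_centroids: list = [-1, -1]) -> None:
        # Copy the shared mutable default so instances do not alias one list object.
        self.num_centroids = list(num_centroids)


print(scale_logits([1.0, 2.0], temperature=0.5))  # [2.0, 4.0]
print(SketchQuantConfig().num_centroids)          # [-1, -1]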