diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 3ac83b854978..1fa0570ee81f 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -1091,7 +1091,7 @@ def _get_logits_processor( self, generation_config: GenerationConfig, input_ids_seq_length: Optional[int] = None, - encoder_input_ids: torch.LongTensor = None, + encoder_input_ids: Optional[torch.LongTensor] = None, prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], list[int]]] = None, logits_processor: Optional[LogitsProcessorList] = None, device: Optional[str] = None, diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index 38a4a3e32718..071348cb4330 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -243,7 +243,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/integrations/flash_paged.py b/src/transformers/integrations/flash_paged.py index 352bc82a1e40..c305b71b7eba 100644 --- a/src/transformers/integrations/flash_paged.py +++ b/src/transformers/integrations/flash_paged.py @@ -1,3 +1,5 @@ +from typing import Optional + import torch from ..generation.continuous_batching import PagedAttentionCache @@ -16,7 +18,7 @@ def paged_attention_forward( q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - attention_mask: torch.Tensor = None, + attention_mask: Optional[torch.Tensor] = None, cache: PagedAttentionCache = None, cu_seq_lens_q=None, cu_seq_lens_k=None, diff --git a/src/transformers/integrations/higgs.py b/src/transformers/integrations/higgs.py index 5c7e5bd7ac71..4233a5bf5fb8 100644 --- a/src/transformers/integrations/higgs.py +++ b/src/transformers/integrations/higgs.py @@ -14,6 +14,7 @@ "HIGGS through FLUTE (Flexible Lookup Table Engine for LUT-quantized LLMs) integration file" from math import sqrt +from typing import Optional from ..utils import ( is_flute_available, @@ -496,8 +497,8 @@ def __init__( out_features: int, num_bits: int, bias=True, - dtype: torch.dtype = None, - device: torch.device = None, + dtype: Optional[torch.dtype] = None, + device: Optional[torch.device] = None, group_size: int = 256, hadamard_size: int = 1024, ): diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 973ee405cb3a..c19a06dfad0e 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1719,7 +1719,11 @@ def create_extended_attention_mask_for_decoder(input_shape, attention_mask, devi return extended_attention_mask def get_extended_attention_mask( - self, attention_mask: Tensor, input_shape: tuple[int], device: torch.device = None, dtype: torch.float = None + self, + attention_mask: Tensor, + input_shape: tuple[int], + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, ) -> Tensor: """ Makes broadcastable attention and causal masks so that future and masked tokens are ignored. 
diff --git a/src/transformers/models/align/processing_align.py b/src/transformers/models/align/processing_align.py index 3b73e391d6b5..5cc8efea0fb4 100644 --- a/src/transformers/models/align/processing_align.py +++ b/src/transformers/models/align/processing_align.py @@ -16,7 +16,7 @@ Image/Text processor class for ALIGN """ -from typing import Union +from typing import Optional, Union from ...image_utils import ImageInput from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack @@ -72,7 +72,7 @@ def __init__(self, image_processor, tokenizer): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/altclip/processing_altclip.py b/src/transformers/models/altclip/processing_altclip.py index ef58cad3d11e..1280b256d4a0 100644 --- a/src/transformers/models/altclip/processing_altclip.py +++ b/src/transformers/models/altclip/processing_altclip.py @@ -16,7 +16,7 @@ Image/Text processor class for AltCLIP """ -from typing import Union +from typing import Optional, Union from ...image_utils import ImageInput from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack @@ -58,7 +58,7 @@ def __init__(self, image_processor=None, tokenizer=None): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/aria/image_processing_aria.py b/src/transformers/models/aria/image_processing_aria.py index 6146f9b32bd2..3e6aa4ea12d1 100644 --- a/src/transformers/models/aria/image_processing_aria.py +++ b/src/transformers/models/aria/image_processing_aria.py @@ -153,7 +153,7 @@ def preprocess( do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, return_tensors: Optional[Union[str, TensorType]] = "pt", data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py index 62bacb72a44e..bccb7dff9e92 100644 --- a/src/transformers/models/aria/modeling_aria.py +++ b/src/transformers/models/aria/modeling_aria.py @@ -1005,9 +1005,9 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1134,9 +1134,9 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git 
a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index 8696095588c9..a1e4a8bebef5 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -539,7 +539,7 @@ def preprocess( do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, return_tensors: Optional[Union[str, TensorType]] = "pt", data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -1392,9 +1392,9 @@ def get_image_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1463,9 +1463,9 @@ def get_image_features( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/aya_vision/modeling_aya_vision.py b/src/transformers/models/aya_vision/modeling_aya_vision.py index 5c2cd95dff01..5ccb074399f5 100644 --- a/src/transformers/models/aya_vision/modeling_aya_vision.py +++ b/src/transformers/models/aya_vision/modeling_aya_vision.py @@ -269,8 +269,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/aya_vision/modular_aya_vision.py b/src/transformers/models/aya_vision/modular_aya_vision.py index f3f75b3e2dbd..f76e046e0b94 100644 --- a/src/transformers/models/aya_vision/modular_aya_vision.py +++ b/src/transformers/models/aya_vision/modular_aya_vision.py @@ -166,8 +166,8 @@ def get_image_features( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index a52e3488f0b0..ee04f019ba29 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -88,7 +88,9 @@ def __init__(self, num_embeddings: int, embedding_dim: int): self.offset = 2 super().__init__(num_embeddings + self.offset, embedding_dim) - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, 
input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index 7fde9c8a8c90..c25880bcfada 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -183,7 +183,7 @@ def _preprocess( do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -215,7 +215,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -260,7 +260,7 @@ def _preprocess_segmentation_map( segmentation_map: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_reduce_labels: Optional[bool] = None, @@ -308,7 +308,7 @@ def preprocess( segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 175474e33d89..959202e866ed 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -81,7 +81,9 @@ class BigBirdPegasusLearnedPositionalEmbedding(nn.Embedding): def __init__(self, num_embeddings: int, embedding_dim: int): super().__init__(num_embeddings, embedding_dim) - def forward(self, input_ids_shape: torch.Size, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, input_ids_shape: torch.Size, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index 6ebd9267b6fb..3d32752edca8 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -178,7 +178,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index b932cb1453f2..78a152374fd0 100644 --- 
a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -162,7 +162,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/blip/processing_blip.py b/src/transformers/models/blip/processing_blip.py index 86be17edefc0..5cc4334a974c 100644 --- a/src/transformers/models/blip/processing_blip.py +++ b/src/transformers/models/blip/processing_blip.py @@ -65,7 +65,7 @@ def __init__(self, image_processor, tokenizer, **kwargs): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/blip_2/processing_blip_2.py b/src/transformers/models/blip_2/processing_blip_2.py index 880d325a6522..a1c89f7f460a 100644 --- a/src/transformers/models/blip_2/processing_blip_2.py +++ b/src/transformers/models/blip_2/processing_blip_2.py @@ -79,7 +79,7 @@ def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 7e047284aa2f..28145b337a68 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -378,7 +378,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py index 355315a296db..64610ec4462a 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py @@ -137,7 +137,7 @@ def resize( image: "torch.Tensor", size: SizeDict, size_divisor: int = 32, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/chameleon/image_processing_chameleon.py b/src/transformers/models/chameleon/image_processing_chameleon.py index 651fd63b7e44..9cae9d7bdd34 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon.py +++ b/src/transformers/models/chameleon/image_processing_chameleon.py @@ -170,7 +170,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git 
a/src/transformers/models/chameleon/image_processing_chameleon_fast.py b/src/transformers/models/chameleon/image_processing_chameleon_fast.py index dea89a0d1697..421c4ea98374 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon_fast.py +++ b/src/transformers/models/chameleon/image_processing_chameleon_fast.py @@ -14,6 +14,8 @@ # limitations under the License. """Fast Image processor class for Chameleon.""" +from typing import Optional + import numpy as np from ...image_processing_utils_fast import BaseImageProcessorFast @@ -87,7 +89,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> "torch.Tensor": """ diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 3b2464e9371d..c55805f28913 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -171,7 +171,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/chinese_clip/processing_chinese_clip.py b/src/transformers/models/chinese_clip/processing_chinese_clip.py index f4b950c9e373..bb3a93d93f2e 100644 --- a/src/transformers/models/chinese_clip/processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/processing_chinese_clip.py @@ -17,7 +17,7 @@ """ import warnings -from typing import Union +from typing import Optional, Union from ...image_utils import ImageInput from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack @@ -69,7 +69,7 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, audio=None, videos=None, **kwargs: Unpack[ChineseClipProcessorKwargs], diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index 25709a3d5462..ea17e4a65ff4 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -204,7 +204,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/cohere2_vision/modeling_cohere2_vision.py b/src/transformers/models/cohere2_vision/modeling_cohere2_vision.py index b67749c2f42d..ddb0f360c6d6 100644 --- a/src/transformers/models/cohere2_vision/modeling_cohere2_vision.py +++ b/src/transformers/models/cohere2_vision/modeling_cohere2_vision.py @@ -219,8 +219,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, 
position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/cohere2_vision/modular_cohere2_vision.py b/src/transformers/models/cohere2_vision/modular_cohere2_vision.py index 9cbe84f26d31..36f5d0b71ce0 100644 --- a/src/transformers/models/cohere2_vision/modular_cohere2_vision.py +++ b/src/transformers/models/cohere2_vision/modular_cohere2_vision.py @@ -115,8 +115,8 @@ def get_image_features(self, pixel_values: torch.FloatTensor): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/colpali/modular_colpali.py b/src/transformers/models/colpali/modular_colpali.py index 3b86a0ee1116..0988b0f7aafb 100644 --- a/src/transformers/models/colpali/modular_colpali.py +++ b/src/transformers/models/colpali/modular_colpali.py @@ -89,7 +89,7 @@ def query_augmentation_token(self) -> str: def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, @@ -208,7 +208,7 @@ def __call__( def process_images( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[ColPaliProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/colpali/processing_colpali.py b/src/transformers/models/colpali/processing_colpali.py index 429856ec30cb..e6f72cd60b66 100644 --- a/src/transformers/models/colpali/processing_colpali.py +++ b/src/transformers/models/colpali/processing_colpali.py @@ -133,7 +133,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, @@ -279,7 +279,7 @@ def query_augmentation_token(self) -> str: def process_images( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[ColPaliProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/colqwen2/modular_colqwen2.py b/src/transformers/models/colqwen2/modular_colqwen2.py index 530caef7d973..2c268248856b 100644 --- a/src/transformers/models/colqwen2/modular_colqwen2.py +++ b/src/transformers/models/colqwen2/modular_colqwen2.py @@ -92,7 +92,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/colqwen2/processing_colqwen2.py b/src/transformers/models/colqwen2/processing_colqwen2.py index 68c67a976d25..1609f6e182da 100644 --- a/src/transformers/models/colqwen2/processing_colqwen2.py +++ b/src/transformers/models/colqwen2/processing_colqwen2.py @@ -92,7 +92,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, @@ -260,7 +260,7 @@ def query_augmentation_token(self) -> str: def process_images( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: 
Unpack[ColQwen2ProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index f0c84da0ff13..06ef3f431050 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -385,7 +385,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -441,7 +441,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 299cabd5d4d9..af89274500dd 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -192,7 +192,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, crop_pct: Optional[float] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/csm/modeling_csm.py b/src/transformers/models/csm/modeling_csm.py index b1a4b5942f65..7cfa90397010 100644 --- a/src/transformers/models/csm/modeling_csm.py +++ b/src/transformers/models/csm/modeling_csm.py @@ -85,12 +85,12 @@ class CsmOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None attentions: Optional[tuple[torch.FloatTensor, ...]] = None depth_decoder_loss: Optional[torch.FloatTensor] = None - depth_decoder_logits: torch.FloatTensor = None + depth_decoder_logits: Optional[torch.FloatTensor] = None depth_decoder_past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None depth_decoder_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None depth_decoder_attentions: Optional[tuple[torch.FloatTensor, ...]] = None @@ -415,7 +415,7 @@ def __init__(self, config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, backbone_last_hidden_state: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -546,7 +546,7 @@ def __init__(self, config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, backbone_last_hidden_state: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -925,7 +925,7 @@ def prepare_inputs_for_generation( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, input_values: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, input_values_cutoffs: 
Optional[torch.Tensor] = None, diff --git a/src/transformers/models/csm/modular_csm.py b/src/transformers/models/csm/modular_csm.py index 94983a05b08a..f83a1abd5ae8 100644 --- a/src/transformers/models/csm/modular_csm.py +++ b/src/transformers/models/csm/modular_csm.py @@ -84,12 +84,12 @@ class CsmOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None attentions: Optional[tuple[torch.FloatTensor, ...]] = None depth_decoder_loss: Optional[torch.FloatTensor] = None - depth_decoder_logits: torch.FloatTensor = None + depth_decoder_logits: Optional[torch.FloatTensor] = None depth_decoder_past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None depth_decoder_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None depth_decoder_attentions: Optional[tuple[torch.FloatTensor, ...]] = None @@ -162,7 +162,7 @@ def __init__(self, config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, backbone_last_hidden_state: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -312,7 +312,7 @@ def prepare_inputs_for_generation( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, backbone_last_hidden_state: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -603,7 +603,7 @@ def prepare_inputs_for_generation( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, input_values: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, input_values_cutoffs: Optional[torch.Tensor] = None, diff --git a/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py b/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py index cfe008635090..a6c35f6be0d5 100644 --- a/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py @@ -18,6 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Optional from ...configuration_utils import PretrainedConfig from ...utils import ( @@ -67,8 +68,8 @@ class DeepseekVLConfig(PretrainedConfig): def __init__( self, - text_config: AutoConfig = None, - vision_config: AutoConfig = None, + text_config: Optional[AutoConfig] = None, + vision_config: Optional[AutoConfig] = None, image_token_id: int = 100015, **kwargs, ): diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py index 8a68d434837f..02b39db51e88 100644 --- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py @@ -208,7 +208,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py index 9837acad0312..59a86c89921d 100644 --- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py @@ -77,7 +77,7 @@ def resize( image: "torch.Tensor", size: SizeDict, min_size: int, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py index e745c80e76c4..2a34ce84a93e 100644 --- a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py @@ -205,8 +205,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -278,8 +278,8 @@ def prepare_embeddings_for_image_generation(self) -> torch.Tensor: @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/deepseek_vl/modular_deepseek_vl.py b/src/transformers/models/deepseek_vl/modular_deepseek_vl.py index 6d9b7709eae6..9c3f4f39bdc1 100644 --- a/src/transformers/models/deepseek_vl/modular_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/modular_deepseek_vl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Union +from typing import Optional, Union from ...configuration_utils import PretrainedConfig from ...image_processing_utils import BatchFeature @@ -79,8 +79,8 @@ class DeepseekVLConfig(PretrainedConfig): def __init__( self, - text_config: AutoConfig = None, - vision_config: AutoConfig = None, + text_config: Optional[AutoConfig] = None, + vision_config: Optional[AutoConfig] = None, image_token_id: int = 100015, **kwargs, ): @@ -243,7 +243,7 @@ def __init__( def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[DeepseekVLProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/deepseek_vl/processing_deepseek_vl.py b/src/transformers/models/deepseek_vl/processing_deepseek_vl.py index ada14ab87b90..8abb3f1b4ad6 100644 --- a/src/transformers/models/deepseek_vl/processing_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/processing_deepseek_vl.py @@ -18,7 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Union +from typing import Optional, Union from ...image_processing_utils import BatchFeature from ...image_utils import ImageInput @@ -72,7 +72,7 @@ def __init__( def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[DeepseekVLProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py index c3a5aa5260f6..9fd82dbfefdf 100644 --- a/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py @@ -18,8 +18,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Optional + from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ...utils import ( + logging, +) from ..auto import CONFIG_MAPPING, AutoConfig @@ -66,9 +70,9 @@ class DeepseekVLHybridConfig(PretrainedConfig): def __init__( self, - text_config: AutoConfig = None, - vision_config: AutoConfig = None, - high_res_vision_config: AutoConfig = None, + text_config: Optional[AutoConfig] = None, + vision_config: Optional[AutoConfig] = None, + high_res_vision_config: Optional[AutoConfig] = None, image_token_id: int = 100015, **kwargs, ): diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py index 4589a0deeca5..e3f0b54d65d1 100644 --- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py @@ -240,8 +240,8 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, high_res_size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, - high_res_resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, + high_res_resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py index 14a99c56a049..37a2f9d78a6f 100644 --- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py @@ -111,7 +111,7 @@ def resize( image: "torch.Tensor", size: SizeDict, min_size: int, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py index 02eb3e4f9c79..65c5c8024e09 100644 --- a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py @@ -309,9 +309,9 @@ def get_placeholder_mask( @auto_docstring(custom_args=DEEPSEEK_VL_COMMON_CUSTOM_ARGS) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - high_res_pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + high_res_pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -420,9 +420,9 @@ def prepare_embeddings_for_image_generation(self) -> torch.Tensor: @auto_docstring(custom_args=DEEPSEEK_VL_COMMON_CUSTOM_ARGS) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - high_res_pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + high_res_pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: 
Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py index c0d423809256..ba6637e1b269 100644 --- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py @@ -130,9 +130,9 @@ class DeepseekVLHybridConfig(DeepseekVLConfig): def __init__( self, - text_config: AutoConfig = None, - vision_config: AutoConfig = None, - high_res_vision_config: AutoConfig = None, + text_config: Optional[AutoConfig] = None, + vision_config: Optional[AutoConfig] = None, + high_res_vision_config: Optional[AutoConfig] = None, image_token_id: int = 100015, **kwargs, ): @@ -295,9 +295,9 @@ def get_image_features(self, pixel_values, high_res_pixel_values): @auto_docstring(custom_args=DEEPSEEK_VL_COMMON_CUSTOM_ARGS) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - high_res_pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + high_res_pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -358,9 +358,9 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLForConditionalGeneratio @auto_docstring(custom_args=DEEPSEEK_VL_COMMON_CUSTOM_ARGS) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - high_res_pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + high_res_pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -546,8 +546,8 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, high_res_size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, - high_res_resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, + high_res_resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -920,7 +920,7 @@ class DeepseekVLHybridProcessor(DeepseekVLProcessor): def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[DeepseekVLHybridProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py index 914c59ad205c..465945033eec 100644 --- a/src/transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py @@ -18,7 +18,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Union +from typing import Optional, Union from ...image_processing_utils_fast import BatchFeature from ...image_utils import ImageInput @@ -72,7 +72,7 @@ def __init__( def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, **kwargs: Unpack[DeepseekVLHybridProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py index 0c04bfc089c1..b6cd0a7075f3 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py @@ -376,7 +376,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -432,7 +432,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. diff --git a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py index 39267aa2e6cd..a2dd1281e920 100644 --- a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py @@ -180,7 +180,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/realm/modeling_realm.py b/src/transformers/models/deprecated/realm/modeling_realm.py index 8021a142dd80..284a99b559f4 100644 --- a/src/transformers/models/deprecated/realm/modeling_realm.py +++ b/src/transformers/models/deprecated/realm/modeling_realm.py @@ -719,12 +719,12 @@ class RealmReaderOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None retriever_loss: Optional[torch.FloatTensor] = None reader_loss: Optional[torch.FloatTensor] = None - retriever_correct: torch.BoolTensor = None - reader_correct: torch.BoolTensor = None + retriever_correct: Optional[torch.BoolTensor] = None + reader_correct: Optional[torch.BoolTensor] = None block_idx: Optional[torch.LongTensor] = None candidate: Optional[torch.LongTensor] = None - start_pos: torch.int32 = None - end_pos: torch.int32 = None + start_pos: Optional[torch.IntTensor] = None + end_pos: Optional[torch.IntTensor] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -742,7 +742,7 @@ class RealmForOpenQAOutput(ModelOutput): Predicted answer ids. 
""" - reader_output: dict = None + reader_output: Optional[dict] = None predicted_answer_ids: Optional[torch.LongTensor] = None diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py index 9c469036f232..19c3fb0bd485 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py @@ -618,7 +618,7 @@ class TransfoXLModelOutput(ModelOutput): """ last_hidden_state: torch.FloatTensor - mems: list[torch.FloatTensor] = None + mems: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -652,7 +652,7 @@ class TransfoXLSequenceClassifierOutputWithPast(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: Optional[torch.FloatTensor] = None - mems: list[torch.FloatTensor] = None + mems: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -688,7 +688,7 @@ class TransfoXLLMHeadModelOutput(ModelOutput): losses: Optional[torch.FloatTensor] = None prediction_scores: Optional[torch.FloatTensor] = None - mems: list[torch.FloatTensor] = None + mems: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None loss: Optional[torch.FloatTensor] = None diff --git a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py index d142939ee394..c0e1a33f091b 100644 --- a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py @@ -222,7 +222,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -281,7 +281,7 @@ def preprocess( size: Optional[dict[str, int]] = None, patch_size: Optional[list[int]] = None, num_frames: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index f274d8c058f9..92d518363b2c 100644 --- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -194,7 +194,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index 37eef3717a9a..9877729434e1 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ 
b/src/transformers/models/detr/image_processing_detr_fast.py @@ -397,7 +397,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -453,7 +453,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. diff --git a/src/transformers/models/dia/generation_dia.py b/src/transformers/models/dia/generation_dia.py index 45ee66d39a97..bf18c775eed6 100644 --- a/src/transformers/models/dia/generation_dia.py +++ b/src/transformers/models/dia/generation_dia.py @@ -45,7 +45,7 @@ def _get_logits_processor( self, generation_config: GenerationConfig, input_ids_seq_length: Optional[int] = None, - encoder_input_ids: torch.LongTensor = None, + encoder_input_ids: Optional[torch.LongTensor] = None, prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], list[int]]] = None, logits_processor: Optional[LogitsProcessorList] = None, device: Optional[str] = None, diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index d6c963803743..570981decf61 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -314,7 +314,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py index 288ba1107dd8..b84d0ed949d3 100644 --- a/src/transformers/models/donut/processing_donut.py +++ b/src/transformers/models/donut/processing_donut.py @@ -76,7 +76,7 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 4a312e3005f3..4e02ae10144c 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -299,7 +299,7 @@ def _preprocess( do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, do_rescale: Optional[bool] = None, @@ -340,7 +340,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, do_rescale: Optional[bool] = None, @@ -391,7 +391,7 @@ def _preprocess_segmentation_map( segmentation_map: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: 
Optional[PILImageResampling] = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, do_reduce_labels: Optional[bool] = None, @@ -442,7 +442,7 @@ def preprocess( size: Optional[int] = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/dpt/image_processing_dpt_fast.py b/src/transformers/models/dpt/image_processing_dpt_fast.py index acfa82c4694a..05ee807ce8e5 100644 --- a/src/transformers/models/dpt/image_processing_dpt_fast.py +++ b/src/transformers/models/dpt/image_processing_dpt_fast.py @@ -313,7 +313,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, ensure_multiple_of: Optional[int] = 1, keep_aspect_ratio: bool = False, diff --git a/src/transformers/models/dpt/modular_dpt.py b/src/transformers/models/dpt/modular_dpt.py index 9c74b4c570ae..e49fa04ea2eb 100644 --- a/src/transformers/models/dpt/modular_dpt.py +++ b/src/transformers/models/dpt/modular_dpt.py @@ -140,7 +140,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, ensure_multiple_of: Optional[int] = 1, keep_aspect_ratio: bool = False, diff --git a/src/transformers/models/efficientloftr/image_processing_efficientloftr.py b/src/transformers/models/efficientloftr/image_processing_efficientloftr.py index 32a351ea2355..58ce0e96f5b8 100644 --- a/src/transformers/models/efficientloftr/image_processing_efficientloftr.py +++ b/src/transformers/models/efficientloftr/image_processing_efficientloftr.py @@ -224,7 +224,7 @@ def preprocess( images, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] = None, diff --git a/src/transformers/models/emu3/image_processing_emu3.py b/src/transformers/models/emu3/image_processing_emu3.py index 05431d796a30..93ad821051f3 100644 --- a/src/transformers/models/emu3/image_processing_emu3.py +++ b/src/transformers/models/emu3/image_processing_emu3.py @@ -143,7 +143,7 @@ def _preprocess( self, images: ImageInput, do_resize: Optional[bool] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -285,7 +285,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/emu3/modeling_emu3.py b/src/transformers/models/emu3/modeling_emu3.py index d50f85283fb0..5e791d1042f6 100644 --- a/src/transformers/models/emu3/modeling_emu3.py +++ b/src/transformers/models/emu3/modeling_emu3.py @@ -1410,9 +1410,9 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = 
None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1508,9 +1508,9 @@ def decode_image_tokens(self, **kwargs): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/emu3/modular_emu3.py b/src/transformers/models/emu3/modular_emu3.py index 7f5ee236ef06..5dd8d02f61aa 100644 --- a/src/transformers/models/emu3/modular_emu3.py +++ b/src/transformers/models/emu3/modular_emu3.py @@ -995,9 +995,9 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1093,9 +1093,9 @@ def decode_image_tokens(self, **kwargs): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/eomt/image_processing_eomt.py b/src/transformers/models/eomt/image_processing_eomt.py index a5f482ef9b40..93a440693dee 100644 --- a/src/transformers/models/eomt/image_processing_eomt.py +++ b/src/transformers/models/eomt/image_processing_eomt.py @@ -409,7 +409,7 @@ def _preprocess_images( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_split_image: Optional[bool] = None, do_pad: Optional[bool] = None, do_rescale: Optional[bool] = None, @@ -470,7 +470,7 @@ def _preprocess_mask( do_resize: Optional[bool] = False, do_pad: Optional[bool] = False, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, data_format: Union[str, ChannelDimension] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: @@ -510,7 +510,7 @@ def preprocess( do_split_image: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -647,7 +647,7 @@ def preprocess( def encode_inputs( self, pixel_values_list: list[ImageInput], - segmentation_maps: ImageInput = 
None, + segmentation_maps: Optional[ImageInput] = None, instance_id_to_semantic_id: Optional[Union[list[dict[int, int]], dict[int, int]]] = None, ignore_index: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/evolla/modeling_evolla.py b/src/transformers/models/evolla/modeling_evolla.py index 14ab65c1f366..d95567491fe1 100644 --- a/src/transformers/models/evolla/modeling_evolla.py +++ b/src/transformers/models/evolla/modeling_evolla.py @@ -640,7 +640,7 @@ def get_extended_attention_mask( self, attention_mask: Tensor, input_shape: tuple[int], - device: torch.device = None, + device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, ) -> Tensor: """ @@ -810,7 +810,7 @@ def forward(self, embeds, mask): @dataclass @auto_docstring class EvollaProteinEncoderModelOutput(ModelOutput): - sequence_compressor_output: torch.FloatTensor = None + sequence_compressor_output: Optional[torch.FloatTensor] = None last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None attentions: Optional[tuple[torch.FloatTensor, ...]] = None @@ -1401,7 +1401,7 @@ def set_input_embeddings(self, value): @check_model_inputs def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1519,11 +1519,11 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, # text input ids + input_ids: Optional[torch.LongTensor] = None, # text input ids attention_mask: Optional[torch.Tensor] = None, # text attention mask inputs_embeds: Optional[torch.FloatTensor] = None, # text input embeddings labels: Optional[torch.LongTensor] = None, - protein_input_ids: torch.LongTensor = None, + protein_input_ids: Optional[torch.LongTensor] = None, protein_attention_mask: Optional[torch.Tensor] = None, use_cache: Optional[bool] = None, **kwargs, diff --git a/src/transformers/models/evolla/modular_evolla.py b/src/transformers/models/evolla/modular_evolla.py index 1ccb5ef01220..a58a3e7b7341 100644 --- a/src/transformers/models/evolla/modular_evolla.py +++ b/src/transformers/models/evolla/modular_evolla.py @@ -272,7 +272,7 @@ def get_extended_attention_mask( self, attention_mask: Tensor, input_shape: tuple[int], - device: torch.device = None, + device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, ) -> Tensor: """ @@ -442,7 +442,7 @@ def forward(self, embeds, mask): @dataclass @auto_docstring class EvollaProteinEncoderModelOutput(ModelOutput): - sequence_compressor_output: torch.FloatTensor = None + sequence_compressor_output: Optional[torch.FloatTensor] = None last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None attentions: Optional[tuple[torch.FloatTensor, ...]] = None @@ -840,7 +840,7 @@ def set_input_embeddings(self, value): @check_model_inputs def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -958,11 +958,11 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, # text input ids + input_ids: Optional[torch.LongTensor] = None, # text input 
ids attention_mask: Optional[torch.Tensor] = None, # text attention mask inputs_embeds: Optional[torch.FloatTensor] = None, # text input embeddings labels: Optional[torch.LongTensor] = None, - protein_input_ids: torch.LongTensor = None, + protein_input_ids: Optional[torch.LongTensor] = None, protein_attention_mask: Optional[torch.Tensor] = None, use_cache: Optional[bool] = None, **kwargs, diff --git a/src/transformers/models/exaone4/modeling_exaone4.py b/src/transformers/models/exaone4/modeling_exaone4.py index 2618a4aa3b5b..34eca44936a0 100644 --- a/src/transformers/models/exaone4/modeling_exaone4.py +++ b/src/transformers/models/exaone4/modeling_exaone4.py @@ -355,7 +355,7 @@ def __init__(self, config: Exaone4Config): @check_model_inputs def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/exaone4/modular_exaone4.py b/src/transformers/models/exaone4/modular_exaone4.py index 064a288b3b23..604dc9b8f9cb 100644 --- a/src/transformers/models/exaone4/modular_exaone4.py +++ b/src/transformers/models/exaone4/modular_exaone4.py @@ -368,7 +368,7 @@ def __init__(self, config: Exaone4Config): @check_model_inputs def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/falcon_h1/modeling_falcon_h1.py b/src/transformers/models/falcon_h1/modeling_falcon_h1.py index da16bbbd0327..865daf384b49 100644 --- a/src/transformers/models/falcon_h1/modeling_falcon_h1.py +++ b/src/transformers/models/falcon_h1/modeling_falcon_h1.py @@ -1240,7 +1240,7 @@ def __init__(self, config: FalconH1Config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[FalconHybridMambaAttentionDynamicCache] = None, @@ -1480,7 +1480,7 @@ def __init__(self, config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[FalconHybridMambaAttentionDynamicCache] = None, diff --git a/src/transformers/models/falcon_h1/modular_falcon_h1.py b/src/transformers/models/falcon_h1/modular_falcon_h1.py index 34193212a99c..8b00de3ab97f 100644 --- a/src/transformers/models/falcon_h1/modular_falcon_h1.py +++ b/src/transformers/models/falcon_h1/modular_falcon_h1.py @@ -1021,7 +1021,7 @@ def __init__(self, config: FalconH1Config): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[FalconHybridMambaAttentionDynamicCache] = None, @@ -1245,7 +1245,7 @@ def _prepare_4d_causal_attention_mask_with_cache_position( class FalconH1ForCausalLM(LlamaForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, 
past_key_values: Optional[FalconHybridMambaAttentionDynamicCache] = None, diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index 9dcc2c85d479..7b4db246a8fa 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -394,7 +394,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -459,7 +459,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/fuyu/modeling_fuyu.py b/src/transformers/models/fuyu/modeling_fuyu.py index 25e13813f349..d5edfadc3ffc 100644 --- a/src/transformers/models/fuyu/modeling_fuyu.py +++ b/src/transformers/models/fuyu/modeling_fuyu.py @@ -174,9 +174,10 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - image_patches: torch.Tensor = None, # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] - image_patches_indices: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] + image_patches: Optional[torch.Tensor] = None, + image_patches_indices: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -280,9 +281,10 @@ def get_decoder(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - image_patches: torch.Tensor = None, # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] - image_patches_indices: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] + image_patches: Optional[torch.Tensor] = None, + image_patches_indices: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/fuyu/processing_fuyu.py b/src/transformers/models/fuyu/processing_fuyu.py index 07b6d6388d3b..debbcb23aac1 100644 --- a/src/transformers/models/fuyu/processing_fuyu.py +++ b/src/transformers/models/fuyu/processing_fuyu.py @@ -485,7 +485,7 @@ def get_sample_encoding( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/gemma3/image_processing_gemma3.py b/src/transformers/models/gemma3/image_processing_gemma3.py index f7bd414dbb91..8addbbfd378c 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3.py +++ b/src/transformers/models/gemma3/image_processing_gemma3.py @@ -242,7 +242,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: 
Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py index d2ba04298dec..2e0f6a53053d 100644 --- a/src/transformers/models/gemma3/modeling_gemma3.py +++ b/src/transformers/models/gemma3/modeling_gemma3.py @@ -844,8 +844,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, @@ -1029,8 +1029,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, @@ -1252,7 +1252,7 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py index fc70fa6e9d8e..6901e0182ba7 100644 --- a/src/transformers/models/gemma3/modular_gemma3.py +++ b/src/transformers/models/gemma3/modular_gemma3.py @@ -787,8 +787,8 @@ def _update_causal_mask(self, **super_kwargs): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, @@ -899,8 +899,8 @@ class Gemma3ForConditionalGeneration(PaliGemmaForConditionalGeneration): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, @@ -1125,7 +1125,7 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/gemma3/processing_gemma3.py b/src/transformers/models/gemma3/processing_gemma3.py index 4c27053e1a6f..791c47833a4e 100644 --- a/src/transformers/models/gemma3/processing_gemma3.py +++ b/src/transformers/models/gemma3/processing_gemma3.py @@ -79,7 +79,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, 
PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, videos=None, audio=None, diff --git a/src/transformers/models/gemma3n/processing_gemma3n.py b/src/transformers/models/gemma3n/processing_gemma3n.py index 89d2880cd5c3..e2c2c3ae10f8 100644 --- a/src/transformers/models/gemma3n/processing_gemma3n.py +++ b/src/transformers/models/gemma3n/processing_gemma3n.py @@ -98,7 +98,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio: Optional[Union[np.ndarray, list[float], list[np.ndarray], list[list[float]]]] = None, videos=None, diff --git a/src/transformers/models/glm4v/image_processing_glm4v.py b/src/transformers/models/glm4v/image_processing_glm4v.py index 419e3d8de4de..8293545deee2 100644 --- a/src/transformers/models/glm4v/image_processing_glm4v.py +++ b/src/transformers/models/glm4v/image_processing_glm4v.py @@ -162,7 +162,7 @@ def _preprocess( images: Union[ImageInput, VideoInput], do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -296,10 +296,10 @@ def _preprocess( def preprocess( self, images: ImageInput, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/glm4v/modeling_glm4v.py b/src/transformers/models/glm4v/modeling_glm4v.py index 0f2d574193fc..c85df51f1dbf 100644 --- a/src/transformers/models/glm4v/modeling_glm4v.py +++ b/src/transformers/models/glm4v/modeling_glm4v.py @@ -665,7 +665,7 @@ class Glm4vModelOutputWithPast(ModelOutput): The rope index difference between sequence length and multimodal rope. 
""" - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -1153,8 +1153,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -1194,7 +1194,7 @@ def get_placeholder_mask( @can_return_tuple def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -1374,7 +1374,7 @@ def visual(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, diff --git a/src/transformers/models/glm4v/modular_glm4v.py b/src/transformers/models/glm4v/modular_glm4v.py index 699c21ee0ce3..ccc8dc9c7e3a 100644 --- a/src/transformers/models/glm4v/modular_glm4v.py +++ b/src/transformers/models/glm4v/modular_glm4v.py @@ -1150,8 +1150,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -1191,7 +1191,7 @@ def get_placeholder_mask( @can_return_tuple def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -1304,7 +1304,7 @@ class Glm4vForConditionalGeneration(Qwen2_5_VLForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -1539,9 +1539,9 @@ def __init__(self, image_processor=None, tokenizer=None, video_processor=None, c def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[Glm4vProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/glm4v/processing_glm4v.py b/src/transformers/models/glm4v/processing_glm4v.py index 8b9d0b7c44d8..817da3630d52 100644 --- a/src/transformers/models/glm4v/processing_glm4v.py +++ b/src/transformers/models/glm4v/processing_glm4v.py @@ -94,9 +94,9 @@ def __init__(self, image_processor=None, tokenizer=None, video_processor=None, c def __call__( self, - images: ImageInput 
= None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[Glm4vProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py index 1511ca34833b..ccb97dc5d7c4 100644 --- a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py +++ b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py @@ -799,7 +799,7 @@ class Glm4vMoeModelOutputWithPast(ModelOutput): The rope index difference between sequence length and multimodal rope. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -1269,8 +1269,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -1310,7 +1310,7 @@ def get_placeholder_mask( @can_return_tuple def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -1490,7 +1490,7 @@ def visual(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py index a1a48fa6cf7b..209ac88ea2fb 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py @@ -259,7 +259,7 @@ def preprocess( crop_to_patches: Optional[bool] = None, min_patches: Optional[int] = None, max_patches: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -422,7 +422,7 @@ def crop_image_to_patches( max_patches: int, use_thumbnail: bool = True, patch_size: Optional[Union[tuple, int, dict]] = None, - data_format: ChannelDimension = None, + data_format: Optional[ChannelDimension] = None, ): """ Crop the image to patches and return a list of cropped images. 
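Every hunk in this patch applies the same mechanical fix: a parameter that defaults to None but is annotated with a bare type (for example, resample: PILImageResampling = None) gets an explicit Optional[...] wrapper, because PEP 484-style checkers no longer infer an implicit Optional from a None default. A minimal sketch of the before/after pattern follows; the helper name scale is hypothetical and not taken from the diff.

from typing import Optional

import torch


def scale(x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    # Hypothetical example, not from the patch. Because mask defaults to None,
    # its annotation must be Optional[torch.Tensor]; the bare
    # "mask: torch.Tensor = None" spelling is what this patch removes
    # throughout the signatures above and below.
    if mask is None:
        return x
    return x * mask
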
diff --git a/src/transformers/models/got_ocr2/modeling_got_ocr2.py b/src/transformers/models/got_ocr2/modeling_got_ocr2.py index a07bd06205ac..788ac69d931a 100644 --- a/src/transformers/models/got_ocr2/modeling_got_ocr2.py +++ b/src/transformers/models/got_ocr2/modeling_got_ocr2.py @@ -596,8 +596,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -717,8 +717,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/got_ocr2/modular_got_ocr2.py b/src/transformers/models/got_ocr2/modular_got_ocr2.py index 470afc8cbab1..0ecf39fcd03b 100644 --- a/src/transformers/models/got_ocr2/modular_got_ocr2.py +++ b/src/transformers/models/got_ocr2/modular_got_ocr2.py @@ -322,8 +322,8 @@ def get_image_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -382,8 +382,8 @@ class GotOcr2ForConditionalGeneration(LlavaForConditionalGeneration): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/granite_speech/modeling_granite_speech.py b/src/transformers/models/granite_speech/modeling_granite_speech.py index c8567916751e..e4a6ad1c41f4 100644 --- a/src/transformers/models/granite_speech/modeling_granite_speech.py +++ b/src/transformers/models/granite_speech/modeling_granite_speech.py @@ -54,7 +54,7 @@ class GraniteSpeechCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -356,8 +356,8 @@ def get_audio_features(self, input_features: torch.Tensor) -> torch.Tensor: @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - input_features: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + input_features: Optional[torch.FloatTensor] = None, input_features_mask: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py index c0efccf4b5bb..e3a1e69fc861 100644 --- 
a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +++ b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py @@ -1312,7 +1312,7 @@ def __init__(self, config: GraniteMoeHybridConfig): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/granitemoehybrid/modular_granitemoehybrid.py b/src/transformers/models/granitemoehybrid/modular_granitemoehybrid.py index 25151b6936b6..4de1ff253914 100644 --- a/src/transformers/models/granitemoehybrid/modular_granitemoehybrid.py +++ b/src/transformers/models/granitemoehybrid/modular_granitemoehybrid.py @@ -192,7 +192,7 @@ def __init__(self, config: GraniteMoeHybridConfig): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py index 317d1e483342..9869e8eb4801 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py @@ -407,7 +407,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -463,7 +463,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. 
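The same rule covers the ModelOutput dataclasses touched elsewhere in this patch (Glm4vModelOutputWithPast, GraniteSpeechCausalLMOutputWithPast, and others): any field that defaults to None is declared Optional. A small illustrative dataclass, using a made-up name rather than one of the real output classes:

from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class ToyOutput:
    # Illustrative only; mirrors hunks in this patch such as
    # "last_hidden_state: Optional[torch.FloatTensor] = None".
    last_hidden_state: Optional[torch.FloatTensor] = None
    loss: Optional[torch.FloatTensor] = None
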
diff --git a/src/transformers/models/grounding_dino/processing_grounding_dino.py b/src/transformers/models/grounding_dino/processing_grounding_dino.py index 24f13589f795..3e2cfcd86160 100644 --- a/src/transformers/models/grounding_dino/processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/processing_grounding_dino.py @@ -150,7 +150,7 @@ def __init__(self, image_processor, tokenizer): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/hiera/modeling_hiera.py b/src/transformers/models/hiera/modeling_hiera.py index 2fcd827e89ce..69aa24b9f8f0 100644 --- a/src/transformers/models/hiera/modeling_hiera.py +++ b/src/transformers/models/hiera/modeling_hiera.py @@ -87,7 +87,7 @@ class HieraModelOutput(ModelOutput): last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None - bool_masked_pos: torch.BoolTensor = None + bool_masked_pos: Optional[torch.BoolTensor] = None ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None attentions: Optional[tuple[torch.FloatTensor, ...]] = None @@ -156,7 +156,7 @@ class HieraForPreTrainingOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: Optional[torch.FloatTensor] = None - bool_masked_pos: torch.BoolTensor = None + bool_masked_pos: Optional[torch.BoolTensor] = None ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/idefics2/image_processing_idefics2.py b/src/transformers/models/idefics2/image_processing_idefics2.py index 51ed8f13a6ce..3f0db7644563 100644 --- a/src/transformers/models/idefics2/image_processing_idefics2.py +++ b/src/transformers/models/idefics2/image_processing_idefics2.py @@ -397,7 +397,7 @@ def preprocess( do_convert_rgb: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/idefics2/modeling_idefics2.py b/src/transformers/models/idefics2/modeling_idefics2.py index 249883eb1c85..1ed120350813 100644 --- a/src/transformers/models/idefics2/modeling_idefics2.py +++ b/src/transformers/models/idefics2/modeling_idefics2.py @@ -858,7 +858,9 @@ def inputs_merger( inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_hidden_states) return inputs_embeds - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): """ Encodes images into continuous embeddings that can be forwarded to the language model. 
@@ -1038,7 +1040,9 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): self.model.text_model.set_input_embeddings(value) - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): return self.model.get_image_features(pixel_values=pixel_values, pixel_attention_mask=pixel_attention_mask) @can_return_tuple diff --git a/src/transformers/models/idefics3/image_processing_idefics3.py b/src/transformers/models/idefics3/image_processing_idefics3.py index 24afcdd6e02d..e460a041965a 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3.py +++ b/src/transformers/models/idefics3/image_processing_idefics3.py @@ -608,7 +608,7 @@ def preprocess( do_convert_rgb: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_image_splitting: Optional[bool] = None, do_rescale: Optional[bool] = None, max_image_size: Optional[dict[str, int]] = None, diff --git a/src/transformers/models/idefics3/image_processing_idefics3_fast.py b/src/transformers/models/idefics3/image_processing_idefics3_fast.py index b70829f5b43f..a6047ba77a87 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3_fast.py +++ b/src/transformers/models/idefics3/image_processing_idefics3_fast.py @@ -215,7 +215,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": @@ -254,7 +254,7 @@ def split_images( self, images: torch.Tensor, max_image_size: dict[str, int], - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Split an image into squares of side max_image_size and the original image resized to max_image_size. @@ -313,7 +313,7 @@ def resize_for_vision_encoder( self, image: torch.Tensor, vision_encoder_max_size: int, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resize images to be multiples of `vision_encoder_max_size` while preserving the aspect ratio. diff --git a/src/transformers/models/idefics3/modeling_idefics3.py b/src/transformers/models/idefics3/modeling_idefics3.py index a9fb0e54339e..24429672da28 100644 --- a/src/transformers/models/idefics3/modeling_idefics3.py +++ b/src/transformers/models/idefics3/modeling_idefics3.py @@ -608,7 +608,9 @@ def inputs_merger( inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_hidden_states) return inputs_embeds - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): """ Encodes images into continuous embeddings that can be forwarded to the language model. 
@@ -801,7 +803,9 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): self.model.text_model.set_input_embeddings(value) - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): return self.model.get_image_features(pixel_values=pixel_values, pixel_attention_mask=pixel_attention_mask) @can_return_tuple diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index 1f2026627515..9168ecaceff2 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -181,7 +181,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_normalize: Optional[bool] = None, do_color_quantize: Optional[bool] = None, clusters: Optional[Union[list[list[int]], np.ndarray]] = None, diff --git a/src/transformers/models/instructblip/processing_instructblip.py b/src/transformers/models/instructblip/processing_instructblip.py index eee860e45c83..122fc11622ff 100644 --- a/src/transformers/models/instructblip/processing_instructblip.py +++ b/src/transformers/models/instructblip/processing_instructblip.py @@ -17,7 +17,7 @@ """ import os -from typing import Union +from typing import Optional, Union from ...image_processing_utils import BatchFeature from ...image_utils import ImageInput @@ -83,7 +83,7 @@ def __init__(self, image_processor, tokenizer, qformer_tokenizer, num_query_toke def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py index 68c812422af2..56391b59dbdd 100644 --- a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py @@ -162,10 +162,10 @@ def resize( @filter_out_non_signature_kwargs() def preprocess( self, - images: VideoInput = None, + images: Optional[VideoInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -285,10 +285,10 @@ def preprocess( # Ignore copy def _preprocess_image( self, - image: ImageInput = None, + image: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/instructblipvideo/processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/processing_instructblipvideo.py index a518c3a1a19c..ee4e843e2f33 100644 --- a/src/transformers/models/instructblipvideo/processing_instructblipvideo.py +++ 
b/src/transformers/models/instructblipvideo/processing_instructblipvideo.py @@ -71,7 +71,7 @@ def __init__(self, video_processor, tokenizer, qformer_tokenizer, num_query_toke def __call__( self, - images: VideoInput = None, + images: Optional[VideoInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/internvl/modeling_internvl.py b/src/transformers/models/internvl/modeling_internvl.py index b9450b4bfd36..3168546635ff 100644 --- a/src/transformers/models/internvl/modeling_internvl.py +++ b/src/transformers/models/internvl/modeling_internvl.py @@ -632,8 +632,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -820,8 +820,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/internvl/modular_internvl.py b/src/transformers/models/internvl/modular_internvl.py index 823adc3904db..bcef3a2ccbb0 100644 --- a/src/transformers/models/internvl/modular_internvl.py +++ b/src/transformers/models/internvl/modular_internvl.py @@ -548,8 +548,8 @@ def get_image_features( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/jamba/modeling_jamba.py b/src/transformers/models/jamba/modeling_jamba.py index db7461af73d7..f604ffd3b72e 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -619,7 +619,7 @@ def __init__(self, config: JambaConfig, layer_idx): def cuda_kernels_forward( self, hidden_states: torch.Tensor, - cache_params: HybridMambaAttentionDynamicCache = None, + cache_params: Optional[HybridMambaAttentionDynamicCache] = None, attention_mask: Optional[torch.LongTensor] = None, ): batch_size, seq_len, _ = hidden_states.shape @@ -723,7 +723,7 @@ def cuda_kernels_forward( return contextualized_states # fmt: off - def slow_forward(self, input_states, cache_params: HybridMambaAttentionDynamicCache = None, attention_mask: Optional[torch.LongTensor] = None): + def slow_forward(self, input_states, cache_params: Optional[HybridMambaAttentionDynamicCache] = None, attention_mask: Optional[torch.LongTensor] = None): batch_size, seq_len, _ = input_states.shape dtype = input_states.dtype # 1. 
Gated MLP's linear projection @@ -811,7 +811,7 @@ def slow_forward(self, input_states, cache_params: HybridMambaAttentionDynamicCa def forward( self, hidden_states, - cache_params: HybridMambaAttentionDynamicCache = None, + cache_params: Optional[HybridMambaAttentionDynamicCache] = None, attention_mask: Optional[torch.LongTensor] = None, ): if self.use_fast_kernels: diff --git a/src/transformers/models/janus/image_processing_janus.py b/src/transformers/models/janus/image_processing_janus.py index 84b78831d625..33f073a2b91c 100644 --- a/src/transformers/models/janus/image_processing_janus.py +++ b/src/transformers/models/janus/image_processing_janus.py @@ -205,7 +205,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/janus/image_processing_janus_fast.py b/src/transformers/models/janus/image_processing_janus_fast.py index 12f0d0f394fe..deb13f66e9f3 100644 --- a/src/transformers/models/janus/image_processing_janus_fast.py +++ b/src/transformers/models/janus/image_processing_janus_fast.py @@ -83,7 +83,7 @@ def resize( image: "torch.Tensor", size: SizeDict, min_size: int, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/janus/modeling_janus.py b/src/transformers/models/janus/modeling_janus.py index 64b285f84c3c..9fa2ba354dd0 100644 --- a/src/transformers/models/janus/modeling_janus.py +++ b/src/transformers/models/janus/modeling_janus.py @@ -83,7 +83,7 @@ class JanusVQVAEOutput(ModelOutput): """ decoded_pixel_values: Optional[torch.FloatTensor] = None - embedding_loss: torch.FloatTensor = None + embedding_loss: Optional[torch.FloatTensor] = None @dataclass @@ -1130,8 +1130,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1205,8 +1205,8 @@ def prepare_embeddings_for_image_generation(self, inputs: torch.Tensor) -> torch @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1299,7 +1299,7 @@ def decode_image_tokens(self, image_tokens: torch.Tensor): @torch.no_grad def generate( self, - inputs: torch.Tensor = None, + inputs: Optional[torch.Tensor] = None, attention_mask: Optional[torch.LongTensor] = None, logits_processor: Optional[LogitsProcessorList] = None, **kwargs, diff --git a/src/transformers/models/janus/modular_janus.py b/src/transformers/models/janus/modular_janus.py index 9fa52bf78d9d..eef6f57d0d1d 100644 --- a/src/transformers/models/janus/modular_janus.py +++ b/src/transformers/models/janus/modular_janus.py @@ -410,7 +410,7 @@ class JanusVQVAEOutput(ModelOutput): """ decoded_pixel_values: Optional[torch.FloatTensor] = 
None - embedding_loss: torch.FloatTensor = None + embedding_loss: Optional[torch.FloatTensor] = None class JanusBaseModelOutputWithPast(IdeficsBaseModelOutputWithPast): @@ -934,8 +934,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1009,8 +1009,8 @@ def prepare_embeddings_for_image_generation(self, inputs: torch.Tensor) -> torch @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1103,7 +1103,7 @@ def decode_image_tokens(self, image_tokens: torch.Tensor): @torch.no_grad def generate( self, - inputs: torch.Tensor = None, + inputs: Optional[torch.Tensor] = None, attention_mask: Optional[torch.LongTensor] = None, logits_processor: Optional[LogitsProcessorList] = None, **kwargs, diff --git a/src/transformers/models/janus/processing_janus.py b/src/transformers/models/janus/processing_janus.py index 2c106002e42e..0563edb5bd41 100644 --- a/src/transformers/models/janus/processing_janus.py +++ b/src/transformers/models/janus/processing_janus.py @@ -16,7 +16,7 @@ Processor class for Janus. """ -from typing import Union +from typing import Optional, Union from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput @@ -80,7 +80,7 @@ def __init__(self, image_processor, tokenizer, chat_template=None, use_default_s def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, videos=None, audio=None, **kwargs: Unpack[JanusProcessorKwargs], diff --git a/src/transformers/models/kosmos2/processing_kosmos2.py b/src/transformers/models/kosmos2/processing_kosmos2.py index 2a4d09f00e8d..58b3dff1e07a 100644 --- a/src/transformers/models/kosmos2/processing_kosmos2.py +++ b/src/transformers/models/kosmos2/processing_kosmos2.py @@ -134,7 +134,7 @@ def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwa def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, list[TextInput]] = None, audio=None, videos=None, @@ -342,7 +342,7 @@ def _preprocess_single_example(self, text, image, bboxes, img_info_tokens): def preprocess_examples( self, texts: Union[TextInput, list[TextInput]], - images: ImageInput = None, + images: Optional[ImageInput] = None, bboxes: BboxInput = None, num_image_tokens: Optional[int] = 64, ) -> Union[str, list[str]]: diff --git a/src/transformers/models/kosmos2_5/modeling_kosmos2_5.py b/src/transformers/models/kosmos2_5/modeling_kosmos2_5.py index 27e692273c71..c51d9109b48b 100644 --- a/src/transformers/models/kosmos2_5/modeling_kosmos2_5.py +++ b/src/transformers/models/kosmos2_5/modeling_kosmos2_5.py @@ -283,7 +283,7 @@ class Kosmos2_5ModelOutput(ModelOutput): input) to speed up sequential decoding. 
""" - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[tuple[tuple[torch.FloatTensor]]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -346,7 +346,7 @@ class Kosmos2_5ForConditionalGenerationModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/kosmos2_5/processing_kosmos2_5.py b/src/transformers/models/kosmos2_5/processing_kosmos2_5.py index 5e780ca2db9a..0e3c70c80234 100644 --- a/src/transformers/models/kosmos2_5/processing_kosmos2_5.py +++ b/src/transformers/models/kosmos2_5/processing_kosmos2_5.py @@ -79,7 +79,7 @@ def __init__(self, image_processor, tokenizer): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, list[TextInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index 42b00b2f5c3e..de2e7361a6d3 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -202,7 +202,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = None, diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index 8a629d495b02..5bf03b39e4b9 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -180,7 +180,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/levit/image_processing_levit_fast.py b/src/transformers/models/levit/image_processing_levit_fast.py index b9b6018a82fd..096c846234da 100644 --- a/src/transformers/models/levit/image_processing_levit_fast.py +++ b/src/transformers/models/levit/image_processing_levit_fast.py @@ -14,6 +14,8 @@ # limitations under the License. 
"""Fast Image processor class for LeViT.""" +from typing import Optional + from ...image_processing_utils_fast import BaseImageProcessorFast, SizeDict from ...image_transforms import ( ChannelDimension, @@ -51,7 +53,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ diff --git a/src/transformers/models/lightglue/image_processing_lightglue.py b/src/transformers/models/lightglue/image_processing_lightglue.py index c389929eea30..400475b76c77 100644 --- a/src/transformers/models/lightglue/image_processing_lightglue.py +++ b/src/transformers/models/lightglue/image_processing_lightglue.py @@ -225,7 +225,7 @@ def preprocess( images, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] = None, diff --git a/src/transformers/models/lightglue/modeling_lightglue.py b/src/transformers/models/lightglue/modeling_lightglue.py index 2a94acf1c0b2..fd460e54d393 100644 --- a/src/transformers/models/lightglue/modeling_lightglue.py +++ b/src/transformers/models/lightglue/modeling_lightglue.py @@ -690,7 +690,7 @@ def _match_image_pair( descriptors: torch.Tensor, height: int, width: int, - mask: torch.Tensor = None, + mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, tuple, tuple]: diff --git a/src/transformers/models/lightglue/modular_lightglue.py b/src/transformers/models/lightglue/modular_lightglue.py index ce18ff0d6f50..64c36f21fef9 100644 --- a/src/transformers/models/lightglue/modular_lightglue.py +++ b/src/transformers/models/lightglue/modular_lightglue.py @@ -848,7 +848,7 @@ def _match_image_pair( descriptors: torch.Tensor, height: int, width: int, - mask: torch.Tensor = None, + mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, tuple, tuple]: diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 059011629586..223d4a107806 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -495,7 +495,7 @@ def __init__(self, config: Llama4TextConfig): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -583,7 +583,7 @@ def __init__(self, config: Llama4TextConfig): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None, @@ -668,7 +668,7 @@ class Llama4CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: 
Optional[tuple[torch.FloatTensor]] = None @@ -1227,8 +1227,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1242,7 +1242,7 @@ def forward( return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **kwargs: Unpack[TransformersKwargs], ) -> Union[tuple, Llama4CausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/llava/modeling_llava.py b/src/transformers/models/llava/modeling_llava.py index 783712660279..ae8956d4df70 100644 --- a/src/transformers/models/llava/modeling_llava.py +++ b/src/transformers/models/llava/modeling_llava.py @@ -245,8 +245,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -254,7 +254,7 @@ def forward( vision_feature_layer: Optional[Union[int, list[int]]] = None, vision_feature_select_strategy: Optional[str] = None, cache_position: Optional[torch.LongTensor] = None, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **kwargs: Unpack[TransformersKwargs], ) -> Union[tuple, LlavaModelOutputWithPast]: vision_feature_layer = ( @@ -369,8 +369,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py index 2dead73d6968..b3e95ff8f252 100644 --- a/src/transformers/models/llava/processing_llava.py +++ b/src/transformers/models/llava/processing_llava.py @@ -16,7 +16,7 @@ Processor class for Llava. 
""" -from typing import Union +from typing import Optional, Union import numpy as np @@ -92,7 +92,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 5a8b0bdde6e1..98f608fc6bf5 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -319,7 +319,7 @@ def _preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, @@ -553,7 +553,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, image_grid_pinpoints: Optional[list] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/llava_next/modeling_llava_next.py b/src/transformers/models/llava_next/modeling_llava_next.py index e4a05d80f08a..a319afec0337 100644 --- a/src/transformers/models/llava_next/modeling_llava_next.py +++ b/src/transformers/models/llava_next/modeling_llava_next.py @@ -453,8 +453,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -605,8 +605,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py index e28212895371..f8e90d6303cd 100644 --- a/src/transformers/models/llava_next/processing_llava_next.py +++ b/src/transformers/models/llava_next/processing_llava_next.py @@ -16,7 +16,7 @@ Processor class for LLaVa-NeXT. 
""" -from typing import Union +from typing import Optional, Union import numpy as np @@ -102,7 +102,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py index 787691682999..ba1cd30a1133 100644 --- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py @@ -180,7 +180,7 @@ def _preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, @@ -280,7 +280,7 @@ def preprocess( images: VideoInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/llava_next_video/modeling_llava_next_video.py b/src/transformers/models/llava_next_video/modeling_llava_next_video.py index eed243cf0116..3845c301cc8f 100644 --- a/src/transformers/models/llava_next_video/modeling_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modeling_llava_next_video.py @@ -480,8 +480,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -520,9 +520,9 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -746,9 +746,9 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_next_video/modular_llava_next_video.py b/src/transformers/models/llava_next_video/modular_llava_next_video.py index d89168a7260d..f4802930f784 100644 --- a/src/transformers/models/llava_next_video/modular_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modular_llava_next_video.py @@ -401,8 +401,8 @@ def 
get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -439,9 +439,9 @@ def get_placeholder_mask( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -544,9 +544,9 @@ def get_video_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_next_video/processing_llava_next_video.py b/src/transformers/models/llava_next_video/processing_llava_next_video.py index a90ea1752143..fa639bd1929e 100644 --- a/src/transformers/models/llava_next_video/processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/processing_llava_next_video.py @@ -16,7 +16,7 @@ Processor class for LLaVa-NeXT-Video. 
""" -from typing import Union +from typing import Optional, Union import numpy as np @@ -114,10 +114,10 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[LlavaNextVideoProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py index d8ed03085f22..6f523e30463b 100644 --- a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py @@ -527,7 +527,7 @@ def _preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -603,7 +603,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, image_grid_pinpoints: Optional[list] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/llava_onevision/modeling_llava_onevision.py b/src/transformers/models/llava_onevision/modeling_llava_onevision.py index f506d01320aa..204cd157c3fd 100644 --- a/src/transformers/models/llava_onevision/modeling_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modeling_llava_onevision.py @@ -455,8 +455,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -495,10 +495,10 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, - pixel_values_videos: torch.FloatTensor = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes_videos: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -738,10 +738,10 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, - pixel_values_videos: torch.FloatTensor = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes_videos: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_onevision/modular_llava_onevision.py 
b/src/transformers/models/llava_onevision/modular_llava_onevision.py index 2f98f1f3d987..9d6d3a53f7c8 100644 --- a/src/transformers/models/llava_onevision/modular_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modular_llava_onevision.py @@ -477,10 +477,10 @@ def get_video_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, - pixel_values_videos: torch.FloatTensor = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes_videos: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -590,10 +590,10 @@ class LlavaOnevisionForConditionalGeneration(LlavaNextVideoForConditionalGenerat @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, - pixel_values_videos: torch.FloatTensor = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes_videos: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_onevision/processing_llava_onevision.py b/src/transformers/models/llava_onevision/processing_llava_onevision.py index 6b0fbc6dd8e3..663e1531d713 100644 --- a/src/transformers/models/llava_onevision/processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/processing_llava_onevision.py @@ -18,7 +18,7 @@ import math from collections.abc import Iterable -from typing import Union +from typing import Optional, Union import numpy as np @@ -112,10 +112,10 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[LlavaOnevisionProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 5f9639d0f9db..a0c369722b54 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -605,7 +605,7 @@ def _preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -629,7 +629,7 @@ def _preprocess_image( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -711,7 +711,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: 
Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -901,7 +901,7 @@ def pad( def encode_inputs( self, pixel_values_list: list[ImageInput], - segmentation_maps: ImageInput = None, + segmentation_maps: Optional[ImageInput] = None, instance_id_to_semantic_id: Optional[Union[list[dict[int, int]], dict[int, int]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, diff --git a/src/transformers/models/mask2former/image_processing_mask2former_fast.py b/src/transformers/models/mask2former/image_processing_mask2former_fast.py index 85d5c19c3824..b94f0d8c308c 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former_fast.py +++ b/src/transformers/models/mask2former/image_processing_mask2former_fast.py @@ -194,7 +194,7 @@ def resize( image: torch.Tensor, size: SizeDict, size_divisor: int = 0, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 9899a2d41ed1..e8c3d2344b8d 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -65,7 +65,7 @@ class Mask2FormerPixelDecoderOutput(ModelOutput): or when `config.output_attentions=True` """ - multi_scale_features: tuple[torch.FloatTensor] = None + multi_scale_features: Optional[tuple[torch.FloatTensor]] = None mask_features: Optional[torch.FloatTensor] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -98,8 +98,8 @@ class Mask2FormerMaskedAttentionDecoderOutput(BaseModelOutputWithCrossAttentions last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[torch.FloatTensor] = None - masks_queries_logits: tuple[torch.FloatTensor] = None - intermediate_hidden_states: tuple[torch.FloatTensor] = None + masks_queries_logits: Optional[tuple[torch.FloatTensor]] = None + intermediate_hidden_states: Optional[tuple[torch.FloatTensor]] = None @dataclass @@ -132,7 +132,7 @@ class Mask2FormerPixelLevelModuleOutput(ModelOutput): encoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None decoder_last_hidden_state: Optional[torch.FloatTensor] = None - decoder_hidden_states: tuple[torch.FloatTensor] = None + decoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None @dataclass @@ -178,8 +178,8 @@ class Mask2FormerModelOutput(ModelOutput): encoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None pixel_decoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None transformer_decoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None - transformer_decoder_intermediate_states: tuple[torch.FloatTensor] = None - masks_queries_logits: tuple[torch.FloatTensor] = None + transformer_decoder_intermediate_states: Optional[tuple[torch.FloatTensor]] = None + masks_queries_logits: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 5437af6df9ec..9ce33846170e 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ 
b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -608,7 +608,7 @@ def _preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -632,7 +632,7 @@ def _preprocess_image( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -714,7 +714,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -903,7 +903,7 @@ def pad( def encode_inputs( self, pixel_values_list: list[ImageInput], - segmentation_maps: ImageInput = None, + segmentation_maps: Optional[ImageInput] = None, instance_id_to_semantic_id: Optional[Union[list[dict[int, int]], dict[int, int]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, diff --git a/src/transformers/models/maskformer/image_processing_maskformer_fast.py b/src/transformers/models/maskformer/image_processing_maskformer_fast.py index d2e0424265eb..ad5cb946d38d 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer_fast.py +++ b/src/transformers/models/maskformer/image_processing_maskformer_fast.py @@ -195,7 +195,7 @@ def resize( image: torch.Tensor, size: SizeDict, size_divisor: int = 0, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 20998929f9f9..d1846c2531f6 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -94,7 +94,9 @@ def __init__(self, num_embeddings: int, embedding_dim: int): self.offset = 2 super().__init__(num_embeddings + self.offset, embedding_dim) - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/mgp_str/modeling_mgp_str.py b/src/transformers/models/mgp_str/modeling_mgp_str.py index 9e6ab26a4b98..8f65375a7895 100644 --- a/src/transformers/models/mgp_str/modeling_mgp_str.py +++ b/src/transformers/models/mgp_str/modeling_mgp_str.py @@ -91,7 +91,7 @@ class MgpstrModelOutput(ModelOutput): heads. 
""" - logits: tuple[torch.FloatTensor] = None + logits: Optional[tuple[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None a3_attentions: Optional[tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/minimax/modeling_minimax.py b/src/transformers/models/minimax/modeling_minimax.py index 90338bde2f6e..ac5e0fe2a24c 100644 --- a/src/transformers/models/minimax/modeling_minimax.py +++ b/src/transformers/models/minimax/modeling_minimax.py @@ -652,7 +652,7 @@ def __init__(self, config: MiniMaxConfig): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[MiniMaxCache] = None, diff --git a/src/transformers/models/minimax/modular_minimax.py b/src/transformers/models/minimax/modular_minimax.py index 1090327af32c..9026457e35cb 100644 --- a/src/transformers/models/minimax/modular_minimax.py +++ b/src/transformers/models/minimax/modular_minimax.py @@ -482,7 +482,7 @@ class MiniMaxPreTrainedModel(MixtralPreTrainedModel): class MiniMaxModel(MixtralModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[MiniMaxCache] = None, diff --git a/src/transformers/models/mistral3/modeling_mistral3.py b/src/transformers/models/mistral3/modeling_mistral3.py index 644f6523d111..ecfb3080ee96 100644 --- a/src/transformers/models/mistral3/modeling_mistral3.py +++ b/src/transformers/models/mistral3/modeling_mistral3.py @@ -289,8 +289,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -301,7 +301,7 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **kwargs: Unpack[TransformersKwargs], ) -> Union[tuple, Mistral3ModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -419,8 +419,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/mistral3/modular_mistral3.py b/src/transformers/models/mistral3/modular_mistral3.py index 0277568d2b00..213ab98fe902 100644 --- a/src/transformers/models/mistral3/modular_mistral3.py +++ b/src/transformers/models/mistral3/modular_mistral3.py @@ -163,8 +163,8 @@ def get_image_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, 
attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -175,7 +175,7 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **kwargs: Unpack[TransformersKwargs], ) -> Union[tuple, Mistral3ModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -244,8 +244,8 @@ def get_image_features( def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index 8b0715787545..6fa3f443c53b 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -172,7 +172,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index 7f261c5c45c3..eb6e6388bff4 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -206,7 +206,7 @@ def _preprocess( do_center_crop: bool, do_normalize: bool, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, rescale_factor: Optional[float] = None, crop_size: Optional[dict[str, int]] = None, image_mean: Optional[Union[float, list[float]]] = None, @@ -235,7 +235,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, @@ -326,7 +326,7 @@ def preprocess( segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 1207ec0da399..5411023c3104 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -217,7 +217,7 @@ def _preprocess( do_center_crop: bool, do_flip_channel_order: bool, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: 
Optional[PILImageResampling] = None, rescale_factor: Optional[float] = None, crop_size: Optional[dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -244,7 +244,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, @@ -329,7 +329,7 @@ def preprocess( segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index 8a66f9e13912..3860632d7306 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -85,7 +85,7 @@ class MusicgenUnconditionalInput(ModelOutput): from the prompts) and the unconditional logits (predicted without prompts). """ - encoder_outputs: tuple[torch.FloatTensor] = None + encoder_outputs: Optional[tuple[torch.FloatTensor]] = None attention_mask: Optional[torch.LongTensor] = None guidance_scale: Optional[float] = None @@ -1931,7 +1931,7 @@ def _prepare_decoder_input_ids_for_generation( model_kwargs: dict[str, torch.Tensor], decoder_start_token_id: Optional[int] = None, bos_token_id: Optional[int] = None, - device: torch.device = None, + device: Optional[torch.device] = None, ) -> tuple[torch.LongTensor, dict[str, torch.Tensor]]: """Prepares `decoder_input_ids` for generation with encoder-decoder models""" diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 58c012a1cfb9..2c5e53fd8910 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -1815,7 +1815,7 @@ def _prepare_decoder_input_ids_for_generation( model_kwargs: dict[str, torch.Tensor], decoder_start_token_id: Optional[int] = None, bos_token_id: Optional[int] = None, - device: torch.device = None, + device: Optional[torch.device] = None, ) -> tuple[torch.LongTensor, dict[str, torch.Tensor]]: """Prepares `decoder_input_ids` for generation with encoder-decoder models""" diff --git a/src/transformers/models/mvp/modeling_mvp.py b/src/transformers/models/mvp/modeling_mvp.py index a0a0aa7af18c..8dd74ded9bde 100644 --- a/src/transformers/models/mvp/modeling_mvp.py +++ b/src/transformers/models/mvp/modeling_mvp.py @@ -77,7 +77,9 @@ def __init__(self, num_embeddings: int, embedding_dim: int): self.offset = 2 super().__init__(num_embeddings + self.offset, embedding_dim) - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 79aa421d3864..ecbea1b10b78 100644 --- 
a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -371,7 +371,7 @@ def preprocess( do_crop_margin: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, diff --git a/src/transformers/models/nougat/image_processing_nougat_fast.py b/src/transformers/models/nougat/image_processing_nougat_fast.py index 29e1d6e21758..136d7f171575 100644 --- a/src/transformers/models/nougat/image_processing_nougat_fast.py +++ b/src/transformers/models/nougat/image_processing_nougat_fast.py @@ -241,7 +241,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py index 966be71d700b..66fd18abf32c 100644 --- a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py @@ -59,7 +59,7 @@ class OmDetTurboEncoderOutput(ModelOutput): last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None - extracted_states: tuple[torch.FloatTensor] = None + extracted_states: Optional[tuple[torch.FloatTensor]] = None @dataclass @@ -92,7 +92,7 @@ class OmDetTurboDecoderOutput(ModelOutput): decoder_coords: Optional[torch.FloatTensor] = None decoder_classes: Optional[torch.FloatTensor] = None encoder_coord_logits: Optional[torch.FloatTensor] = None - encoder_class_logits: tuple[torch.FloatTensor] = None + encoder_class_logits: Optional[tuple[torch.FloatTensor]] = None init_reference_points: Optional[torch.FloatTensor] = None intermediate_reference_points: tuple[tuple[torch.FloatTensor]] = None @@ -147,7 +147,7 @@ class OmDetTurboObjectDetectionOutput(ModelOutput): init_reference_points: Optional[torch.FloatTensor] = None intermediate_reference_points: Optional[tuple[tuple[torch.FloatTensor]]] = None encoder_coord_logits: Optional[torch.FloatTensor] = None - encoder_class_logits: tuple[torch.FloatTensor] = None + encoder_class_logits: Optional[tuple[torch.FloatTensor]] = None encoder_extracted_states: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[tuple[torch.FloatTensor]] = None decoder_attentions: Optional[tuple[tuple[torch.FloatTensor]]] = None diff --git a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py index 6a4786729b53..0c4cfd40eb62 100644 --- a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py @@ -225,7 +225,7 @@ def __init__(self, image_processor, tokenizer): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Optional[Union[list[str], list[list[str]]]] = None, audio=None, videos=None, diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index d88b373418c6..615c71593062 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ 
b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -571,7 +571,7 @@ def _preprocess( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -592,7 +592,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -671,7 +671,7 @@ def preprocess( instance_id_to_semantic_id: Optional[dict[int, int]] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -948,7 +948,7 @@ def encode_inputs( self, pixel_values_list: list[ImageInput], task_inputs: list[str], - segmentation_maps: ImageInput = None, + segmentation_maps: Optional[ImageInput] = None, instance_id_to_semantic_id: Optional[Union[list[dict[int, int]], dict[int, int]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py index 8c2e633e11bc..a5336f6fc490 100644 --- a/src/transformers/models/oneformer/modeling_oneformer.py +++ b/src/transformers/models/oneformer/modeling_oneformer.py @@ -781,7 +781,7 @@ class OneFormerPixelDecoderOutput(ModelOutput): or when `config.output_attentions=True` """ - multi_scale_features: tuple[torch.FloatTensor] = None + multi_scale_features: Optional[tuple[torch.FloatTensor]] = None mask_features: Optional[torch.FloatTensor] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -806,8 +806,8 @@ class OneFormerPixelLevelModuleOutput(ModelOutput): 1/4 scale features from the last Pixel Decoder Layer. 
""" - encoder_features: list[torch.FloatTensor] = None - decoder_features: list[torch.FloatTensor] = None + encoder_features: Optional[list[torch.FloatTensor]] = None + decoder_features: Optional[list[torch.FloatTensor]] = None decoder_last_feature: Optional[torch.FloatTensor] = None diff --git a/src/transformers/models/ovis2/image_processing_ovis2.py b/src/transformers/models/ovis2/image_processing_ovis2.py index 53c7a250a62e..bd6d63e83914 100644 --- a/src/transformers/models/ovis2/image_processing_ovis2.py +++ b/src/transformers/models/ovis2/image_processing_ovis2.py @@ -316,7 +316,7 @@ def preprocess( crop_to_patches: Optional[bool] = None, min_patches: Optional[int] = None, max_patches: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -481,7 +481,7 @@ def crop_image_to_patches( max_patches: int, use_covering_area_grid: bool = True, patch_size: Optional[Union[tuple, int, dict]] = None, - data_format: ChannelDimension = None, + data_format: Optional[ChannelDimension] = None, covering_threshold: float = 0.9, ): """ diff --git a/src/transformers/models/ovis2/modeling_ovis2.py b/src/transformers/models/ovis2/modeling_ovis2.py index 8390f66ff39b..6f6e95891609 100644 --- a/src/transformers/models/ovis2/modeling_ovis2.py +++ b/src/transformers/models/ovis2/modeling_ovis2.py @@ -592,8 +592,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -713,8 +713,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, diff --git a/src/transformers/models/ovis2/modular_ovis2.py b/src/transformers/models/ovis2/modular_ovis2.py index 4163d62be5e5..6856be8feb4f 100644 --- a/src/transformers/models/ovis2/modular_ovis2.py +++ b/src/transformers/models/ovis2/modular_ovis2.py @@ -253,8 +253,8 @@ def get_image_features( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, @@ -347,8 +347,8 @@ def get_image_features(self, pixel_values: torch.FloatTensor): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[list[torch.FloatTensor]] = None, diff --git a/src/transformers/models/ovis2/processing_ovis2.py b/src/transformers/models/ovis2/processing_ovis2.py index 
dcc9b25d7b34..662ff15d101a 100644 --- a/src/transformers/models/ovis2/processing_ovis2.py +++ b/src/transformers/models/ovis2/processing_ovis2.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Union +from typing import Optional, Union from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput @@ -78,7 +78,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, **kwargs: Unpack[Ovis2ProcessorKwargs], ) -> BatchFeature: diff --git a/src/transformers/models/owlvit/image_processing_owlvit.py b/src/transformers/models/owlvit/image_processing_owlvit.py index 8040248039d4..cc9c6cfdeaa8 100644 --- a/src/transformers/models/owlvit/image_processing_owlvit.py +++ b/src/transformers/models/owlvit/image_processing_owlvit.py @@ -307,7 +307,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/paligemma/modeling_paligemma.py b/src/transformers/models/paligemma/modeling_paligemma.py index 313a47b1dd59..1ae480913ca1 100644 --- a/src/transformers/models/paligemma/modeling_paligemma.py +++ b/src/transformers/models/paligemma/modeling_paligemma.py @@ -276,8 +276,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, @@ -436,8 +436,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None, diff --git a/src/transformers/models/paligemma/processing_paligemma.py b/src/transformers/models/paligemma/processing_paligemma.py index 7ab447049800..3bb81d86e1aa 100644 --- a/src/transformers/models/paligemma/processing_paligemma.py +++ b/src/transformers/models/paligemma/processing_paligemma.py @@ -152,7 +152,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 850671324712..c66d7b51d463 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -217,7 +217,7 @@ def preprocess( crop_size: Optional[dict[str, int]] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: 
Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/perception_lm/modeling_perception_lm.py b/src/transformers/models/perception_lm/modeling_perception_lm.py index 3032cc7e195e..036dc00aa55c 100644 --- a/src/transformers/models/perception_lm/modeling_perception_lm.py +++ b/src/transformers/models/perception_lm/modeling_perception_lm.py @@ -211,8 +211,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is diff --git a/src/transformers/models/perception_lm/modular_perception_lm.py b/src/transformers/models/perception_lm/modular_perception_lm.py index 9affbd3ec5dc..ef259f889f4f 100644 --- a/src/transformers/models/perception_lm/modular_perception_lm.py +++ b/src/transformers/models/perception_lm/modular_perception_lm.py @@ -172,8 +172,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is diff --git a/src/transformers/models/perception_lm/processing_perception_lm.py b/src/transformers/models/perception_lm/processing_perception_lm.py index 5af18aa155e2..f61c54554d32 100644 --- a/src/transformers/models/perception_lm/processing_perception_lm.py +++ b/src/transformers/models/perception_lm/processing_perception_lm.py @@ -16,7 +16,7 @@ """ from collections.abc import Iterable -from typing import Union +from typing import Optional, Union import numpy as np @@ -87,10 +87,10 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[PerceptionLMProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/pixtral/image_processing_pixtral.py b/src/transformers/models/pixtral/image_processing_pixtral.py index 6022560ba01a..c6c6fdb163ab 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral.py +++ b/src/transformers/models/pixtral/image_processing_pixtral.py @@ -322,7 +322,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, patch_size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/pixtral/image_processing_pixtral_fast.py b/src/transformers/models/pixtral/image_processing_pixtral_fast.py index 82996120786a..5d42bb097476 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral_fast.py +++ b/src/transformers/models/pixtral/image_processing_pixtral_fast.py @@ -89,7 +89,7 @@ def resize( image: torch.Tensor, size: 
SizeDict, patch_size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ diff --git a/src/transformers/models/pixtral/processing_pixtral.py b/src/transformers/models/pixtral/processing_pixtral.py index 42edbe24f1f5..bf4d3a736367 100644 --- a/src/transformers/models/pixtral/processing_pixtral.py +++ b/src/transformers/models/pixtral/processing_pixtral.py @@ -16,7 +16,7 @@ Processor class for Pixtral. """ -from typing import Union +from typing import Optional, Union import numpy as np @@ -118,7 +118,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index cb29bfd0cec1..38085a72264c 100644 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -285,7 +285,9 @@ def __init__(self, num_embeddings: int, embedding_dim: int): self.offset = 2 super().__init__(num_embeddings + self.offset, embedding_dim) - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index c11a3fddf4a4..ee5500c823cc 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -216,7 +216,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, crop_pct: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/poolformer/image_processing_poolformer_fast.py b/src/transformers/models/poolformer/image_processing_poolformer_fast.py index 21895c994035..8fefa80be432 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer_fast.py +++ b/src/transformers/models/poolformer/image_processing_poolformer_fast.py @@ -89,7 +89,7 @@ def resize( image: "torch.Tensor", size: SizeDict, crop_pct: Optional[float] = None, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index d0c594e47e36..9f687fe7548f 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -151,7 +151,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py 
b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index e8419bcf40c4..51f9440001d6 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -1761,8 +1761,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -2069,12 +2069,12 @@ class Qwen2_5OmniTalkerCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None rope_deltas: Optional[torch.LongTensor] = None - thinker_reply_part: torch.FloatTensor = None + thinker_reply_part: Optional[torch.FloatTensor] = None @auto_docstring @@ -2271,7 +2271,7 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index eb5679194b90..260ead04b76c 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -2209,8 +2209,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -2517,12 +2517,12 @@ class Qwen2_5OmniTalkerCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None rope_deltas: Optional[torch.LongTensor] = None - thinker_reply_part: torch.FloatTensor = None + thinker_reply_part: Optional[torch.FloatTensor] = None class Qwen2_5OmniTalkerModel(Qwen2_5_VLTextModel): @@ -2570,7 +2570,7 @@ def set_input_embeddings(self, value): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py index e7f2f3f5b66f..45d8cacddeb2 100644 --- a/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py @@ -113,9 +113,9 @@ def __init__( def 
__call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, - videos: VideoInput = None, - audio: AudioInput = None, + images: Optional[ImageInput] = None, + videos: Optional[VideoInput] = None, + audio: Optional[AudioInput] = None, **kwargs: Unpack[Qwen2_5OmniProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index 91e24f4fa8cd..bc48a879c800 100644 --- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -493,7 +493,7 @@ class Qwen2_5_VLModelOutputWithPast(ModelOutput): The rope index difference between sequence length and multimodal rope. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -1188,8 +1188,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -1227,7 +1227,7 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1417,7 +1417,7 @@ def visual(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py index ada13b315c23..859f51cd6100 100644 --- a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py @@ -540,7 +540,7 @@ def get_rope_index( def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -661,7 +661,7 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -885,9 +885,9 @@ def model_input_names(self): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[Qwen2_5_VLProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py 
index 25dead191c7b..5cfb07353626 100644 --- a/src/transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py @@ -97,9 +97,9 @@ def __init__(self, image_processor=None, tokenizer=None, video_processor=None, c def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[Qwen2_5_VLProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py index cfc76f495f83..552b289f58c0 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py @@ -167,7 +167,7 @@ def _preprocess( images: Union[ImageInput, VideoInput], do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -299,12 +299,12 @@ def _preprocess( def preprocess( self, images: ImageInput, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py index 745829856db0..83200bcb904a 100644 --- a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py @@ -73,7 +73,7 @@ class Qwen2VLModelOutputWithPast(ModelOutput): The rope index difference between sequence length and multimodal rope. 
""" - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -1123,8 +1123,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -1162,7 +1162,7 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1302,7 +1302,7 @@ def visual(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen2_vl/processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/processing_qwen2_vl.py index 1c6fe1d22f9b..a065acc126d1 100644 --- a/src/transformers/models/qwen2_vl/processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/processing_qwen2_vl.py @@ -92,9 +92,9 @@ def __init__(self, image_processor=None, tokenizer=None, video_processor=None, c def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[Qwen2VLProcessorKwargs], ) -> BatchFeature: """ diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py index 73efff36fbcc..9927a8d02209 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py @@ -194,7 +194,7 @@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -250,7 +250,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. 
diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index ea149b256f7b..33a3661c5e6d 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -267,7 +267,7 @@ def _preprocess( do_rescale: bool, do_normalize: bool, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, rescale_factor: Optional[float] = None, image_mean: Optional[Union[float, list[float]]] = None, image_std: Optional[Union[float, list[float]]] = None, @@ -295,7 +295,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/sam2/modeling_sam2.py b/src/transformers/models/sam2/modeling_sam2.py index 4d27c1c323cd..20ea1d5e6230 100644 --- a/src/transformers/models/sam2/modeling_sam2.py +++ b/src/transformers/models/sam2/modeling_sam2.py @@ -74,7 +74,7 @@ class Sam2VisionEncoderOutput(ModelOutput): the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None fpn_hidden_states: Optional[torch.FloatTensor] = None fpn_position_encoding: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None @@ -106,9 +106,9 @@ class Sam2ImageSegmentationOutput(ModelOutput): Attentions weights of the mask decoder. """ - iou_scores: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None - object_score_logits: torch.FloatTensor = None + iou_scores: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None + object_score_logits: Optional[torch.FloatTensor] = None image_embeddings: tuple[torch.FloatTensor, ...] = None vision_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None vision_attentions: Optional[tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/sam2/modular_sam2.py b/src/transformers/models/sam2/modular_sam2.py index 9d4602e31239..647acde0dee9 100644 --- a/src/transformers/models/sam2/modular_sam2.py +++ b/src/transformers/models/sam2/modular_sam2.py @@ -314,7 +314,7 @@ class Sam2VisionEncoderOutput(ModelOutput): the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None fpn_hidden_states: Optional[torch.FloatTensor] = None fpn_position_encoding: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None @@ -346,9 +346,9 @@ class Sam2ImageSegmentationOutput(ModelOutput): Attentions weights of the mask decoder. """ - iou_scores: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None - object_score_logits: torch.FloatTensor = None + iou_scores: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None + object_score_logits: Optional[torch.FloatTensor] = None image_embeddings: tuple[torch.FloatTensor, ...] 
= None vision_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None vision_attentions: Optional[tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/sam2/processing_sam2.py b/src/transformers/models/sam2/processing_sam2.py index 2c4f0a9b7df3..5f147aab8dfa 100644 --- a/src/transformers/models/sam2/processing_sam2.py +++ b/src/transformers/models/sam2/processing_sam2.py @@ -62,8 +62,8 @@ def __init__(self, image_processor, target_size: Optional[int] = None, point_pad def __call__( self, - images: ImageInput = None, - segmentation_maps: ImageInput = None, + images: Optional[ImageInput] = None, + segmentation_maps: Optional[ImageInput] = None, input_points: Optional[Union[list[list[list[list[float]]]], torch.Tensor]] = None, input_labels: Optional[Union[list[list[list[int]]], torch.Tensor]] = None, input_boxes: Optional[Union[list[list[list[float]]], torch.Tensor]] = None, diff --git a/src/transformers/models/sam2_video/modeling_sam2_video.py b/src/transformers/models/sam2_video/modeling_sam2_video.py index 4325018cf477..6982921fbef5 100644 --- a/src/transformers/models/sam2_video/modeling_sam2_video.py +++ b/src/transformers/models/sam2_video/modeling_sam2_video.py @@ -125,7 +125,7 @@ class Sam2VideoInferenceSession: def __init__( self, - video: torch.FloatTensor = None, + video: Optional[torch.FloatTensor] = None, video_height: Optional[int] = None, video_width: Optional[int] = None, inference_device: Union[torch.device, str] = "cpu", @@ -628,16 +628,16 @@ class Sam2VideoImageSegmentationOutput(ModelOutput): A tensor representing the object pointer, used for tracking in videos. Only used for Sam2VideoModel. """ - iou_scores: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None - object_score_logits: torch.FloatTensor = None + iou_scores: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None + object_score_logits: Optional[torch.FloatTensor] = None image_embeddings: tuple[torch.FloatTensor, ...] = None vision_hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None vision_attentions: Optional[tuple[torch.FloatTensor, ...]] = None mask_decoder_attentions: Optional[tuple[torch.FloatTensor, ...]] = None - high_res_masks: torch.FloatTensor = None - object_pointer: torch.FloatTensor = None + high_res_masks: Optional[torch.FloatTensor] = None + object_pointer: Optional[torch.FloatTensor] = None @dataclass @@ -650,8 +650,8 @@ class Sam2VideoSegmentationOutput(ModelOutput): The frame index of the video. """ - pred_masks: torch.FloatTensor = None - frame_idx: int = None + pred_masks: Optional[torch.FloatTensor] = None + frame_idx: Optional[int] = None @auto_docstring @@ -1123,7 +1123,7 @@ class Sam2VideoVisionEncoderOutput(ModelOutput): the self-attention heads. 
""" - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None fpn_hidden_states: Optional[torch.FloatTensor] = None fpn_position_encoding: Optional[torch.FloatTensor] = None hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/sam2_video/modular_sam2_video.py b/src/transformers/models/sam2_video/modular_sam2_video.py index 0b6cef4e910a..bc569aec2811 100644 --- a/src/transformers/models/sam2_video/modular_sam2_video.py +++ b/src/transformers/models/sam2_video/modular_sam2_video.py @@ -401,7 +401,7 @@ class Sam2VideoInferenceSession: def __init__( self, - video: torch.FloatTensor = None, + video: Optional[torch.FloatTensor] = None, video_height: Optional[int] = None, video_width: Optional[int] = None, inference_device: Union[torch.device, str] = "cpu", @@ -974,8 +974,8 @@ class Sam2VideoImageSegmentationOutput(Sam2ImageSegmentationOutput): A tensor representing the object pointer, used for tracking in videos. Only used for Sam2VideoModel. """ - high_res_masks: torch.FloatTensor = None - object_pointer: torch.FloatTensor = None + high_res_masks: Optional[torch.FloatTensor] = None + object_pointer: Optional[torch.FloatTensor] = None @dataclass @@ -988,8 +988,8 @@ class Sam2VideoSegmentationOutput(ModelOutput): The frame index of the video. """ - pred_masks: torch.FloatTensor = None - frame_idx: int = None + pred_masks: Optional[torch.FloatTensor] = None + frame_idx: Optional[int] = None @auto_docstring diff --git a/src/transformers/models/sam2_video/processing_sam2_video.py b/src/transformers/models/sam2_video/processing_sam2_video.py index e9c385b02ee4..7588cf256788 100644 --- a/src/transformers/models/sam2_video/processing_sam2_video.py +++ b/src/transformers/models/sam2_video/processing_sam2_video.py @@ -66,8 +66,8 @@ def __init__( def __call__( self, - images: ImageInput = None, - segmentation_maps: ImageInput = None, + images: Optional[ImageInput] = None, + segmentation_maps: Optional[ImageInput] = None, input_points: Optional[Union[list[list[list[list[float]]]], torch.Tensor]] = None, input_labels: Optional[Union[list[list[list[int]]], torch.Tensor]] = None, input_boxes: Optional[Union[list[list[list[float]]], torch.Tensor]] = None, diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index a651795ca0cf..46e66babe4de 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -186,7 +186,7 @@ def _preprocess( do_rescale: bool, do_normalize: bool, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, rescale_factor: Optional[float] = None, image_mean: Optional[Union[float, list[float]]] = None, image_std: Optional[Union[float, list[float]]] = None, @@ -211,7 +211,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -299,7 +299,7 @@ def preprocess( segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, 
rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/seggpt/image_processing_seggpt.py b/src/transformers/models/seggpt/image_processing_seggpt.py index c60cad80d7fd..ffadfaf85edb 100644 --- a/src/transformers/models/seggpt/image_processing_seggpt.py +++ b/src/transformers/models/seggpt/image_processing_seggpt.py @@ -245,7 +245,7 @@ def _preprocess_step( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -392,7 +392,7 @@ def preprocess( prompt_masks: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/shieldgemma2/processing_shieldgemma2.py b/src/transformers/models/shieldgemma2/processing_shieldgemma2.py index 412f231bef91..4341d087361e 100644 --- a/src/transformers/models/shieldgemma2/processing_shieldgemma2.py +++ b/src/transformers/models/shieldgemma2/processing_shieldgemma2.py @@ -85,7 +85,7 @@ def __init__( def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text=None, videos=None, audio=None, diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 152447bb5b87..0ffed5258de5 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -113,7 +113,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/siglip/processing_siglip.py b/src/transformers/models/siglip/processing_siglip.py index 8326838e20d3..c5e5ded61113 100644 --- a/src/transformers/models/siglip/processing_siglip.py +++ b/src/transformers/models/siglip/processing_siglip.py @@ -49,7 +49,7 @@ def __init__(self, image_processor, tokenizer): def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, + images: Optional[ImageInput] = None, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, max_length: Optional[int] = None, diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm.py b/src/transformers/models/smolvlm/image_processing_smolvlm.py index 12ae2cfb8e82..c08339b81732 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm.py @@ -605,7 +605,7 @@ def preprocess( do_convert_rgb: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_image_splitting: Optional[bool] = None, do_rescale: Optional[bool] = None, max_image_size: Optional[dict[str, int]] = None, diff --git 
a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py index a070cd87bf86..6f4bbd209bca 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py @@ -205,7 +205,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": @@ -244,7 +244,7 @@ def split_images( self, images: torch.Tensor, max_image_size: dict[str, int], - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Split an image into squares of side max_image_size and the original image resized to max_image_size. @@ -303,7 +303,7 @@ def resize_for_vision_encoder( self, image: torch.Tensor, vision_encoder_max_size: int, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resize images to be multiples of `vision_encoder_max_size` while preserving the aspect ratio. diff --git a/src/transformers/models/smolvlm/modeling_smolvlm.py b/src/transformers/models/smolvlm/modeling_smolvlm.py index 812b4df5d9c4..f0928c2cccdc 100644 --- a/src/transformers/models/smolvlm/modeling_smolvlm.py +++ b/src/transformers/models/smolvlm/modeling_smolvlm.py @@ -576,7 +576,9 @@ def inputs_merger( merged_embeds = torch.where(image_mask.unsqueeze(-1), image_embeds, inputs_embeds) return merged_embeds - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): """ Encodes images into continuous embeddings that can be forwarded to the language model. @@ -799,7 +801,9 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): self.model.text_model.set_input_embeddings(value) - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): return self.model.get_image_features(pixel_values=pixel_values, pixel_attention_mask=pixel_attention_mask) @can_return_tuple diff --git a/src/transformers/models/smolvlm/modular_smolvlm.py b/src/transformers/models/smolvlm/modular_smolvlm.py index 67bea742532a..25d55b1a974a 100644 --- a/src/transformers/models/smolvlm/modular_smolvlm.py +++ b/src/transformers/models/smolvlm/modular_smolvlm.py @@ -195,7 +195,9 @@ def inputs_merger( merged_embeds = torch.where(image_mask.unsqueeze(-1), image_embeds, inputs_embeds) return merged_embeds - def get_image_features(self, pixel_values: torch.FloatTensor, pixel_attention_mask: torch.LongTensor = None): + def get_image_features( + self, pixel_values: torch.FloatTensor, pixel_attention_mask: Optional[torch.LongTensor] = None + ): """ Encodes images into continuous embeddings that can be forwarded to the language model. 
diff --git a/src/transformers/models/smolvlm/processing_smolvlm.py b/src/transformers/models/smolvlm/processing_smolvlm.py index 59dc75114b98..97f0eaa9e7b2 100644 --- a/src/transformers/models/smolvlm/processing_smolvlm.py +++ b/src/transformers/models/smolvlm/processing_smolvlm.py @@ -249,7 +249,7 @@ def __call__( images: Union[ImageInput, list[ImageInput], list[list[ImageInput]]] = None, text: Union[TextInput, "PreTokenizedInput", list[TextInput], list["PreTokenizedInput"]] = None, audio=None, - videos: VideoInput = None, + videos: Optional[VideoInput] = None, **kwargs: Unpack[SmolVLMProcessorKwargs], ) -> BatchEncoding: """ diff --git a/src/transformers/models/smolvlm/video_processing_smolvlm.py b/src/transformers/models/smolvlm/video_processing_smolvlm.py index 020f2d4c8e93..5ad70d870c63 100644 --- a/src/transformers/models/smolvlm/video_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/video_processing_smolvlm.py @@ -156,7 +156,7 @@ def resize( self, video: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/superglue/image_processing_superglue.py b/src/transformers/models/superglue/image_processing_superglue.py index f02e2a9f65c0..bde3355d78ed 100644 --- a/src/transformers/models/superglue/image_processing_superglue.py +++ b/src/transformers/models/superglue/image_processing_superglue.py @@ -226,7 +226,7 @@ def preprocess( images, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] = None, diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 7894e34e227d..4c895b035feb 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -187,7 +187,7 @@ def preprocess( images, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] = None, diff --git a/src/transformers/models/textnet/image_processing_textnet.py b/src/transformers/models/textnet/image_processing_textnet.py index cff4b991d8f3..153e29785289 100644 --- a/src/transformers/models/textnet/image_processing_textnet.py +++ b/src/transformers/models/textnet/image_processing_textnet.py @@ -206,7 +206,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/textnet/image_processing_textnet_fast.py b/src/transformers/models/textnet/image_processing_textnet_fast.py index f6a163c42687..41b201a5c4ee 100644 --- a/src/transformers/models/textnet/image_processing_textnet_fast.py +++ b/src/transformers/models/textnet/image_processing_textnet_fast.py @@ -87,7 +87,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" 
= None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, size_divisor: int = 32, **kwargs, diff --git a/src/transformers/models/trocr/modeling_trocr.py b/src/transformers/models/trocr/modeling_trocr.py index 6bf7bfaf533c..83eb51b43444 100644 --- a/src/transformers/models/trocr/modeling_trocr.py +++ b/src/transformers/models/trocr/modeling_trocr.py @@ -51,7 +51,9 @@ def __init__(self, num_embeddings: int, embedding_dim: int): self.offset = 2 super().__init__(num_embeddings + self.offset, embedding_dim) - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: torch.Tensor = None): + def forward( + self, input_ids: torch.Tensor, past_key_values_length: int = 0, position_ids: Optional[torch.Tensor] = None + ): """`input_ids' shape is expected to be [bsz x seqlen].""" if position_ids is None: diff --git a/src/transformers/models/trocr/processing_trocr.py b/src/transformers/models/trocr/processing_trocr.py index 52cd1dc041db..d6469b70c1a3 100644 --- a/src/transformers/models/trocr/processing_trocr.py +++ b/src/transformers/models/trocr/processing_trocr.py @@ -18,7 +18,7 @@ import warnings from contextlib import contextmanager -from typing import Union +from typing import Optional, Union from ...image_processing_utils import BatchFeature from ...image_utils import ImageInput @@ -71,7 +71,7 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __call__( self, - images: ImageInput = None, + images: Optional[ImageInput] = None, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, audio=None, videos=None, diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index d906c5e6bc70..7fa758b6f484 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -269,7 +269,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -343,7 +343,7 @@ def preprocess( videos: Union[ImageInput, list[ImageInput], list[list[ImageInput]]], do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/tvp/image_processing_tvp_fast.py b/src/transformers/models/tvp/image_processing_tvp_fast.py index 90c39ea49fb0..a3bad696c36d 100644 --- a/src/transformers/models/tvp/image_processing_tvp_fast.py +++ b/src/transformers/models/tvp/image_processing_tvp_fast.py @@ -164,7 +164,7 @@ def resize( self, image: "torch.Tensor", size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, antialias: bool = True, **kwargs, ) -> "torch.Tensor": diff --git a/src/transformers/models/video_llava/image_processing_video_llava.py b/src/transformers/models/video_llava/image_processing_video_llava.py index eb9fbfa17888..1ed8f911af8e 100644 --- a/src/transformers/models/video_llava/image_processing_video_llava.py +++ b/src/transformers/models/video_llava/image_processing_video_llava.py @@ -175,7 +175,7 @@ def preprocess( 
videos: Optional[list[VideoInput]] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, @@ -326,10 +326,10 @@ def preprocess( def _preprocess_image( self, - image: ImageInput = None, + image: Optional[ImageInput] = None, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/video_llava/modeling_video_llava.py b/src/transformers/models/video_llava/modeling_video_llava.py index f2a967144b53..896c357e3cd2 100644 --- a/src/transformers/models/video_llava/modeling_video_llava.py +++ b/src/transformers/models/video_llava/modeling_video_llava.py @@ -58,7 +58,7 @@ class VideoLlavaModelOutputWithPast(ModelOutput): video_hidden_states of the model produced by the vision encoder and after projecting the last hidden state. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[list[torch.FloatTensor]] = None hidden_states: Optional[tuple[torch.FloatTensor]] = None attentions: Optional[tuple[torch.FloatTensor]] = None @@ -286,8 +286,8 @@ def get_placeholder_mask( self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, - image_features: torch.FloatTensor = None, - video_features: torch.FloatTensor = None, + image_features: Optional[torch.FloatTensor] = None, + video_features: Optional[torch.FloatTensor] = None, ): """ Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is @@ -326,9 +326,9 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values_images: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values_images: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -482,9 +482,9 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values_images: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values_images: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/video_llava/processing_video_llava.py b/src/transformers/models/video_llava/processing_video_llava.py index 2c7119fdc2a6..8af47aeee301 100644 --- a/src/transformers/models/video_llava/processing_video_llava.py +++ b/src/transformers/models/video_llava/processing_video_llava.py @@ -90,8 +90,8 @@ def __init__( def __call__( self, text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None, - images: ImageInput = None, - videos: ImageInput = None, + images: Optional[ImageInput] = None, + videos: Optional[ImageInput] = None, 
padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, max_length=None, diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index 489506831913..96545dc75311 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -182,7 +182,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -240,7 +240,7 @@ def preprocess( videos: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index a624d957013f..87abf7f7a7d6 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -344,7 +344,7 @@ def preprocess( do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, size_divisor: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vipllava/modeling_vipllava.py b/src/transformers/models/vipllava/modeling_vipllava.py index f435e8b75fc0..6a8c6944bcb8 100644 --- a/src/transformers/models/vipllava/modeling_vipllava.py +++ b/src/transformers/models/vipllava/modeling_vipllava.py @@ -213,8 +213,8 @@ def get_placeholder_mask( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -337,8 +337,8 @@ def multi_modal_projector(self): @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/vipllava/modular_vipllava.py b/src/transformers/models/vipllava/modular_vipllava.py index a715d2e6d817..ec12cc6ee1bf 100644 --- a/src/transformers/models/vipllava/modular_vipllava.py +++ b/src/transformers/models/vipllava/modular_vipllava.py @@ -106,8 +106,8 @@ def get_image_features( @auto_docstring def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -181,8 +181,8 @@ def get_image_features( def 
forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 27de1076cf54..16216e2eac90 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -156,7 +156,7 @@ def preprocess( images: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 140f7fc5fe7f..ab32d5b47eef 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -229,7 +229,7 @@ def _preprocess_image( image: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, @@ -292,7 +292,7 @@ def preprocess( videos: ImageInput, do_resize: Optional[bool] = None, size: Optional[dict[str, int]] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, do_center_crop: Optional[bool] = None, crop_size: Optional[dict[str, int]] = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/wav2vec2/processing_wav2vec2.py b/src/transformers/models/wav2vec2/processing_wav2vec2.py index c2da5ac9e398..1dc382d6f68a 100644 --- a/src/transformers/models/wav2vec2/processing_wav2vec2.py +++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py @@ -74,7 +74,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): def __call__( self, - audio: AudioInput = None, + audio: Optional[AudioInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, images=None, videos=None, diff --git a/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py index 253ddaac6c19..ead53edb101a 100644 --- a/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +++ b/src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py @@ -71,7 +71,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): def __call__( self, - audio: AudioInput = None, + audio: Optional[AudioInput] = None, text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None, images=None, videos=None, diff --git a/src/transformers/models/xlstm/modeling_xlstm.py b/src/transformers/models/xlstm/modeling_xlstm.py index 14f189d2f1cc..b77ec26d2b31 100644 --- a/src/transformers/models/xlstm/modeling_xlstm.py +++ b/src/transformers/models/xlstm/modeling_xlstm.py @@ -69,12 +69,12 @@ def mlstm_chunkwise_recurrent_fw_C( matV: torch.Tensor, vecB: torch.Tensor, vecI: torch.Tensor, - matC_states: torch.Tensor = None, - vecN_states: torch.Tensor = None, - scaMinter_states: torch.Tensor = 
None, - matC_initial: torch.Tensor = None, - vecN_initial: torch.Tensor = None, - scaMinter_initial: torch.Tensor = None, + matC_states: Optional[torch.Tensor] = None, + vecN_states: Optional[torch.Tensor] = None, + scaMinter_states: Optional[torch.Tensor] = None, + matC_initial: Optional[torch.Tensor] = None, + vecN_initial: Optional[torch.Tensor] = None, + scaMinter_initial: Optional[torch.Tensor] = None, qk_scale: Optional[float] = None, chunk_size: int = 64, num_chunks: int = 1, @@ -237,9 +237,9 @@ def mlstm_chunkwise_fw( value: torch.Tensor, igate: torch.Tensor, fgate: torch.Tensor, - cstate: torch.Tensor = None, - nstate: torch.Tensor = None, - mstate: torch.Tensor = None, + cstate: Optional[torch.Tensor] = None, + nstate: Optional[torch.Tensor] = None, + mstate: Optional[torch.Tensor] = None, qk_scale: Optional[float] = None, return_last_states: bool = False, return_all_states: bool = False, @@ -318,9 +318,9 @@ def mlstm_chunkwise_native_autograd( value: torch.Tensor, igate: torch.Tensor, fgate: torch.Tensor, - c_initial: torch.Tensor = None, - n_initial: torch.Tensor = None, - m_initial: torch.Tensor = None, + c_initial: Optional[torch.Tensor] = None, + n_initial: Optional[torch.Tensor] = None, + m_initial: Optional[torch.Tensor] = None, return_last_states: bool = False, eps: float = 1e-6, chunk_size: int = 64, @@ -446,9 +446,9 @@ def mlstm_recurrent_sequence_native( value: torch.Tensor, igate: torch.Tensor, fgate: torch.Tensor, - c_initial: torch.Tensor = None, - n_initial: torch.Tensor = None, - m_initial: torch.Tensor = None, + c_initial: Optional[torch.Tensor] = None, + n_initial: Optional[torch.Tensor] = None, + m_initial: Optional[torch.Tensor] = None, return_last_states: bool = False, eps: float = 1e-6, dtype_state: torch.dtype = torch.float32, @@ -520,9 +520,9 @@ def wrap_chunkwise_pad_zeros( value: torch.Tensor, fgate: torch.Tensor, igate: torch.Tensor, - c_initial: torch.Tensor = None, - n_initial: torch.Tensor = None, - m_initial: torch.Tensor = None, + c_initial: Optional[torch.Tensor] = None, + n_initial: Optional[torch.Tensor] = None, + m_initial: Optional[torch.Tensor] = None, return_last_states: bool = False, eps: float = 1e-6, autocast_kernel_dtype: torch.dtype = torch.bfloat16, @@ -584,9 +584,9 @@ def wrap_chunkwise_arbitrary_sequence_length( value: torch.Tensor, fgate: torch.Tensor, igate: torch.Tensor, - c_initial: torch.Tensor = None, - n_initial: torch.Tensor = None, - m_initial: torch.Tensor = None, + c_initial: Optional[torch.Tensor] = None, + n_initial: Optional[torch.Tensor] = None, + m_initial: Optional[torch.Tensor] = None, return_last_states: bool = True, eps: float = 1e-6, autocast_kernel_dtype: torch.dtype = torch.bfloat16, @@ -773,9 +773,9 @@ def forward( value: torch.Tensor, igate: torch.Tensor, fgate: torch.Tensor, - c_initial: torch.Tensor = None, - n_initial: torch.Tensor = None, - m_initial: torch.Tensor = None, + c_initial: Optional[torch.Tensor] = None, + n_initial: Optional[torch.Tensor] = None, + m_initial: Optional[torch.Tensor] = None, return_last_states: bool = False, mode: Optional[Literal["train", "inference"]] = None, ) -> Union[torch.Tensor, tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor, torch.Tensor]]]: diff --git a/src/transformers/models/yolos/image_processing_yolos_fast.py b/src/transformers/models/yolos/image_processing_yolos_fast.py index 85aeba94bfcc..4bea14b508ea 100644 --- a/src/transformers/models/yolos/image_processing_yolos_fast.py +++ b/src/transformers/models/yolos/image_processing_yolos_fast.py @@ -424,7 +424,7 
@@ def resize( self, image: torch.Tensor, size: SizeDict, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, **kwargs, ) -> torch.Tensor: """ @@ -480,7 +480,7 @@ def resize_annotation( orig_size: tuple[int, int], target_size: tuple[int, int], threshold: float = 0.5, - interpolation: "F.InterpolationMode" = None, + interpolation: Optional["F.InterpolationMode"] = None, ): """ Resizes an annotation to a target size. diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth.py b/src/transformers/models/zoedepth/image_processing_zoedepth.py index 90e69105be01..08727ce5a8cc 100644 --- a/src/transformers/models/zoedepth/image_processing_zoedepth.py +++ b/src/transformers/models/zoedepth/image_processing_zoedepth.py @@ -309,7 +309,7 @@ def preprocess( size: Optional[int] = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, - resample: PILImageResampling = None, + resample: Optional[PILImageResampling] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index cbe885b997e5..0b448678d5c7 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -473,10 +473,10 @@ class MultiModalData: and we might change its API in the future. """ - num_image_tokens: list[int] = None - num_video_tokens: list[int] = None - num_audio_tokens: list[int] = None - num_image_patches: list[int] = None + num_image_tokens: Optional[list[int]] = None + num_video_tokens: Optional[list[int]] = None + num_audio_tokens: Optional[list[int]] = None + num_image_patches: Optional[list[int]] = None def __contains__(self, key): return hasattr(self, key) and getattr(self, key) is not None diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 7102f7a5bedc..c72bdbb70bcd 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -111,8 +111,8 @@ class TrainerState: is_world_process_zero: bool = True is_hyper_param_search: bool = False trial_name: Optional[str] = None - trial_params: dict[str, Union[str, float, int, bool]] = None - stateful_callbacks: list["TrainerCallback"] = None + trial_params: Optional[dict[str, Union[str, float, int, bool]]] = None + stateful_callbacks: Optional[list["TrainerCallback"]] = None def __post_init__(self): if self.log_history is None: diff --git a/src/transformers/trainer_seq2seq.py b/src/transformers/trainer_seq2seq.py index 4cbcad1f9de3..fc387772f092 100644 --- a/src/transformers/trainer_seq2seq.py +++ b/src/transformers/trainer_seq2seq.py @@ -54,8 +54,8 @@ class Seq2SeqTrainer(Trainer): @deprecate_kwarg("tokenizer", new_name="processing_class", version="5.0.0", raise_if_both_names=True) def __init__( self, - model: Union["PreTrainedModel", nn.Module] = None, - args: "TrainingArguments" = None, + model: Optional[Union["PreTrainedModel", nn.Module]] = None, + args: Optional["TrainingArguments"] = None, data_collator: Optional["DataCollator"] = None, train_dataset: Optional[Union[Dataset, "IterableDataset", "datasets.Dataset"]] = None, eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, @@ -66,7 +66,7 @@ def __init__( compute_loss_func: Optional[Callable] = None, compute_metrics: Optional[Callable[["EvalPrediction"], dict]] = None, callbacks: 
Optional[list["TrainerCallback"]] = None, - optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, ): super().__init__( diff --git a/src/transformers/video_utils.py b/src/transformers/video_utils.py index 305817bb41c9..304b5bd7c5b4 100644 --- a/src/transformers/video_utils.py +++ b/src/transformers/video_utils.py @@ -66,7 +66,7 @@ list["np.ndarray"], list["torch.Tensor"], list[list["PIL.Image.Image"]], - list[list["np.ndarrray"]], + list[list[np.ndarray]], list[list["torch.Tensor"]], URL, list[URL], @@ -80,12 +80,12 @@ @dataclass class VideoMetadata(Mapping): total_num_frames: int - fps: float = None - width: int = None - height: int = None - duration: float = None - video_backend: str = None - frames_indices: list[int] = None + fps: Optional[float] = None + width: Optional[int] = None + height: Optional[int] = None + duration: Optional[float] = None + video_backend: Optional[str] = None + frames_indices: Optional[list[int]] = None def __iter__(self): return (f.name for f in fields(self)) @@ -245,7 +245,7 @@ def make_batched_metadata(videos: VideoInput, video_metadata: Union[VideoMetadat return video_metadata -def get_video_size(video: np.ndarray, channel_dim: ChannelDimension = None) -> tuple[int, int]: +def get_video_size(video: np.ndarray, channel_dim: Optional[ChannelDimension] = None) -> tuple[int, int]: """ Returns the (height, width) dimensions of the video.