Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/transformers/models/detr/image_processing_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1724,7 +1724,7 @@ def get_ids_area(masks, scores, dedup=False):

# inspired by https://github.com/facebookresearch/detr/blob/master/models/detr.py#L258
def post_process_object_detection(
self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
):
"""
Converts the raw output of [`DetrForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/detr/image_processing_detr_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ def get_ids_area(masks, scores, dedup=False):

# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_object_detection
def post_process_object_detection(
self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
):
"""
Converts the raw output of [`DetrForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2673,9 +2673,9 @@ def replace_multimodal_special_tokens(
def __call__(
self,
text: TextInput = None,
images: ImageInput = None,
videos: VideoInput = None,
audio: AudioInput = None,
images: Optional[ImageInput] = None,
videos: Optional[VideoInput] = None,
audio: Optional[AudioInput] = None,
**kwargs,
):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Union
from typing import Optional, Union

import numpy as np

Expand Down Expand Up @@ -123,9 +123,9 @@ def __init__(
def __call__(
self,
text: TextInput = None,
images: ImageInput = None,
videos: VideoInput = None,
audio: AudioInput = None,
images: Optional[ImageInput] = None,
videos: Optional[VideoInput] = None,
audio: Optional[AudioInput] = None,
**kwargs,
) -> BatchFeature:
"""
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/rag/modeling_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def from_pretrained_question_encoder_generator(
cls,
question_encoder_pretrained_model_name_or_path: Optional[str] = None,
generator_pretrained_model_name_or_path: Optional[str] = None,
retriever: RagRetriever = None,
retriever: Optional[RagRetriever] = None,
**kwargs,
) -> PreTrainedModel:
r"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ def post_process_object_detection(
self,
outputs,
threshold: float = 0.5,
target_sizes: Union[TensorType, list[tuple]] = None,
target_sizes: Optional[Union[TensorType, list[tuple]]] = None,
use_focal_loss: bool = True,
):
"""
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/sam2_video/modular_sam2_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,9 @@ def init_video_session(
self,
video: Optional[VideoInput] = None,
inference_device: Union[str, "torch.device"] = "cpu",
inference_state_device: Union[str, "torch.device"] = None,
processing_device: Union[str, "torch.device"] = None,
video_storage_device: Union[str, "torch.device"] = None,
inference_state_device: Optional[Union[str, "torch.device"]] = None,
processing_device: Optional[Union[str, "torch.device"]] = None,
video_storage_device: Optional[Union[str, "torch.device"]] = None,
max_vision_features_cache_size: int = 1,
dtype: torch.dtype = torch.float32,
):
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/sam2_video/processing_sam2_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,9 +530,9 @@ def init_video_session(
self,
video: Optional[VideoInput] = None,
inference_device: Union[str, "torch.device"] = "cpu",
inference_state_device: Union[str, "torch.device"] = None,
processing_device: Union[str, "torch.device"] = None,
video_storage_device: Union[str, "torch.device"] = None,
inference_state_device: Optional[Union[str, "torch.device"]] = None,
processing_device: Optional[Union[str, "torch.device"]] = None,
video_storage_device: Optional[Union[str, "torch.device"]] = None,
max_vision_features_cache_size: int = 1,
dtype: torch.dtype = torch.float32,
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,9 @@ def vocab_size(self):

def __call__(
self,
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair_target: Optional[
Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,9 @@ def _from_pretrained(

def __call__(
self,
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair_target: Optional[
Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
] = None,
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/trocr/processing_trocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, image_processor=None, tokenizer=None, **kwargs):
def __call__(
self,
images: Optional[ImageInput] = None,
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
**kwargs: Unpack[TrOCRProcessorKwargs],
) -> BatchFeature:
"""
Expand Down
10 changes: 5 additions & 5 deletions src/transformers/models/udop/tokenization_udop.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,11 +508,11 @@ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] =
@add_end_docstrings(UDOP_ENCODE_KWARGS_DOCSTRING)
def __call__(
self,
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair: Optional[Union[PreTokenizedInput, list[PreTokenizedInput]]] = None,
boxes: Optional[Union[list[list[int]], list[list[list[int]]]]] = None,
word_labels: Optional[Union[list[int], list[list[int]]]] = None,
text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]] = None,
text_target: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
text_pair_target: Optional[
Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
] = None,
Expand Down Expand Up @@ -703,7 +703,7 @@ def batch_encode_plus_boxes(
word_labels: Optional[list[list[int]]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
max_length: Optional[int] = None,
stride: int = 0,
is_split_into_words: bool = False,
Expand Down Expand Up @@ -771,7 +771,7 @@ def encode_boxes(
word_labels: Optional[list[list[int]]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
max_length: Optional[int] = None,
stride: int = 0,
return_tensors: Optional[Union[str, TensorType]] = None,
Expand Down Expand Up @@ -814,7 +814,7 @@ def encode_plus_boxes(
word_labels: Optional[list[list[int]]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
max_length: Optional[int] = None,
stride: int = 0,
is_split_into_words: bool = False,
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/video_llava/processing_video_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def __call__(
images: Optional[ImageInput] = None,
videos: Optional[ImageInput] = None,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
max_length=None,
truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
max_length: Optional[int] = None,
return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
) -> BatchFeature:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ def post_process_pose_estimation(
boxes: Union[list[list[list[float]]], np.ndarray],
kernel_size: int = 11,
threshold: Optional[float] = None,
target_sizes: Union[TensorType, list[tuple]] = None,
target_sizes: Optional[Union[TensorType, list[tuple]]] = None,
):
"""
Transform the heatmaps into keypoint predictions and transform them back to the image.
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/wav2vec2/modeling_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ def compute_contrastive_logits(
target_features: torch.FloatTensor,
negative_features: torch.FloatTensor,
predicted_features: torch.FloatTensor,
temperature: int = 0.1,
temperature: float = 0.1,
):
"""
Compute logits for contrastive loss using cosine similarity as the distance measure between
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ def compute_contrastive_logits(
target_features: torch.FloatTensor,
negative_features: torch.FloatTensor,
predicted_features: torch.FloatTensor,
temperature: int = 0.1,
temperature: float = 0.1,
):
"""
Compute logits for contrastive loss using cosine similarity as the distance measure between
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/xcodec/configuration_xcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ def __init__(
codebook_size: int = 1024,
codebook_dim: Optional[int] = None,
initializer_range: float = 0.02,
acoustic_model_config: Union[dict, DacConfig] = None,
semantic_model_config: Union[dict, HubertConfig] = None,
acoustic_model_config: Optional[Union[dict, DacConfig]] = None,
semantic_model_config: Optional[Union[dict, HubertConfig]] = None,
**kwargs,
):
if acoustic_model_config is None:
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/yolos/image_processing_yolos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ def post_process(self, outputs, target_sizes):

# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_object_detection with Detr->Yolos
def post_process_object_detection(
self, outputs, threshold: float = 0.5, target_sizes: Union[TensorType, list[tuple]] = None
self, outputs, threshold: float = 0.5, target_sizes: Optional[Union[TensorType, list[tuple]]] = None
):
"""
Converts the raw output of [`YolosForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/utils/quantization_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,11 +1128,11 @@ def __init__(
in_features: int = -1,
indices_as_float: bool = False,
is_indice_packed: bool = True,
num_centroids: tuple = [-1, -1],
num_res_centroids: tuple = [-1, -1],
num_centroids: list = [-1, -1],
num_res_centroids: list = [-1, -1],
out_features: int = -1,
outlier_size: int = 0,
vector_lens: tuple = [-1, -1],
vector_lens: list = [-1, -1],
**kwargs,
):
self.enable_norm = enable_norm
Expand Down
2 changes: 1 addition & 1 deletion tests/models/tvp/test_image_processing_tvp.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(
do_pad: bool = True,
pad_size: dict[str, int] = {"height": 80, "width": 80},
fill: Optional[int] = None,
pad_mode: PaddingMode = None,
pad_mode: Optional[PaddingMode] = None,
do_normalize: bool = True,
image_mean: Optional[Union[float, list[float]]] = [0.48145466, 0.4578275, 0.40821073],
image_std: Optional[Union[float, list[float]]] = [0.26862954, 0.26130258, 0.27577711],
Expand Down