diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index 4028c38ff227..983fd4e16953 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -61,14 +61,14 @@ if is_torchvision_available(): from .image_utils import pil_torch_interpolation_mapping - - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F else: pil_torch_interpolation_mapping = None +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +elif is_torchvision_available(): + from torchvision.transforms import functional as F + logger = logging.get_logger(__name__) diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py index f3261909dd03..7303ca2e9c50 100644 --- a/src/transformers/models/aria/modeling_aria.py +++ b/src/transformers/models/aria/modeling_aria.py @@ -21,6 +21,9 @@ from dataclasses import dataclass from typing import Callable, Optional, Union +import torch +from torch import nn + from ...activations import ACT2FN from ...cache_utils import Cache, DynamicCache from ...generation import GenerationMixin @@ -35,16 +38,10 @@ from ...utils import TransformersKwargs, auto_docstring, can_return_tuple from ...utils.deprecation import deprecate_kwarg from ...utils.generic import check_model_inputs -from ...utils.import_utils import is_torch_available from ..auto import AutoModel from .configuration_aria import AriaConfig, AriaTextConfig -if is_torch_available(): - import torch - from torch import nn - - @use_kernel_forward_from_hub("RMSNorm") class AriaTextRMSNorm(nn.Module): def __init__(self, hidden_size, eps=1e-6): diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index 790003d853c4..a626d2cd4b82 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -16,6 +16,8 @@ from typing import Optional, Union import numpy as np +import torch +from torch import nn from ...activations import ACT2FN from ...cache_utils import Cache @@ -39,7 +41,6 @@ from ...processing_utils import MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack from ...tokenization_utils import PreTokenizedInput, TextInput from ...utils import TensorType, TransformersKwargs, auto_docstring, can_return_tuple, logging -from ...utils.import_utils import is_torch_available from ..auto import CONFIG_MAPPING, AutoConfig, AutoTokenizer from ..llama.configuration_llama import LlamaConfig from ..llama.modeling_llama import ( @@ -62,10 +63,6 @@ logger = logging.get_logger(__name__) -if is_torch_available(): - import torch - from torch import nn - def sequential_experts_gemm(token_states, expert_weights, tokens_per_expert): """ diff --git a/src/transformers/models/beit/image_processing_beit_fast.py b/src/transformers/models/beit/image_processing_beit_fast.py index 43ed6dd1125d..e10dc552cf37 100644 --- a/src/transformers/models/beit/image_processing_beit_fast.py +++ b/src/transformers/models/beit/image_processing_beit_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -36,18 +38,13 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) 
-if is_torch_available(): - import torch - if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py index 4a7450c84498..44da5d4486e7 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py @@ -17,6 +17,8 @@ from collections.abc import Iterable from typing import Optional, Union +import torch + from ...image_processing_utils_fast import ( BaseImageProcessorFast, BatchFeature, @@ -29,17 +31,13 @@ reorder_images, ) from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling -from ...utils import auto_docstring, is_torch_available, is_torchvision_available, is_torchvision_v2_available - +from ...utils import auto_docstring, is_torchvision_v2_available -if is_torch_available(): - import torch -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F def make_pixel_mask( diff --git a/src/transformers/models/chameleon/image_processing_chameleon_fast.py b/src/transformers/models/chameleon/image_processing_chameleon_fast.py index 421c4ea98374..39aa4ec87b00 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon_fast.py +++ b/src/transformers/models/chameleon/image_processing_chameleon_fast.py @@ -17,28 +17,18 @@ from typing import Optional import numpy as np +import PIL +import torch from ...image_processing_utils_fast import BaseImageProcessorFast from ...image_utils import ImageInput, PILImageResampling, SizeDict -from ...utils import ( - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - is_vision_available, - logging, -) - - -if is_vision_available(): - import PIL -if is_torch_available(): - import torch -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +from ...utils import auto_docstring, is_torchvision_v2_available, logging + + +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/cohere2_vision/modular_cohere2_vision.py b/src/transformers/models/cohere2_vision/modular_cohere2_vision.py index 36f5d0b71ce0..7ef20305b99e 100644 --- a/src/transformers/models/cohere2_vision/modular_cohere2_vision.py +++ b/src/transformers/models/cohere2_vision/modular_cohere2_vision.py @@ -32,11 +32,7 @@ from ...cache_utils import Cache from ...modeling_flash_attention_utils import FlashAttentionKwargs from ...processing_utils import Unpack -from ...utils import ( - TransformersKwargs, - auto_docstring, - logging, -) +from ...utils import TransformersKwargs, auto_docstring, logging from ...utils.generic import check_model_inputs from .configuration_cohere2_vision import Cohere2VisionConfig diff --git a/src/transformers/models/colpali/modular_colpali.py 
b/src/transformers/models/colpali/modular_colpali.py index 0988b0f7aafb..cf28475f4b3c 100644 --- a/src/transformers/models/colpali/modular_colpali.py +++ b/src/transformers/models/colpali/modular_colpali.py @@ -28,7 +28,6 @@ if is_torch_available(): import torch - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/colqwen2/modular_colqwen2.py b/src/transformers/models/colqwen2/modular_colqwen2.py index a4684d670d17..f3ae79abf6fa 100644 --- a/src/transformers/models/colqwen2/modular_colqwen2.py +++ b/src/transformers/models/colqwen2/modular_colqwen2.py @@ -30,7 +30,6 @@ if is_torch_available(): import torch - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index 86e51f2b4a60..5b9fe6325517 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -7,6 +7,10 @@ import pathlib from typing import Any, Optional, Union +import torch +from torch import nn +from torchvision.io import read_image + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -29,14 +33,7 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - logging, -) +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging from ...utils.import_utils import requires from .image_processing_conditional_detr import ( compute_segments, @@ -46,20 +43,9 @@ ) -if is_torch_available(): - import torch - - -if is_torch_available(): - from torch import nn - - if is_torchvision_v2_available(): - from torchvision.io import read_image from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): - from torchvision.io import read_image +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/conditional_detr/modular_conditional_detr.py b/src/transformers/models/conditional_detr/modular_conditional_detr.py index 176ae8b6604b..9d0faf2c4b9e 100644 --- a/src/transformers/models/conditional_detr/modular_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modular_conditional_detr.py @@ -1,5 +1,7 @@ from typing import Union +import torch + from transformers.models.detr.image_processing_detr_fast import DetrImageProcessorFast from ...image_transforms import ( @@ -7,15 +9,10 @@ ) from ...utils import ( TensorType, - is_torch_available, logging, ) -if is_torch_available(): - import torch - - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/convnext/image_processing_convnext_fast.py b/src/transformers/models/convnext/image_processing_convnext_fast.py index 0866b230a52e..a1002d950399 100644 --- a/src/transformers/models/convnext/image_processing_convnext_fast.py +++ b/src/transformers/models/convnext/image_processing_convnext_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -35,20 +37,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if 
is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class ConvNextFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py b/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py index a6c35f6be0d5..b3abae5af0a7 100644 --- a/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/configuration_deepseek_vl.py @@ -21,9 +21,7 @@ from typing import Optional from ...configuration_utils import PretrainedConfig -from ...utils import ( - logging, -) +from ...utils import logging from ..auto import CONFIG_MAPPING, AutoConfig diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py index 7764a8250159..896e91f0692c 100644 --- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py @@ -20,6 +20,7 @@ from typing import Optional, Union +import torch import torch.nn.functional as F from ...image_processing_utils import BatchFeature @@ -31,15 +32,7 @@ ) from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling, SizeDict from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, -) - - -if is_torch_available(): - import torch +from ...utils import TensorType, auto_docstring class DeepseekVLFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py index 039602a159f2..22d8e0928a6e 100644 --- a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py @@ -21,6 +21,9 @@ from dataclasses import dataclass from typing import Optional, Union +import torch +import torch.nn as nn + from ...cache_utils import Cache from ...generation import GenerationMixin from ...modeling_outputs import ModelOutput @@ -30,17 +33,11 @@ TransformersKwargs, auto_docstring, can_return_tuple, - is_torch_available, ) from ..auto import AutoModel from .configuration_deepseek_vl import DeepseekVLConfig -if is_torch_available(): - import torch - import torch.nn as nn - - @dataclass @auto_docstring( custom_intro=""" diff --git a/src/transformers/models/deepseek_vl/modular_deepseek_vl.py b/src/transformers/models/deepseek_vl/modular_deepseek_vl.py index 33f1cf26bb65..5bfc0ae7d74c 100644 --- a/src/transformers/models/deepseek_vl/modular_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/modular_deepseek_vl.py @@ -14,6 +14,9 @@ from typing import Optional, Union +import torch +import torch.nn as nn + from ...configuration_utils import PretrainedConfig from ...image_processing_utils import BatchFeature from ...image_utils import ImageInput @@ -24,7 +27,6 @@ ) from ...utils import ( auto_docstring, - is_torch_available, logging, ) from ..auto import CONFIG_MAPPING, AutoConfig, AutoModel @@ -34,10 +36,6 @@ from ..janus.modeling_janus import JanusForConditionalGeneration, JanusModel, JanusPreTrainedModel -if 
is_torch_available(): - import torch - import torch.nn as nn - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py index 9fd82dbfefdf..e8c6e2df6ea3 100644 --- a/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py @@ -21,9 +21,7 @@ from typing import Optional from ...configuration_utils import PretrainedConfig -from ...utils import ( - logging, -) +from ...utils import logging from ..auto import CONFIG_MAPPING, AutoConfig diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py index 3770cf18303e..db9c9ad987c1 100644 --- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py @@ -30,25 +30,23 @@ group_images_by_shape, reorder_images, ) -from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension, PILImageResampling, SizeDict -from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torchvision_available, - is_torchvision_v2_available, +from ...image_utils import ( + OPENAI_CLIP_MEAN, + OPENAI_CLIP_STD, + ChannelDimension, + PILImageResampling, + SizeDict, + pil_torch_interpolation_mapping, ) +from ...processing_utils import Unpack +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F - - from ...image_utils import pil_torch_interpolation_mapping -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F - from ...image_utils import pil_torch_interpolation_mapping - class DeepseekVLHybridFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): r""" diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py index c6cf71b09613..d97b00f7fbd2 100644 --- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py @@ -37,6 +37,7 @@ infer_channel_dimension_format, is_scaled_image, make_flat_list_of_images, + pil_torch_interpolation_mapping, to_numpy_array, valid_images, validate_preprocess_arguments, @@ -52,7 +53,6 @@ auto_docstring, can_return_tuple, filter_out_non_signature_kwargs, - is_torchvision_available, is_torchvision_v2_available, logging, ) @@ -72,13 +72,9 @@ if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F - - from ...image_utils import pil_torch_interpolation_mapping -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F - from ...image_utils import pil_torch_interpolation_mapping - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py index 2bfbedddc5d0..cd07f8db350b 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +++ 
b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py @@ -7,6 +7,9 @@ import pathlib from typing import Any, Optional, Union +import torch +from torchvision.io import read_image + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -29,28 +32,14 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - logging, -) +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging from ...utils.import_utils import requires from .image_processing_deformable_detr import get_size_with_aspect_ratio -if is_torch_available(): - import torch - - if is_torchvision_v2_available(): - from torchvision.io import read_image from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): - from torchvision.io import read_image +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/deformable_detr/modular_deformable_detr.py b/src/transformers/models/deformable_detr/modular_deformable_detr.py index 57aa52e364b6..2e38df7845a2 100644 --- a/src/transformers/models/deformable_detr/modular_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modular_deformable_detr.py @@ -1,19 +1,16 @@ from typing import Union +import torch + from transformers.models.detr.image_processing_detr_fast import DetrImageProcessorFast from ...image_transforms import center_to_corners_format from ...utils import ( TensorType, - is_torch_available, logging, ) -if is_torch_available(): - import torch - - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py b/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py index d27220c3d2be..76c1a53e0073 100644 --- a/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py +++ b/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py @@ -16,14 +16,20 @@ from typing import TYPE_CHECKING, Optional, Union +import torch + from ...image_processing_base import BatchFeature from ...image_processing_utils_fast import BaseImageProcessorFast, group_images_by_shape, reorder_images -from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, PILImageResampling, SizeDict +from ...image_utils import ( + IMAGENET_STANDARD_MEAN, + IMAGENET_STANDARD_STD, + PILImageResampling, + SizeDict, + pil_torch_interpolation_mapping, +) from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, requires_backends, @@ -34,20 +40,14 @@ if TYPE_CHECKING: from .modeling_depth_pro import DepthProDepthEstimatorOutput -logger = logging.get_logger(__name__) - - -if is_torch_available(): - import torch +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F -if is_torchvision_available(): - from ...image_utils import pil_torch_interpolation_mapping - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +logger = logging.get_logger(__name__) @auto_docstring diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index 
ba216a6f2d49..96a89a98074c 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -19,6 +19,11 @@ from collections import defaultdict from typing import Any, Optional, Union +import PIL +import torch +from torch import nn +from torchvision.io import read_image + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -44,10 +49,7 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, - is_vision_available, logging, ) from ...utils.import_utils import requires @@ -59,20 +61,9 @@ ) -if is_torch_available(): - import torch - from torch import nn - -if is_vision_available(): - import PIL - - if is_torchvision_v2_available(): - from torchvision.io import read_image from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): - from torchvision.io import read_image +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py b/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py index fba0d3089438..cdb68044bfc4 100644 --- a/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +++ b/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py @@ -16,31 +16,27 @@ from typing import Optional, Union +import torch + from transformers.image_processing_base import BatchFeature from transformers.image_processing_utils_fast import BaseImageProcessorFast, group_images_by_shape, reorder_images from transformers.image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, PILImageResampling, SizeDict from transformers.utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, ) from transformers.utils.import_utils import requires -logger = logging.get_logger(__name__) - - -if is_torch_available(): - import torch - if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F +logger = logging.get_logger(__name__) + @auto_docstring @requires(backends=("torchvision", "torch")) diff --git a/src/transformers/models/donut/image_processing_donut_fast.py b/src/transformers/models/donut/image_processing_donut_fast.py index 23714affe1e8..7c808ab60cd4 100644 --- a/src/transformers/models/donut/image_processing_donut_fast.py +++ b/src/transformers/models/donut/image_processing_donut_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageInput, PILImageResampling, SizeDict @@ -23,23 +25,17 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, ) -logger = logging.get_logger(__name__) - -if is_torch_available(): - import torch +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 
import functional as F - else: - from torchvision.transforms import functional as F +logger = logging.get_logger(__name__) class DonutFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/dpt/image_processing_dpt_fast.py b/src/transformers/models/dpt/image_processing_dpt_fast.py index 7fce8a9f64db..d4848c50653c 100644 --- a/src/transformers/models/dpt/image_processing_dpt_fast.py +++ b/src/transformers/models/dpt/image_processing_dpt_fast.py @@ -24,6 +24,8 @@ from collections.abc import Iterable from typing import TYPE_CHECKING, Optional, Union +import torch + from ...image_processing_base import BatchFeature from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images @@ -37,25 +39,15 @@ is_torch_tensor, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - requires_backends, -) +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, requires_backends if TYPE_CHECKING: from ...modeling_outputs import DepthEstimatorOutput -if is_torch_available(): - import torch - if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/dpt/modular_dpt.py b/src/transformers/models/dpt/modular_dpt.py index 7ae6bb40c3af..32ca94a2d43f 100644 --- a/src/transformers/models/dpt/modular_dpt.py +++ b/src/transformers/models/dpt/modular_dpt.py @@ -18,6 +18,8 @@ from collections.abc import Iterable from typing import TYPE_CHECKING, Optional, Union +import torch + from ...image_processing_base import BatchFeature from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images @@ -30,8 +32,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, requires_backends, ) @@ -41,12 +41,9 @@ if TYPE_CHECKING: from ...modeling_outputs import DepthEstimatorOutput -if is_torch_available(): - import torch - if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py b/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py index 5eb6e6589058..5f7437c45b2e 100644 --- a/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +++ b/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py @@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Optional, Union import torch +from PIL import Image, ImageDraw from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -38,27 +39,18 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, - is_vision_available, ) -if is_torch_available(): - import torch - if TYPE_CHECKING: from .modeling_efficientloftr import KeypointMatchingOutput if is_torchvision_v2_available(): import torchvision.transforms.v2.functional as F -elif is_torchvision_available(): +else: import 
torchvision.transforms.functional as F -if is_vision_available(): - from PIL import Image, ImageDraw - def _is_valid_image(image): return is_pil_image(image) or ( diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py b/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py index 41689e3dc080..3544d927c146 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py @@ -17,6 +17,8 @@ from functools import lru_cache from typing import Optional, Union +import torch + from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageInput, PILImageResampling, SizeDict @@ -24,20 +26,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class EfficientNetFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/eomt/image_processing_eomt_fast.py b/src/transformers/models/eomt/image_processing_eomt_fast.py index 58457064412d..97a13a0745eb 100644 --- a/src/transformers/models/eomt/image_processing_eomt_fast.py +++ b/src/transformers/models/eomt/image_processing_eomt_fast.py @@ -18,6 +18,7 @@ from typing import Optional, Union import numpy as np +import torch from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -39,8 +40,6 @@ TensorType, auto_docstring, filter_out_non_signature_kwargs, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) from .image_processing_eomt import ( @@ -51,14 +50,10 @@ ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class EomtImageProcessorFastKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/flava/image_processing_flava_fast.py b/src/transformers/models/flava/image_processing_flava_fast.py index 5dcc5326d968..97409ddd57ed 100644 --- a/src/transformers/models/flava/image_processing_flava_fast.py +++ b/src/transformers/models/flava/image_processing_flava_fast.py @@ -20,6 +20,8 @@ from functools import lru_cache from typing import Any, Optional, Union +import torch + from ...image_processing_utils_fast import ( BaseImageProcessorFast, BatchFeature, @@ -27,13 +29,11 @@ get_size_dict, ) from ...image_transforms import ChannelDimension, group_images_by_shape, reorder_images -from ...image_utils import ImageInput, PILImageResampling, SizeDict +from ...image_utils import ImageInput, PILImageResampling, SizeDict, pil_torch_interpolation_mapping from ...processing_utils import Unpack from ...utils import ( 
TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) from .image_processing_flava import ( @@ -45,16 +45,10 @@ ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - from ...image_utils import pil_torch_interpolation_mapping - - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class FlavaMaskingGenerator: diff --git a/src/transformers/models/florence2/modeling_florence2.py b/src/transformers/models/florence2/modeling_florence2.py index 0c1cf26fa4bc..763756faf73f 100644 --- a/src/transformers/models/florence2/modeling_florence2.py +++ b/src/transformers/models/florence2/modeling_florence2.py @@ -22,6 +22,9 @@ from dataclasses import dataclass from typing import Any, Callable, Optional, Union +import torch.nn as nn +import torch.nn.functional as F + from ...activations import ACT2FN from ...cache_utils import Cache from ...generation import GenerationMixin @@ -41,8 +44,6 @@ if is_torch_available(): import torch - import torch.nn as nn - import torch.nn.functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/florence2/modular_florence2.py b/src/transformers/models/florence2/modular_florence2.py index d82d9ac5255e..f8732257f102 100644 --- a/src/transformers/models/florence2/modular_florence2.py +++ b/src/transformers/models/florence2/modular_florence2.py @@ -18,6 +18,8 @@ from typing import Any, Callable, Optional, Union import numpy as np +import torch.nn as nn +import torch.nn.functional as F from ...activations import ACT2FN from ...cache_utils import Cache @@ -28,13 +30,7 @@ from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import MultiModalData, ProcessorMixin, Unpack from ...tokenization_utils_base import PreTokenizedInput, TextInput -from ...utils import ( - TransformersKwargs, - auto_docstring, - can_return_tuple, - is_torch_available, - logging, -) +from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torch_available, logging from ..auto import CONFIG_MAPPING, AutoConfig from ..bart.modeling_bart import eager_attention_forward, shift_tokens_right from ..beit.modeling_beit import BeitDropPath @@ -45,9 +41,6 @@ if is_torch_available(): import torch - import torch.nn as nn - import torch.nn.functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/florence2/processing_florence2.py b/src/transformers/models/florence2/processing_florence2.py index 53e3d562aa29..91b63e9da7db 100644 --- a/src/transformers/models/florence2/processing_florence2.py +++ b/src/transformers/models/florence2/processing_florence2.py @@ -33,7 +33,6 @@ if is_torch_available(): import torch - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/gemma3/image_processing_gemma3_fast.py b/src/transformers/models/gemma3/image_processing_gemma3_fast.py index 3826f40bd997..eb828a89643d 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3_fast.py +++ b/src/transformers/models/gemma3/image_processing_gemma3_fast.py @@ -18,6 +18,8 @@ import math from typing import Optional, Union +import torch + from ...image_processing_utils_fast import ( BaseImageProcessorFast, BatchFeature, @@ -25,30 +27,20 @@ group_images_by_shape, 
reorder_images, ) -from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageInput, SizeDict +from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageInput, PILImageResampling, SizeDict from ...processing_utils import Unpack from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, - is_vision_available, logging, ) -if is_vision_available(): - from ...image_utils import PILImageResampling - -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/glm4v/image_processing_glm4v_fast.py b/src/transformers/models/glm4v/image_processing_glm4v_fast.py index 061654519d21..fbf4aebaac6a 100644 --- a/src/transformers/models/glm4v/image_processing_glm4v_fast.py +++ b/src/transformers/models/glm4v/image_processing_glm4v_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import ( BatchFeature, ) @@ -36,22 +38,16 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, ) from .image_processing_glm4v import smart_resize -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py index 38b87aed623f..5277f1c4e13b 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -28,21 +30,15 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) from .image_processing_got_ocr2 import get_optimal_tiled_canvas -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class GotOcr2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py index 59866c9a410e..66528519eef8 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +++ 
b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py @@ -7,6 +7,9 @@ import pathlib from typing import TYPE_CHECKING, Any, Optional, Union +import torch +from torchvision.io import read_image + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -29,14 +32,7 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - logging, -) +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging from ...utils.import_utils import requires from .image_processing_grounding_dino import get_size_with_aspect_ratio @@ -44,16 +40,10 @@ if TYPE_CHECKING: from .modeling_grounding_dino import GroundingDinoObjectDetectionOutput -if is_torch_available(): - import torch - if is_torchvision_v2_available(): - from torchvision.io import read_image from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): - from torchvision.io import read_image +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/grounding_dino/modular_grounding_dino.py b/src/transformers/models/grounding_dino/modular_grounding_dino.py index f066f762cfa3..a7b9c570e7b0 100644 --- a/src/transformers/models/grounding_dino/modular_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modular_grounding_dino.py @@ -1,11 +1,12 @@ from typing import TYPE_CHECKING, Optional, Union +import torch + from transformers.models.detr.image_processing_detr_fast import DetrImageProcessorFast from ...image_transforms import center_to_corners_format from ...utils import ( TensorType, - is_torch_available, logging, ) @@ -13,9 +14,6 @@ if TYPE_CHECKING: from .modeling_grounding_dino import GroundingDinoObjectDetectionOutput -if is_torch_available(): - import torch - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py b/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py index 736666fd28a0..ddfee7c757fe 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import numpy as np +import torch from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -29,20 +30,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F def squared_euclidean_distance_torch(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: diff --git a/src/transformers/models/janus/image_processing_janus_fast.py b/src/transformers/models/janus/image_processing_janus_fast.py index 3e9483f21bfe..9ed2732fb3d0 100644 --- a/src/transformers/models/janus/image_processing_janus_fast.py +++ b/src/transformers/models/janus/image_processing_janus_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from 
...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -34,17 +36,13 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if is_torch_available(): - import torch if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F diff --git a/src/transformers/models/janus/modeling_janus.py b/src/transformers/models/janus/modeling_janus.py index eee387664832..94e1c6288bd3 100644 --- a/src/transformers/models/janus/modeling_janus.py +++ b/src/transformers/models/janus/modeling_janus.py @@ -24,6 +24,7 @@ from typing import Callable, Optional, Union import torch +import torch.nn.functional as F from torch import nn from ...activations import ACT2FN @@ -34,23 +35,12 @@ from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ModelOutput from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import ( - TransformersKwargs, - auto_docstring, - can_return_tuple, - is_torch_available, - logging, - torch_int, -) +from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging, torch_int from ...utils.generic import check_model_inputs from ..auto import AutoModel from .configuration_janus import JanusConfig, JanusVisionConfig, JanusVQVAEConfig -if is_torch_available(): - import torch.nn.functional as F - - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/janus/modular_janus.py b/src/transformers/models/janus/modular_janus.py index 0541854200a5..dcd5c1e1e730 100644 --- a/src/transformers/models/janus/modular_janus.py +++ b/src/transformers/models/janus/modular_janus.py @@ -20,12 +20,15 @@ import numpy as np import torch +import torch.nn.functional as F +import torch.utils.checkpoint from torch import nn from transformers.models.blip.image_processing_blip import BlipImageProcessor from ...activations import ACT2FN from ...cache_utils import Cache +from ...configuration_utils import PretrainedConfig from ...generation import ClassifierFreeGuidanceLogitsProcessor, GenerationMixin, GenerationMode, LogitsProcessorList from ...generation.utils import GenerateDecoderOnlyOutput from ...image_processing_utils import BatchFeature, get_size_dict @@ -51,11 +54,10 @@ auto_docstring, can_return_tuple, filter_out_non_signature_kwargs, - is_torch_available, is_vision_available, logging, ) -from ..auto import AutoModel +from ..auto import CONFIG_MAPPING, AutoConfig, AutoModel from ..blip_2.modeling_blip_2 import Blip2VisionModel from ..chameleon.configuration_chameleon import ChameleonVQVAEConfig from ..chameleon.modeling_chameleon import ( @@ -71,19 +73,9 @@ from ..siglip.modeling_siglip import SiglipEncoder, SiglipEncoderLayer, SiglipVisionEmbeddings -if is_torch_available(): - import torch - import torch.nn as nn - import torch.nn.functional as F - - if is_vision_available(): import PIL -from ...configuration_utils import PretrainedConfig -from ..auto import CONFIG_MAPPING, AutoConfig - - logger = logging.get_logger(__name__) # General docstring diff --git a/src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py b/src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py index 7b9613ed0074..c6d8b1b1edf5 100644 --- a/src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +++ 
b/src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py @@ -17,6 +17,8 @@ import math from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -26,11 +28,7 @@ ) from ...image_utils import ChannelDimension, ImageInput, get_image_size from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torch_available - - -if is_torch_available(): - import torch +from ...utils import TensorType, auto_docstring # Similar to transformers.models.pix2struct.image_processing_pix2struct.torch_extract_patches but dealing with a batch of images directly. diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py index c22612da5858..723687d58219 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import ChannelDimension, group_images_by_shape, reorder_images from ...image_utils import ImageInput, PILImageResampling, SizeDict @@ -23,8 +25,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, requires_backends, @@ -32,16 +32,12 @@ from .image_processing_layoutlmv2 import apply_tesseract -logger = logging.get_logger(__name__) - -if is_torch_available(): - import torch +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +logger = logging.get_logger(__name__) class LayoutLMv2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py index c7580bb528da..2ab8f8dd48cc 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import ChannelDimension, group_images_by_shape, reorder_images from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageInput, PILImageResampling, SizeDict @@ -23,8 +25,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, requires_backends, @@ -32,16 +32,12 @@ from .image_processing_layoutlmv3 import apply_tesseract -logger = logging.get_logger(__name__) - -if is_torch_available(): - import torch +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from 
torchvision.transforms import functional as F +logger = logging.get_logger(__name__) class LayoutLMv3FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/levit/image_processing_levit_fast.py b/src/transformers/models/levit/image_processing_levit_fast.py index 096c846234da..e452894d6e2e 100644 --- a/src/transformers/models/levit/image_processing_levit_fast.py +++ b/src/transformers/models/levit/image_processing_levit_fast.py @@ -16,23 +16,21 @@ from typing import Optional +import torch + from ...image_processing_utils_fast import BaseImageProcessorFast, SizeDict from ...image_transforms import ( ChannelDimension, get_resize_output_image_size, ) from ...image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, PILImageResampling -from ...utils import auto_docstring, is_torch_available, is_torchvision_available, is_torchvision_v2_available - +from ...utils import auto_docstring, is_torchvision_v2_available -if is_torch_available(): - import torch -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F @auto_docstring diff --git a/src/transformers/models/llama4/image_processing_llama4_fast.py b/src/transformers/models/llama4/image_processing_llama4_fast.py index fcb1555dd316..946fdde0a643 100644 --- a/src/transformers/models/llama4/image_processing_llama4_fast.py +++ b/src/transformers/models/llama4/image_processing_llama4_fast.py @@ -19,6 +19,8 @@ from functools import lru_cache from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -31,20 +33,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F def get_factors(dividend: int) -> set[int]: diff --git a/src/transformers/models/llava/image_processing_llava_fast.py b/src/transformers/models/llava/image_processing_llava_fast.py index cf62f250bc2f..41bb94f5b7e0 100644 --- a/src/transformers/models/llava/image_processing_llava_fast.py +++ b/src/transformers/models/llava/image_processing_llava_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -36,24 +38,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, - is_vision_available, ) -if is_vision_available(): - from ...image_utils import PILImageResampling - -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + 
from torchvision.transforms import functional as F class LlavaFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): ... diff --git a/src/transformers/models/llava_next/image_processing_llava_next_fast.py b/src/transformers/models/llava_next/image_processing_llava_next_fast.py index 201a65260589..b502d98d6ac3 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next_fast.py +++ b/src/transformers/models/llava_next/image_processing_llava_next_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature, get_patch_output_size, select_best_resolution from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -37,20 +39,14 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, ) -if is_torch_available(): - import torch - -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F class LlavaNextFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/llava_onevision/modeling_llava_onevision.py b/src/transformers/models/llava_onevision/modeling_llava_onevision.py index e4cb0c9aeafd..eae6e3046f94 100644 --- a/src/transformers/models/llava_onevision/modeling_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modeling_llava_onevision.py @@ -35,11 +35,7 @@ from ...modeling_outputs import BaseModelOutputWithPast, ModelOutput from ...modeling_utils import PreTrainedModel from ...processing_utils import Unpack -from ...utils import ( - TransformersKwargs, - auto_docstring, - can_return_tuple, -) +from ...utils import TransformersKwargs, auto_docstring, can_return_tuple from ..auto import AutoModel from .configuration_llava_onevision import LlavaOnevisionConfig diff --git a/src/transformers/models/llava_onevision/modular_llava_onevision.py b/src/transformers/models/llava_onevision/modular_llava_onevision.py index 45dfac3b37ef..21688e7763bf 100644 --- a/src/transformers/models/llava_onevision/modular_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modular_llava_onevision.py @@ -50,18 +50,15 @@ TensorType, auto_docstring, can_return_tuple, - is_torchvision_available, is_torchvision_v2_available, logging, ) -if is_torchvision_available(): - if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - else: - from torchvision.transforms import functional as F - +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/mask2former/image_processing_mask2former_fast.py b/src/transformers/models/mask2former/image_processing_mask2former_fast.py index c61d531eb077..a5d662288119 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former_fast.py +++ b/src/transformers/models/mask2former/image_processing_mask2former_fast.py @@ -21,6 +21,9 @@ import math from typing import Any, Optional, Union +import torch +from torch import nn + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -39,14 +42,7 @@ 
PILImageResampling, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, - is_torch_available, - is_torchvision_available, - is_torchvision_v2_available, - logging, -) +from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging from .image_processing_mask2former import ( compute_segments, convert_segmentation_to_rle, @@ -55,18 +51,11 @@ ) -if is_torch_available(): - import torch - from torch import nn - - if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): +else: from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/mask2former/modular_mask2former.py b/src/transformers/models/mask2former/modular_mask2former.py index 9efbd8bdd340..c5f3f58fedbb 100644 --- a/src/transformers/models/mask2former/modular_mask2former.py +++ b/src/transformers/models/mask2former/modular_mask2former.py @@ -14,11 +14,13 @@ # limitations under the License. from typing import Optional +import torch +from torch import nn + from transformers.models.maskformer.image_processing_maskformer_fast import MaskFormerImageProcessorFast from ...utils import ( TensorType, - is_torch_available, logging, ) from .image_processing_mask2former import ( @@ -28,11 +30,6 @@ ) -if is_torch_available(): - import torch - from torch import nn - - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/maskformer/image_processing_maskformer_fast.py b/src/transformers/models/maskformer/image_processing_maskformer_fast.py index 0b1c95aa1012..ab6411f1bb3f 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer_fast.py +++ b/src/transformers/models/maskformer/image_processing_maskformer_fast.py @@ -18,6 +18,9 @@ import warnings from typing import TYPE_CHECKING, Any, Optional, Union +import torch +from torch import nn + from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -39,8 +42,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torch_available, - is_torchvision_available, is_torchvision_v2_available, logging, ) @@ -52,6 +53,11 @@ ) +if is_torchvision_v2_available(): + from torchvision.transforms.v2 import functional as F +else: + from torchvision.transforms import functional as F + logger = logging.get_logger(__name__) @@ -59,18 +65,6 @@ from transformers import MaskFormerForInstanceSegmentationOutput -if is_torch_available(): - import torch - from torch import nn - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F - -elif is_torchvision_available(): - from torchvision.transforms import functional as F - - def convert_segmentation_map_to_binary_masks_fast( segmentation_map: "torch.Tensor", instance_id_to_semantic_id: Optional[dict[int, int]] = None, diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py index e50d71025d54..97ca39da78bf 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py @@ -16,6 +16,8 @@ from typing import Optional, Union +import torch + from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -36,20 +38,14 @@ from ...utils import ( TensorType, 
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class MobileNetV2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py b/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py
index 442f88a3a848..71c8ababba36 100644
--- a/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py
+++ b/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -34,20 +36,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class MobileVitFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/nougat/image_processing_nougat_fast.py b/src/transformers/models/nougat/image_processing_nougat_fast.py
index ebe37389f3f6..d6579029e4f5 100644
--- a/src/transformers/models/nougat/image_processing_nougat_fast.py
+++ b/src/transformers/models/nougat/image_processing_nougat_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -38,20 +40,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class NougatFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/oneformer/image_processing_oneformer_fast.py b/src/transformers/models/oneformer/image_processing_oneformer_fast.py
index 10869f50f622..20b34bb7fd39 100644
--- a/src/transformers/models/oneformer/image_processing_oneformer_fast.py
+++ b/src/transformers/models/oneformer/image_processing_oneformer_fast.py
@@ -16,6 +16,9 @@
 from typing import Optional, Union
 
+import torch
+from torch import nn
+
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
     BatchFeature,
@@ -36,25 +39,18 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
 )
 from .image_processing_oneformer import load_metadata, prepare_metadata
 
 
-logger = logging.get_logger(__name__)
-
-if is_torch_available():
-    import torch
-    from torch import nn
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+logger = logging.get_logger(__name__)
 
 
 def make_pixel_mask(image: "torch.Tensor", output_size: tuple[int, int]) -> "torch.Tensor":
diff --git a/src/transformers/models/ovis2/image_processing_ovis2_fast.py b/src/transformers/models/ovis2/image_processing_ovis2_fast.py
index f12a9c70ee57..07fbf82f9fbe 100644
--- a/src/transformers/models/ovis2/image_processing_ovis2_fast.py
+++ b/src/transformers/models/ovis2/image_processing_ovis2_fast.py
@@ -15,6 +15,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -33,21 +35,15 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 from .image_processing_ovis2 import get_min_tile_covering_grid, get_optimal_tiled_canvas
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class Ovis2ImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/owlv2/image_processing_owlv2_fast.py b/src/transformers/models/owlv2/image_processing_owlv2_fast.py
index 926da9b27ffc..70441feba3c2 100644
--- a/src/transformers/models/owlv2/image_processing_owlv2_fast.py
+++ b/src/transformers/models/owlv2/image_processing_owlv2_fast.py
@@ -22,6 +22,8 @@
 import warnings
 from typing import TYPE_CHECKING, Optional, Union
 
+import torch
+
 from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs
 from ...image_transforms import center_to_corners_format, group_images_by_shape, reorder_images
 from ...image_utils import (
@@ -33,22 +35,13 @@
     SizeDict,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-)
-
-
-if is_torch_available():
-    import torch
+from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
+from .image_processing_owlv2 import _scale_boxes, box_iou
 
 
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
@@ -56,10 +49,6 @@
     from .modeling_owlv2 import Owlv2ObjectDetectionOutput
 
 
-if is_torch_available():
-    from .image_processing_owlv2 import _scale_boxes, box_iou
-
-
 class Owlv2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): ...
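Every hunk above applies the same two-part refactor: `torch` (and `from torch import nn` where needed) moves to an unconditional top-level import, and the nested `is_torchvision_available()` / `is_torchvision_v2_available()` guard collapses into a single two-way branch. For reference, a minimal sketch of the preamble these fast image-processor modules converge on (relative import path as in the hunks; torch and torchvision assumed installed, since these modules already hard-require them):

    # Minimal sketch of the consolidated preamble, assuming torch and
    # torchvision are installed. Only the torchvision v1-vs-v2 split stays
    # guarded; torch itself is imported unconditionally.
    import torch
    from torch import nn

    from ...utils import is_torchvision_v2_available

    if is_torchvision_v2_available():
        from torchvision.transforms.v2 import functional as F  # v2 API when available
    else:
        from torchvision.transforms import functional as F  # v1 fallback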
diff --git a/src/transformers/models/owlv2/modular_owlv2.py b/src/transformers/models/owlv2/modular_owlv2.py
index 7fe4d75ee9ea..2e6d917a791a 100644
--- a/src/transformers/models/owlv2/modular_owlv2.py
+++ b/src/transformers/models/owlv2/modular_owlv2.py
@@ -17,6 +17,8 @@
 import warnings
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
     BatchFeature,
@@ -35,20 +37,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 from ..owlvit.image_processing_owlvit_fast import OwlViTImageProcessorFast
 
 
-if is_torch_available():
-    import torch
-
-
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/owlvit/image_processing_owlvit_fast.py b/src/transformers/models/owlvit/image_processing_owlvit_fast.py
index 8689ac72dc44..1e458f964a04 100644
--- a/src/transformers/models/owlvit/image_processing_owlvit_fast.py
+++ b/src/transformers/models/owlvit/image_processing_owlvit_fast.py
@@ -17,22 +17,19 @@
 import warnings
 from typing import TYPE_CHECKING, Optional, Union
 
+import torch
+
 from ...image_processing_utils_fast import BaseImageProcessorFast
 from ...image_transforms import center_to_corners_format
 from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling
-from ...utils import TensorType, auto_docstring, is_torch_available, logging
+from ...utils import TensorType, auto_docstring, logging
+from .image_processing_owlvit import _scale_boxes, box_iou
 
 
 if TYPE_CHECKING:
     from .modeling_owlvit import OwlViTObjectDetectionOutput
 
 
-if is_torch_available():
-    import torch
-
-    from .image_processing_owlvit import _scale_boxes, box_iou
-
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/perceiver/image_processing_perceiver_fast.py b/src/transformers/models/perceiver/image_processing_perceiver_fast.py
index ecd7f938f569..82c1bcd9d319 100644
--- a/src/transformers/models/perceiver/image_processing_perceiver_fast.py
+++ b/src/transformers/models/perceiver/image_processing_perceiver_fast.py
@@ -16,26 +16,22 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature
 from ...image_transforms import group_images_by_shape, reorder_images
 from ...image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, PILImageResampling, SizeDict
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 @auto_docstring
diff --git a/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py b/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py
index c8b7c52d9a23..be55c39572d5 100644
--- a/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py
+++ b/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py
@@ -17,6 +17,8 @@
 from typing import Optional, Union
 
 import numpy as np
+import torch
+from torchvision.transforms import functional as F
 
 from ...image_processing_utils import (
     BatchFeature,
@@ -35,19 +37,7 @@
     PILImageResampling,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-)
-
-
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    from torchvision.transforms import functional as F
+from ...utils import TensorType, auto_docstring
 
 
 class PerceptionLMFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py b/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py
index 1f079005b01e..532136f8108e 100644
--- a/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py
+++ b/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py
@@ -23,24 +23,19 @@
     DefaultFastImageProcessorKwargs,
     Unpack,
 )
-from ...image_utils import ImageInput, SizeDict
+from ...image_utils import ImageInput, PILImageResampling, SizeDict
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torchvision_available,
     is_torchvision_v2_available,
-    is_vision_available,
     logging,
 )
 
 
-if is_vision_available():
-    from ...image_utils import PILImageResampling
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/pixtral/image_processing_pixtral_fast.py b/src/transformers/models/pixtral/image_processing_pixtral_fast.py
index 585405627023..db3e75760318 100644
--- a/src/transformers/models/pixtral/image_processing_pixtral_fast.py
+++ b/src/transformers/models/pixtral/image_processing_pixtral_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -28,28 +30,18 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
-    is_vision_available,
     logging,
 )
 from .image_processing_pixtral import get_resize_output_image_size
 
 
-logger = logging.get_logger(__name__)
-
-if is_torch_available():
-    import torch
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
-if is_torchvision_available():
-    if is_vision_available():
-        pass
-
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+logger = logging.get_logger(__name__)
 
 
 class PixtralFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/poolformer/image_processing_poolformer_fast.py b/src/transformers/models/poolformer/image_processing_poolformer_fast.py
index 8fefa80be432..70c6ed55bc8a 100644
--- a/src/transformers/models/poolformer/image_processing_poolformer_fast.py
+++ b/src/transformers/models/poolformer/image_processing_poolformer_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs
 from ...image_transforms import (
     ChannelDimension,
@@ -36,20 +38,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class PoolFormerFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py
index 4cb6c6732e90..763fd613c218 100644
--- a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py
+++ b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py
@@ -23,6 +23,8 @@
 if TYPE_CHECKING:
     from ...modeling_outputs import DepthEstimatorOutput
 
+import torch
+
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
     DefaultFastImageProcessorKwargs,
@@ -40,21 +42,15 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     requires_backends,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 def _constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py
index 33efe1929c06..80242a331ace 100644
--- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py
+++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py
@@ -21,6 +21,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -40,8 +42,6 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
 )
@@ -49,15 +49,10 @@
 from .image_processing_qwen2_vl import smart_resize
 
 
-if is_torch_available():
-    import torch
-
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
index eefc45bf9f9a..68c5497b0205 100644
--- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
+++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
@@ -7,6 +7,8 @@
 import pathlib
 from typing import Any, Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -29,25 +31,14 @@
     validate_annotations,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-    requires_backends,
-)
+from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, requires_backends
 from ...utils.import_utils import requires
 from .image_processing_rt_detr import get_size_with_aspect_ratio
 
 
-if is_torch_available():
-    import torch
-
-
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/rt_detr/modular_rt_detr.py b/src/transformers/models/rt_detr/modular_rt_detr.py
index 938f070d3672..760e4a6675cf 100644
--- a/src/transformers/models/rt_detr/modular_rt_detr.py
+++ b/src/transformers/models/rt_detr/modular_rt_detr.py
@@ -1,6 +1,8 @@
 import pathlib
 from typing import Optional, Union
 
+import torch
+
 from transformers.models.detr.image_processing_detr_fast import DetrFastImageProcessorKwargs, DetrImageProcessorFast
 
 from ...image_processing_utils import BatchFeature
@@ -20,21 +22,15 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
     requires_backends,
 )
 
 
-if is_torch_available():
-    import torch
-
-
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/sam/image_processing_sam_fast.py b/src/transformers/models/sam/image_processing_sam_fast.py
index 1bfb6adf5234..ba75e73c8680 100644
--- a/src/transformers/models/sam/image_processing_sam_fast.py
+++ b/src/transformers/models/sam/image_processing_sam_fast.py
@@ -21,6 +21,8 @@
 
 import numpy as np
 import torch
+from torch.nn import functional as F
+from torchvision.ops.boxes import batched_nms
 
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_processing_utils_fast import (
@@ -37,23 +39,12 @@
     pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-)
-
+from ...utils import auto_docstring, is_torchvision_v2_available
 
-if is_torch_available():
-    import torch
-    from torch.nn import functional as F
 
 if is_torchvision_v2_available():
-    from torchvision.ops.boxes import batched_nms
     from torchvision.transforms.v2 import functional as F_t
-elif is_torchvision_available():
-    from torchvision.ops.boxes import batched_nms
+else:
     from torchvision.transforms import functional as F_t
diff --git a/src/transformers/models/sam2/image_processing_sam2_fast.py b/src/transformers/models/sam2/image_processing_sam2_fast.py
index 8cb5381f0977..a55188f4e786 100644
--- a/src/transformers/models/sam2/image_processing_sam2_fast.py
+++ b/src/transformers/models/sam2/image_processing_sam2_fast.py
@@ -26,6 +26,7 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
+from torchvision.ops.boxes import batched_nms
 
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs
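In the two SAM hunks just above, `torch.nn.functional` already owns the conventional alias `F`, so the torchvision functional module is bound to `F_t` instead of shadowing it, and `batched_nms`, previously imported identically in both branches of the guard, becomes a single unconditional import. A minimal sketch of that preamble, under the same installed-backend assumptions as before:

    # Minimal sketch for modules that need both functional namespaces.
    # torch.nn.functional keeps the usual alias F, so torchvision's
    # functional module gets the distinct alias F_t.
    import torch
    from torch.nn import functional as F
    from torchvision.ops.boxes import batched_nms  # same path in v1 and v2

    from ...utils import is_torchvision_v2_available

    if is_torchvision_v2_available():
        from torchvision.transforms.v2 import functional as F_t
    else:
        from torchvision.transforms import functional as F_t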
@@ -42,17 +43,9 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torchvision_available,
-    is_torchvision_v2_available,
 )
 
 
-if is_torchvision_v2_available():
-    from torchvision.ops.boxes import batched_nms
-elif is_torchvision_available():
-    from torchvision.ops.boxes import batched_nms
-
-
 class Sam2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
     r"""
     mask_size (`dict[str, int]`, *optional*):
diff --git a/src/transformers/models/sam2/modular_sam2.py b/src/transformers/models/sam2/modular_sam2.py
index be2a5eb1c6d2..daab10855512 100644
--- a/src/transformers/models/sam2/modular_sam2.py
+++ b/src/transformers/models/sam2/modular_sam2.py
@@ -41,7 +41,6 @@
     ModelOutput,
     TensorType,
     auto_docstring,
-    is_torch_available,
     logging,
 )
 from ...utils.generic import TransformersKwargs, check_model_inputs
@@ -68,11 +67,6 @@
 )
 
 
-if is_torch_available():
-    import torch
-    from torch.nn import functional as F
-
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/sam2_video/modular_sam2_video.py b/src/transformers/models/sam2_video/modular_sam2_video.py
index 9ba8e6526305..c0c9b3e1ef7a 100644
--- a/src/transformers/models/sam2_video/modular_sam2_video.py
+++ b/src/transformers/models/sam2_video/modular_sam2_video.py
@@ -36,7 +36,6 @@
 from ...utils import (
     ModelOutput,
     auto_docstring,
-    is_torch_available,
     is_torchvision_available,
     is_torchvision_v2_available,
     logging,
@@ -60,12 +59,9 @@
 from ..sam2.processing_sam2 import Sam2Processor
 
 
-if is_torch_available():
-    import torch
-
 if is_torchvision_available() and is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/segformer/image_processing_segformer_fast.py b/src/transformers/models/segformer/image_processing_segformer_fast.py
index 77ac7281ef1b..da4bef3e9ee8 100644
--- a/src/transformers/models/segformer/image_processing_segformer_fast.py
+++ b/src/transformers/models/segformer/image_processing_segformer_fast.py
@@ -21,6 +21,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -38,21 +40,12 @@
     is_torch_tensor,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-)
-
+from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
 
-if is_torch_available():
-    import torch
 
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/segformer/modular_segformer.py b/src/transformers/models/segformer/modular_segformer.py
index fbf35afd820e..341e6949d8b7 100644
--- a/src/transformers/models/segformer/modular_segformer.py
+++ b/src/transformers/models/segformer/modular_segformer.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from transformers.models.beit.image_processing_beit_fast import BeitFastImageProcessorKwargs, BeitImageProcessorFast
 
 from ...image_processing_utils import BatchFeature
@@ -34,18 +36,13 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
-elif is_torchvision_available():
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/siglip2/image_processing_siglip2_fast.py b/src/transformers/models/siglip2/image_processing_siglip2_fast.py
index bbab91961962..64dcfa1ad566 100644
--- a/src/transformers/models/siglip2/image_processing_siglip2_fast.py
+++ b/src/transformers/models/siglip2/image_processing_siglip2_fast.py
@@ -32,23 +32,16 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
 )
 from .image_processing_siglip2 import get_image_size_for_max_num_patches
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
-
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/smolvlm/video_processing_smolvlm.py b/src/transformers/models/smolvlm/video_processing_smolvlm.py
index eda3bdb1c811..7e8e544b8fc7 100644
--- a/src/transformers/models/smolvlm/video_processing_smolvlm.py
+++ b/src/transformers/models/smolvlm/video_processing_smolvlm.py
@@ -21,7 +21,7 @@
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, PILImageResampling, SizeDict
 from ...processing_utils import Unpack, VideosKwargs
-from ...utils import TensorType, is_torchvision_v2_available
+from ...utils import TensorType, is_torchvision_v2_available, logging
 from ...video_processing_utils import BaseVideoProcessor
 from ...video_utils import VideoMetadata, group_videos_by_shape, reorder_videos
@@ -31,8 +31,6 @@
 else:
     from torchvision.transforms import functional as F
 
-from ...utils import logging
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/superpoint/image_processing_superpoint_fast.py b/src/transformers/models/superpoint/image_processing_superpoint_fast.py
index e70bb397ff6a..a752e08ac5f0 100644
--- a/src/transformers/models/superpoint/image_processing_superpoint_fast.py
+++ b/src/transformers/models/superpoint/image_processing_superpoint_fast.py
@@ -16,6 +16,8 @@
 from typing import TYPE_CHECKING, Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -31,21 +33,16 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
 if TYPE_CHECKING:
     from .modeling_superpoint import SuperPointKeypointDescriptionOutput
 
 
 if is_torchvision_v2_available():
     import torchvision.transforms.v2.functional as F
-elif is_torchvision_available():
+else:
     import torchvision.transforms.functional as F
diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py b/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py
index f99ab99274f5..c10bd5081754 100644
--- a/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py
+++ b/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature, ChannelDimension, get_image_size
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -28,24 +30,18 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
 )
 from ...utils.deprecation import deprecate_kwarg
 
 
-logger = logging.get_logger(__name__)
-
-if is_torch_available():
-    import torch
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+logger = logging.get_logger(__name__)
 
 
 class Swin2SRFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/textnet/image_processing_textnet_fast.py b/src/transformers/models/textnet/image_processing_textnet_fast.py
index 41b201a5c4ee..2f5ef22ef5e3 100644
--- a/src/transformers/models/textnet/image_processing_textnet_fast.py
+++ b/src/transformers/models/textnet/image_processing_textnet_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs
 from ...image_transforms import (
@@ -35,20 +37,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class TextNetFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/tvp/image_processing_tvp_fast.py b/src/transformers/models/tvp/image_processing_tvp_fast.py
index b96e4991f619..e7fe7e621d8c 100644
--- a/src/transformers/models/tvp/image_processing_tvp_fast.py
+++ b/src/transformers/models/tvp/image_processing_tvp_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -32,23 +34,13 @@
     make_nested_list_of_images,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-)
-
+from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
 
-if is_torch_available():
-    import torch
 
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 class TvpFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/vilt/image_processing_vilt_fast.py b/src/transformers/models/vilt/image_processing_vilt_fast.py
index 1c169994ba3f..79e601648c55 100644
--- a/src/transformers/models/vilt/image_processing_vilt_fast.py
+++ b/src/transformers/models/vilt/image_processing_vilt_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -28,20 +30,14 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 # Set maximum size based on the typical aspect ratio of the COCO dataset
 MAX_LONGER_EDGE = 1333
diff --git a/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py b/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py
index 014a6939af5c..ae8797789df8 100644
--- a/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py
+++ b/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py
@@ -16,6 +16,8 @@
 from typing import Optional, Union
 
+import torch
+
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -35,22 +37,15 @@
     TensorType,
     auto_docstring,
     filter_out_non_signature_kwargs,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
 )
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
-
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/yolos/image_processing_yolos_fast.py b/src/transformers/models/yolos/image_processing_yolos_fast.py
index 81fb0b008e0d..fda06dfc522a 100644
--- a/src/transformers/models/yolos/image_processing_yolos_fast.py
+++ b/src/transformers/models/yolos/image_processing_yolos_fast.py
@@ -7,6 +7,9 @@
 import pathlib
 from typing import Any, Optional, Union
 
+import torch
+from torchvision.io import read_image
+
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -29,27 +32,13 @@
     validate_annotations,
 )
 from ...processing_utils import Unpack
-from ...utils import (
-    TensorType,
-    auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
-    is_torchvision_v2_available,
-    logging,
-)
+from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging
 from ...utils.import_utils import requires
 
 
-if is_torch_available():
-    import torch
-
-
 if is_torchvision_v2_available():
-    from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
-
-elif is_torchvision_available():
-    from torchvision.io import read_image
+else:
     from torchvision.transforms import functional as F
diff --git a/src/transformers/models/yolos/modular_yolos.py b/src/transformers/models/yolos/modular_yolos.py
index d1391008227c..13f3db41b675 100644
--- a/src/transformers/models/yolos/modular_yolos.py
+++ b/src/transformers/models/yolos/modular_yolos.py
@@ -1,19 +1,16 @@
 from typing import Optional, Union
 
+import torch
+
 from transformers.models.detr.image_processing_detr_fast import DetrImageProcessorFast
 
 from ...image_transforms import center_to_corners_format
 from ...utils import (
     TensorType,
-    is_torch_available,
     logging,
 )
 
 
-if is_torch_available():
-    import torch
-
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py b/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py
index c89ec8b2ebf1..7967932729e5 100644
--- a/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py
+++ b/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py
@@ -20,6 +20,7 @@
 )
 
 import numpy as np
+import torch
 
 from ...image_processing_utils import (
     BatchFeature,
@@ -43,8 +44,6 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torch_available,
-    is_torchvision_available,
     is_torchvision_v2_available,
     logging,
     requires_backends,
@@ -53,16 +52,10 @@
     from .modeling_zoedepth import ZoeDepthDepthEstimatorOutput
 
 
-if is_torch_available():
-    import torch
-
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
-
-    from torchvision.transforms import InterpolationMode
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+else:
+    from torchvision.transforms import functional as F
 
 
 logger = logging.get_logger(__name__)
@@ -296,7 +289,7 @@ def post_process_depth_estimation(
         depth = F.resize(
             depth,
             size=[source_size[0] + 2 * pad_h, source_size[1] + 2 * pad_w],
-            interpolation=InterpolationMode.BICUBIC,
+            interpolation=F.InterpolationMode.BICUBIC,
             antialias=False,
         )
@@ -310,7 +303,7 @@ def post_process_depth_estimation(
         depth = F.resize(
             depth,
             size=target_size,
-            interpolation=InterpolationMode.BICUBIC,
+            interpolation=F.InterpolationMode.BICUBIC,
             antialias=False,
         )
         depth = depth.squeeze(0)
diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py
index 9f6545ebe10e..4d0e9c58f314 100644
--- a/src/transformers/video_processing_utils.py
+++ b/src/transformers/video_processing_utils.py
@@ -68,11 +68,11 @@
 if is_torch_available():
     import torch
 
-if is_torchvision_available():
-    if is_torchvision_v2_available():
-        from torchvision.transforms.v2 import functional as F
-    else:
-        from torchvision.transforms import functional as F
+if is_torchvision_v2_available():
+    from torchvision.transforms.v2 import functional as F
+elif is_torchvision_available():
+    from torchvision.transforms import functional as F
+
 
 logger = logging.get_logger(__name__)
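Two details in the closing hunks are worth noting. The zoedepth change relies on `InterpolationMode` being reachable as an attribute of both the v1 and v2 functional modules, which is what lets `F.InterpolationMode.BICUBIC` work under either import and the separate `from torchvision.transforms import InterpolationMode` be dropped. And unlike the per-model fast processors, the shared video_processing_utils.py keeps an `elif is_torchvision_available():` guard instead of a bare `else:`, because this module must stay importable when torchvision is absent. A minimal sketch of that guarded variant, under the same assumptions as the earlier sketches:

    # Minimal sketch of the guarded variant for shared modules that must
    # import cleanly without torchvision. If neither branch runs, F is
    # simply never bound, and failure is deferred to first use.
    from ...utils import is_torchvision_available, is_torchvision_v2_available

    if is_torchvision_v2_available():
        from torchvision.transforms.v2 import functional as F
    elif is_torchvision_available():
        from torchvision.transforms import functional as F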