From 02853ff841aa52c12f8d0943b737edc6c070af44 Mon Sep 17 00:00:00 2001 From: tangyunxiang <546783735@qq.com> Date: Wed, 20 Mar 2024 12:17:52 +0800 Subject: [PATCH 1/5] replace the 'decord' with 'av' in VideoClassificationPipeline --- src/transformers/__init__.py | 2 ++ .../pipelines/video_classification.py | 21 ++++++++++++++----- src/transformers/testing_utils.py | 8 +++++++ src/transformers/utils/__init__.py | 1 + src/transformers/utils/import_utils.py | 16 ++++++++++++++ .../test_pipelines_video_classification.py | 4 ++-- 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 7c3b34ce6e505..37b029a1dafcf 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1077,6 +1077,7 @@ "add_end_docstrings", "add_start_docstrings", "is_apex_available", + "is_av_available", "is_bitsandbytes_available", "is_datasets_available", "is_decord_available", @@ -5934,6 +5935,7 @@ add_end_docstrings, add_start_docstrings, is_apex_available, + is_av_available, is_bitsandbytes_available, is_datasets_available, is_decord_available, diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index f8596ce14c714..e3abf737514da 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -7,9 +7,9 @@ from .base import Pipeline, build_pipeline_init_args -if is_decord_available(): +if is_av_available(): import numpy as np - from decord import VideoReader + import av if is_torch_available(): @@ -90,14 +90,13 @@ def preprocess(self, video, num_frames=None, frame_sampling_rate=1): if video.startswith("http://") or video.startswith("https://"): video = BytesIO(requests.get(video).content) - videoreader = VideoReader(video) - videoreader.seek(0) + container = av.open(video) start_idx = 0 end_idx = num_frames * frame_sampling_rate - 1 indices = np.linspace(start_idx, end_idx, num=num_frames, dtype=np.int64) - video = videoreader.get_batch(indices).asnumpy() + video = read_video_pyav(container, indices) video = list(video) model_inputs = self.image_processor(video, return_tensors=self.framework) @@ -120,3 +119,15 @@ def postprocess(self, model_outputs, top_k=5): scores = scores.tolist() ids = ids.tolist() return [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)] + +def read_video_pyav(container, indices): + frames = [] + container.seek(0) + start_index = indices[0] + end_index = indices[-1] + for i, frame in enumerate(container.decode(video=0)): + if i > end_index: + break + if i >= start_index and i in indices: + frames.append(frame) + return np.stack([x.to_ndarray(format="rgb24") for x in frames]) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 8b7814163739c..44e0e1ebb2f64 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -57,6 +57,7 @@ is_aqlm_available, is_auto_awq_available, is_auto_gptq_available, + is_av_available, is_bitsandbytes_available, is_bs4_available, is_cv2_available, @@ -1010,6 +1011,13 @@ def require_aqlm(test_case): return unittest.skipUnless(is_aqlm_available(), "test requires aqlm")(test_case) +def require_av(test_case): + """ + Decorator marking a test that requires av + """ + return unittest.skipUnless(is_av_available(), "test requires av")(test_case) + + def require_bitsandbytes(test_case): """ Decorator marking a test that requires the bitsandbytes library. Will be skipped when the library or its hard dependency torch is not installed. diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index b8da221a8c914..a3f596d0e9559 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -109,6 +109,7 @@ is_aqlm_available, is_auto_awq_available, is_auto_gptq_available, + is_av_available, is_bitsandbytes_available, is_bs4_available, is_coloredlogs_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 3835831e88a44..9382ca9528da8 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -94,6 +94,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[ _accelerate_available, _accelerate_version = _is_package_available("accelerate", return_version=True) _apex_available = _is_package_available("apex") _aqlm_available = _is_package_available("aqlm") +_av_available = importlib.util.find_spec("av") is not None _bitsandbytes_available = _is_package_available("bitsandbytes") _galore_torch_available = _is_package_available("galore_torch") # `importlib.metadata.version` doesn't work with `bs4` but `beautifulsoup4`. For `importlib.util.find_spec`, reversed. @@ -656,6 +657,10 @@ def is_aqlm_available(): return _aqlm_available +def is_av_available(): + return _av_available + + def is_ninja_available(): r""" Code comes from *torch.utils.cpp_extension.is_ninja_available()*. Returns `True` if the @@ -1012,6 +1017,16 @@ def is_mlx_available(): return _mlx_available +# docstyle-ignore +AV_IMPORT_ERROR = """ +{0} requires the PyAv library but it was not found in your environment. You can install it with: +``` +pip install av +``` +Please note that you may need to restart your runtime after installation. +""" + + # docstyle-ignore CV2_IMPORT_ERROR = """ {0} requires the OpenCV library but it was not found in your environment. You can install it with: @@ -1336,6 +1351,7 @@ def is_mlx_available(): BACKENDS_MAPPING = OrderedDict( [ + ("av", (is_av_available, AV_IMPORT_ERROR)), ("bs4", (is_bs4_available, BS4_IMPORT_ERROR)), ("cv2", (is_cv2_available, CV2_IMPORT_ERROR)), ("datasets", (is_datasets_available, DATASETS_IMPORT_ERROR)), diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index 33e06e30f5ae0..d23916bad84fa 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -21,7 +21,7 @@ from transformers.testing_utils import ( is_pipeline_test, nested_simplify, - require_decord, + require_av, require_tf, require_torch, require_torch_or_tf, @@ -34,7 +34,7 @@ @is_pipeline_test @require_torch_or_tf @require_vision -@require_decord +@require_av class VideoClassificationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING From c106ae56427395a79a754434983761fd2438bf8d Mon Sep 17 00:00:00 2001 From: tangyunxiang <546783735@qq.com> Date: Wed, 20 Mar 2024 14:26:26 +0800 Subject: [PATCH 2/5] fix the check of backend in VideoClassificationPipeline --- src/transformers/pipelines/video_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index e3abf737514da..16b221f29172d 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -3,7 +3,7 @@ import requests -from ..utils import add_end_docstrings, is_decord_available, is_torch_available, logging, requires_backends +from ..utils import add_end_docstrings, is_av_available, is_torch_available, logging, requires_backends from .base import Pipeline, build_pipeline_init_args @@ -33,7 +33,7 @@ class VideoClassificationPipeline(Pipeline): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - requires_backends(self, "decord") + requires_backends(self, "av") self.check_model_type(MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES) def _sanitize_parameters(self, top_k=None, num_frames=None, frame_sampling_rate=None): From 62c331d615fe2a092711e7e9442b44f94f812276 Mon Sep 17 00:00:00 2001 From: wanqiancheng <13541261013@163.com> Date: Mon, 25 Mar 2024 13:35:51 +0000 Subject: [PATCH 3/5] adjust the order of imports --- src/transformers/pipelines/video_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index 16b221f29172d..f80b2933312b6 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -8,8 +8,8 @@ if is_av_available(): - import numpy as np import av + import numpy as np if is_torch_available(): From d747839940f58c0d0b4ad1f491dfb2c5a7d7388f Mon Sep 17 00:00:00 2001 From: wanqiancheng <13541261013@163.com> Date: Mon, 25 Mar 2024 14:02:44 +0000 Subject: [PATCH 4/5] format 'video_classification.py' --- .../pipelines/video_classification.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index f80b2933312b6..d8c9492622611 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -3,7 +3,13 @@ import requests -from ..utils import add_end_docstrings, is_av_available, is_torch_available, logging, requires_backends +from ..utils import ( + add_end_docstrings, + is_av_available, + is_torch_available, + logging, + requires_backends, +) from .base import Pipeline, build_pipeline_init_args @@ -36,7 +42,9 @@ def __init__(self, *args, **kwargs): requires_backends(self, "av") self.check_model_type(MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES) - def _sanitize_parameters(self, top_k=None, num_frames=None, frame_sampling_rate=None): + def _sanitize_parameters( + self, top_k=None, num_frames=None, frame_sampling_rate=None + ): preprocess_params = {} if frame_sampling_rate is not None: preprocess_params["frame_sampling_rate"] = frame_sampling_rate @@ -118,7 +126,11 @@ def postprocess(self, model_outputs, top_k=5): scores = scores.tolist() ids = ids.tolist() - return [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)] + return [ + {"score": score, "label": self.model.config.id2label[_id]} + for score, _id in zip(scores, ids) + ] + def read_video_pyav(container, indices): frames = [] From e27cb41a63eaf56cd1e8fc65f2ae2eed14408046 Mon Sep 17 00:00:00 2001 From: wanqiancheng <13541261013@163.com> Date: Mon, 25 Mar 2024 14:10:00 +0000 Subject: [PATCH 5/5] format 'video_classification.py' with ruff --- src/transformers/pipelines/video_classification.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index d8c9492622611..5702f23c5f609 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -42,9 +42,7 @@ def __init__(self, *args, **kwargs): requires_backends(self, "av") self.check_model_type(MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES) - def _sanitize_parameters( - self, top_k=None, num_frames=None, frame_sampling_rate=None - ): + def _sanitize_parameters(self, top_k=None, num_frames=None, frame_sampling_rate=None): preprocess_params = {} if frame_sampling_rate is not None: preprocess_params["frame_sampling_rate"] = frame_sampling_rate @@ -126,10 +124,7 @@ def postprocess(self, model_outputs, top_k=5): scores = scores.tolist() ids = ids.tolist() - return [ - {"score": score, "label": self.model.config.id2label[_id]} - for score, _id in zip(scores, ids) - ] + return [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)] def read_video_pyav(container, indices):