From 2d4f74ecc8a9118665a9d54c2a9c3debc4c09515 Mon Sep 17 00:00:00 2001 From: boomb0om Date: Mon, 13 May 2024 16:19:18 +0300 Subject: [PATCH 1/2] feat: add videos cutting --- DPF/transforms/video_ffmpeg_transforms.py | 34 +++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/DPF/transforms/video_ffmpeg_transforms.py b/DPF/transforms/video_ffmpeg_transforms.py index 9c2edac..64c6833 100644 --- a/DPF/transforms/video_ffmpeg_transforms.py +++ b/DPF/transforms/video_ffmpeg_transforms.py @@ -34,17 +34,27 @@ def __init__( resizer: Optional[Resizer] = None, fps: Optional[int] = None, fps_eps: float = 0.1, + cut_start_col: Optional[str] = None, + cut_duration_col: Optional[str] = None, + copy_stream: bool = False, + preset: Optional[str] = None, + crf: Optional[int] = None, + copy_audio_stream: bool = True, pool_type: PoolOptions = 'threads', workers: int = 16, pbar: bool = True, - preset: Optional[str] = None, - crf: Optional[int] = None, - copy_audio_stream: bool = True ): super().__init__(pool_type, workers, pbar) self.resizer = resizer self.fps = fps self.fps_eps = fps_eps + self.cut_start_col = cut_start_col + self.cut_duration_col = cut_duration_col + if self.cut_duration_col or self.cut_start_col: + assert self.cut_duration_col and self.cut_start_col, f"Both {self.cut_duration_col} and {self.cut_start_col} must be specified" + self.copy_when_cut = copy_stream + if self.copy_when_cut: + assert self.copy_when_cut and not (self.fps or self.resizer), "Copy stream can be used only for cutting videos" self.preset = preset self.crf = crf @@ -53,7 +63,7 @@ def __init__( self.default_args = ' '.join(self.get_default_ffmpeg_args()) assert is_ffmpeg_installed(), "Install ffmpeg first" - assert self.resizer or self.fps, "At least one transform should be specified" + assert self.resizer or self.fps or self.cut_start_col, "At least one transform should be specified" def get_default_ffmpeg_args(self) -> list[str]: args = [] @@ -72,6 +82,8 @@ def 
required_metadata(self) -> list[str]: meta += ['width', 'height'] if self.fps: meta += ['fps'] + if self.cut_duration_col and self.cut_start_col: + meta += [self.cut_start_col, self.cut_duration_col] return meta @property @@ -90,6 +102,7 @@ def modality(self) -> str: def _process_filepath(self, data: TransformsFileData) -> TransformsFileData: filepath = data.filepath ext = filepath.split('.')[-1] + ffmpeg_args_start: list[str] = [] ffmpeg_args_map: dict[str, list[str]] = {} result_metadata: dict[str, Any] = {} @@ -110,10 +123,21 @@ def _process_filepath(self, data: TransformsFileData) -> TransformsFileData: video_fps = float(self.fps) result_metadata['fps'] = video_fps + if self.cut_start_col and self.cut_duration_col and data.metadata[self.cut_start_col] is not None: + start = data.metadata[self.cut_start_col] + cut_duration = data.metadata[self.cut_duration_col] + ffmpeg_args_start.append(f'-ss {start}') + ffmpeg_args_map['-t'] = [str(cut_duration)] + if self.copy_when_cut: + ffmpeg_args_map['-c'] = ['copy'] + ffmpeg_args_map['-avoid_negative_ts'] = ['1'] + if len(ffmpeg_args_map) > 0: args_str = convert_ffmpeg_args_to_str(ffmpeg_args_map) + args_start_str = ' '.join(ffmpeg_args_start) temp_filename = str(uuid.uuid4()) + '.' 
+ ext - ffmpeg_command = f'ffmpeg -hide_banner -i {filepath} {args_str} {self.default_args} {temp_filename} -y' + ffmpeg_command = f'ffmpeg -hide_banner {args_start_str} -i {filepath} {args_str} {self.default_args} {temp_filename} -y' + print(ffmpeg_command) subprocess.run(ffmpeg_command, shell=True, capture_output=True, check=True) shutil.move(temp_filename, filepath) From 99f6c6fe9bb1a96119c1563b17372202100a5404 Mon Sep 17 00:00:00 2001 From: boomb0om Date: Mon, 13 May 2024 16:23:04 +0300 Subject: [PATCH 2/2] docs: update doc --- DPF/transforms/video_ffmpeg_transforms.py | 1 - docs/transforms.md | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/DPF/transforms/video_ffmpeg_transforms.py b/DPF/transforms/video_ffmpeg_transforms.py index 64c6833..1b7c382 100644 --- a/DPF/transforms/video_ffmpeg_transforms.py +++ b/DPF/transforms/video_ffmpeg_transforms.py @@ -137,7 +137,6 @@ def _process_filepath(self, data: TransformsFileData) -> TransformsFileData: args_start_str = ' '.join(ffmpeg_args_start) temp_filename = str(uuid.uuid4()) + '.' + ext ffmpeg_command = f'ffmpeg -hide_banner {args_start_str} -i {filepath} {args_str} {self.default_args} {temp_filename} -y' - print(ffmpeg_command) subprocess.run(ffmpeg_command, shell=True, capture_output=True, check=True) shutil.move(temp_filename, filepath) diff --git a/docs/transforms.md b/docs/transforms.md index 8b5c7ca..fc2db92 100644 --- a/docs/transforms.md +++ b/docs/transforms.md @@ -38,4 +38,19 @@ transforms = VideoFFMPEGTransforms( workers=8 ) processor.apply_transform(transforms) +``` + +Fast video cutting: +_cut_start_ and _cut_duration_ columns should have values in seconds. 
+For example, cutting a video from second 7 to second 11 should be specified in the dataframe as: _cut_start_ = 7, _cut_duration_ = 4 +```python +from DPF.transforms import VideoFFMPEGTransforms + +transforms = VideoFFMPEGTransforms( + cut_start_col='cut_start', + cut_duration_col='cut_duration', + copy_stream=True, + workers=4, +) +processor.apply_transform(transforms) ``` \ No newline at end of file