In [3]:
import os
from scenedetect import open_video, SceneManager
from scenedetect.detectors import ContentDetector

In [49]:
import scenedetect
scenedetect.__version__

'0.6.5.2'

In [14]:
# Scratch check: str.endswith accepts a tuple of suffixes, so a single call can
# test a file name against several extensions at once.
'asdasd.mp3'.endswith(('.mp4', '.mov', '.mkv'))

False

In [69]:
# Folder that holds the raw input videos (paths are relative to the notebook's working directory).
RAW_DATA_DIR = '../../data/raw'
# File-name suffixes treated as video files; kept as a tuple so it can be passed to str.endswith.
ACCEPTED_VIDEO_EXTENSIONS = ('.mp4', '.mov', '.mkv')
# Output folder templates: `{file_dir_name}` is a str.format placeholder filled in by the caller.
OUTPUT_VIDEO_DIR = '../../data/processed/video/{file_dir_name}'
OUTPUT_AUDIO_DIR = '../../data/processed/audio/{file_dir_name}'
# Intended destination of the JSON shot metadata.
OUTPUT_METADATA_DIR = '../../data/processed/metadata'
# Names of the entries directly inside RAW_DATA_DIR (os.listdir raises if the folder does not exist).
# NOTE(review): not referenced again in the visible cells - confirm whether it is still needed.
raw_files_dir = os.listdir(RAW_DATA_DIR)

# Sample video used by the cells below.
video_path = "../../data/raw/tt0816692/tt0816692.mov"

In [36]:
def _get_file_paths(directory: str, extensions: str) -> list:
    #generate documentations
    """
    Get all the files in a directory with a specific extension.
    :param directory: The directory to search for files.
    :type directory: str
    :param extensions: The extensions of the files to search for.
    :type extensions: tuple
    :return: A list of the files in the directory with the specified extensions.
    :rtype: list
    """
    files = []
    try:
        with os.scandir(directory) as entries:
            for entry in entries:
                if entry.is_dir():
                    subdirectorio = entry.path
                    with os.scandir(subdirectorio) as subentries:
                        for subentry in subentries:
                            if subentry.is_file() and subentry.name.endswith(extensions):
                                files.append(subentry.path)
    except Exception as e:
        print(f"Error: {e}")
    return files

In [73]:
from scenedetect import open_video, SceneManager, split_video_ffmpeg
from scenedetect.detectors import ContentDetector
from scenedetect.video_splitter import split_video_ffmpeg
from typing import Iterable, Tuple
import json

def _save_shots_info(
    shot_list: Iterable[Tuple[scenedetect.frame_timecode.FrameTimecode, scenedetect.frame_timecode.FrameTimecode]],
    file_video_name: str,
    output_dir: str,
    ):
    
    imdb_id = file_video_name.split('.')[0]
    metadata = {
        'imdb_id': imdb_id,
        'shots_video_info': []
    }
    for shot in shot_list:
        start_time = shot[0].get_timecode()
        end_time = shot[1].get_timecode()

        metadata['shots_video_info'].append({
            'shot_number': f'{shot_list.index(shot) + 1:03d}',
            'start_time': start_time,
            'end_time': end_time,
        })

    with open(f'{output_dir}/{imdb_id}.json', 'w') as f:
        json.dump(metadata, f, indent=4)


def _get_scene_list(
        video_path:str,
        threshold:float = 27.0
    ) -> list:
    """
    Run scene detection on a video and return the detected scenes.

    :param video_path: Path of the video, handed to ``open_video``.
    :type video_path: str
    :param threshold: Value forwarded to ``ContentDetector(threshold=...)``.
    :type threshold: float
    :return: The result of ``SceneManager.get_scene_list()`` after detection.
    :rtype: list
    """
    stream = open_video(video_path)

    # A fresh manager with a single content detector per call.
    manager = SceneManager()
    detector = ContentDetector(threshold=threshold)
    manager.add_detector(detector)

    # Detection runs with the progress bar enabled.
    manager.detect_scenes(stream, show_progress=True)
    return manager.get_scene_list()
    
def _save_video_shots(scene_list: list, video_path: str, output_dir: str):
    """
    Split a video into one ``.mp4`` clip per scene using ``split_video_ffmpeg``.

    :param scene_list: Scenes to cut, passed through as ``scene_list``.
    :type scene_list: list
    :param video_path: Path of the source video.
    :type video_path: str
    :param output_dir: Output folder template containing a ``{file_dir_name}`` placeholder,
        which is replaced by the name of the folder that directly contains the video.
    :type output_dir: str
    :return: Whatever ``split_video_ffmpeg`` returns (ffmpeg's exit code according to the
        PySceneDetect documentation - confirm for the installed version).
    """
    # os.path copes with any separator and with paths that have no parent folder
    # (empty name), where ``video_path.split('/')[-2]`` raised IndexError.
    file_dir_name = os.path.basename(os.path.dirname(video_path))
    return split_video_ffmpeg(
        video_path,
        scene_list=scene_list,
        show_progress=True,
        output_dir=output_dir.format(file_dir_name=file_dir_name),
        output_file_template='$VIDEO_NAME-Shot-$SCENE_NUMBER.mp4',
    )

# def _save_audio_shots(scene_list: list, video_path: str, output_dir: str):
#     split_video_ffmpeg(
#         video_path,
#         scene_list=scene_list,
#         show_progress=True,
#         output_dir=output_dir,
#         output_file_template='$VIDEO_NAME-Shot-$SCENE_NUMBER.mp3',
#         video_output=False,
#     )

def run():
    """
    Detect the shots of the module-level ``video_path``, export one clip per shot and
    write the shot timecodes as JSON metadata.

    Reads the globals ``video_path``, ``OUTPUT_VIDEO_DIR`` and ``OUTPUT_METADATA_DIR``.
    """
    threshold = 27.0
    scene_list = _get_scene_list(video_path, threshold)
    _save_video_shots(scene_list, video_path, output_dir=OUTPUT_VIDEO_DIR)
    # _save_shots_info takes (shot_list, file_video_name, output_dir). The earlier call
    # omitted the file name, so the metadata folder was bound to ``file_video_name`` and
    # the call raised TypeError for the missing ``output_dir``.
    _save_shots_info(
        scene_list,
        file_video_name=os.path.basename(video_path),
        output_dir=OUTPUT_METADATA_DIR,
    )

# Execute the pipeline for the module-level `video_path`.
run()

  Detected: 74 | Progress: 100%|██████████| 8285/8285 [00:03<00:00, 2561.36frames/s]
100%|██████████| 8285/8285 [00:09<00:00, 892.08frame/s] 


In [52]:
from scenedetect import open_video, SceneManager, split_video_ffmpeg
?split_video_ffmpeg

Signature:
split_video_ffmpeg(
    input_video_path: str,
    scene_list: Iterable[Tuple[scenedetect.frame_timecode.FrameTimecode, scenedetect.frame_timecode.FrameTimecode]],
    output_dir: Optional[pathlib.Path] = None,
    output_file_template: str = '$VIDEO_NAME-Scene-$SCENE_NUMBER.mp4',
    video_name: Optional[str] = None,
    arg_overr

In [35]:
# The helper defined in this notebook is `_get_file_paths`; `_get_video_paths` is not
# defined in any visible cell and raises NameError on a fresh kernel.
_get_file_paths(RAW_DATA_DIR, ACCEPTED_VIDEO_EXTENSIONS)

['../../data/raw/tt0816692/tt0816692.mov']

In [38]:
# Scratch run of scene detection on a single video, printing every detected scene.
# The names bound here (video_path, video_manager, scene_manager, scene_list) are
# notebook globals; later cells read `scene_list`.
video_path = "../../data/raw/tt0816692/tt0816692.mov"

video_manager = open_video(video_path)
scene_manager = SceneManager()
scene_manager.add_detector(ContentDetector(threshold=30.0))  # Adjust the threshold to the desired sensitivity.

# Runs detection over the opened video (no progress bar is requested here).
scene_manager.detect_scenes(video_manager)

# Each entry is indexed below as scene[0] (start) and scene[1] (end).
scene_list = scene_manager.get_scene_list()

print(f"Se detectaron {len(scene_list)} escenas.")
# Scenes are reported with 1-based numbers.
for i, scene in enumerate(scene_list):
    print(f"Escena {i + 1}: {scene[0]} - {scene[1]}")


Se detectaron 72 escenas.
Escena 1: 00:00:00.000 - 00:00:01.217
Escena 2: 00:00:01.217 - 00:00:02.300
Escena 3: 00:00:02.300 - 00:00:03.633
Escena 4: 00:00:03.633 - 00:00:04.500
Escena 5: 00:00:04.500 - 00:00:05.233
Escena 6: 00:00:05.233 - 00:00:06.733
Escena 7: 00:00:06.733 - 00:00:07.367
Escena 8: 00:00:07.367 - 00:00:08.583
Escena 9: 00:00:08.583 - 00:00:09.383
Escena 10: 00:00:09.383 - 00:00:11.350
Escena 11: 00:00:11.350 - 00:00:12.867
Escena 12: 00:00:12.867 - 00:00:13.500
Escena 13: 00:00:13.500 - 00:00:15.850
Escena 14: 00:00:15.850 - 00:00:16.817
Escena 15: 00:00:16.817 - 00:00:18.300
Escena 16: 00:00:18.300 - 00:00:20.133
Escena 17: 00:00:20.133 - 00:00:22.550
Escena 18: 00:00:22.550 - 00:00:27.767
Escena 19: 00:00:27.767 - 00:00:28.033
Escena 20: 00:00:28.033 - 00:00:28.600
Escena 21: 00:00:28.600 - 00:00:29.250
Escena 22: 00:00:29.250 - 00:00:29.883
Escena 23: 00:00:29.883 - 00:00:30.767
Escena 24: 00:00:30.767 - 00:00:32.367
Escena 25: 00:00:32.367 - 00:00:33.567
Escena 2

In [45]:
# Frame number of the start boundary of the second detected scene (index 1).
scene_list[1][0].get_frames()

73

In [47]:
# End boundary of the second detected scene; its repr shows timecode, frame number and fps.
scene_list[1][1]

00:00:02.300 [frame=138, fps=60.000]