<a href="https://colab.research.google.com/github/detektor777/colab_list_video/blob/main/split_video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Google Drive (source of the videos and destination of the resulting archive): https://drive.google.com/drive

In [None]:
#@title ##**Select Video File** { display-mode: "form" }
import os
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import files
from google.colab import drive

# Where the video comes from. The trailing #@param annotation lists the three
# values the branches below compare against.
upload_option = "Load from Google Drive Root"  #@param ["Upload from PC", "Load from Google Drive Root", "Load from Google Drive"]

# Path of the chosen video; stays None until a file is uploaded or a picker
# button is clicked. Later cells read this variable.
file_name = None
# Most recently clicked picker button (written by the click handlers below).
last_selected_button = None

def reset_button_colors(buttons):
    """Clear the custom background colour of every button in *buttons*."""
    for button in buttons:
        style = button.style
        style.button_color = None

def _find_videos(root, extensions, recursive):
    """Return the video files under *root* as sorted paths relative to *root*.

    Args:
        root: Directory to scan.
        extensions: Lower-case file extensions (with leading dot) to accept.
        recursive: When true, descend into sub-folders with ``os.walk``;
            otherwise only regular files directly inside *root* are listed.
    """
    matches = []
    if recursive:
        for dirpath, _, filenames in os.walk(root):
            for name in filenames:
                if os.path.splitext(name)[1].lower() in extensions:
                    matches.append(os.path.relpath(os.path.join(dirpath, name), root))
    else:
        for name in os.listdir(root):
            is_video = os.path.splitext(name)[1].lower() in extensions
            if is_video and os.path.isfile(os.path.join(root, name)):
                matches.append(name)
    # os.listdir / os.walk yield entries in arbitrary order; sort for a stable list
    return sorted(matches)


def _show_file_picker(root, relative_paths):
    """Show one button per path; a click stores the full path in the global ``file_name``."""
    output = widgets.Output()
    buttons = []

    def on_button_clicked(b):
        global file_name, last_selected_button
        with output:
            clear_output()
            reset_button_colors(buttons)
            file_name = os.path.join(root, b.description)
            # Green: the path exists; red: it does not exist (any more)
            b.style.button_color = 'green' if os.path.exists(file_name) else 'red'
            last_selected_button = b
            print(f"Selected file: {file_name}")

    for relative_path in relative_paths:
        button = widgets.Button(
            description=relative_path,
            layout=widgets.Layout(width='500px', overflow='hidden', text_overflow='ellipsis'),
        )
        button.on_click(on_button_clicked)
        buttons.append(button)

    display(widgets.VBox(buttons), output)


if upload_option == "Upload from PC":
    print("Please upload a video file.")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is used
        file_name = next(iter(uploaded))
    else:
        print("No file uploaded.")
        file_name = None

elif upload_option in ("Load from Google Drive Root", "Load from Google Drive"):
    drive.mount('/content/drive')
    root_dir = '/content/drive/MyDrive/'
    video_extensions = ['.mp4', '.mkv', '.avi', '.mov']

    # "Load from Google Drive" also scans sub-folders; the "Root" variant does not
    include_subfolders = upload_option == "Load from Google Drive"
    files_list = _find_videos(root_dir, video_extensions, include_subfolders)

    if not files_list:
        if include_subfolders:
            print("No video files found in Google Drive or its subfolders.")
        else:
            print("No video files found in Google Drive root.")
        file_name = None
    else:
        if include_subfolders:
            print("Select a video file from Google Drive (including subfolders):")
        else:
            print("Select a video file from Google Drive root:")
        _show_file_picker(root_dir, files_list)

# For the Drive options this runs before any button is clicked, so the path is
# reported as "not set" until the user picks a file.
if file_name:
    print(f"Video file path set to: {file_name}")
else:
    print("Video file path not set. Please select a file.")

In [None]:
#@title ##**Video Split Parameters** { display-mode: "form" }
# Length of each output segment in seconds (see the message printed below).
segment_duration = 10  #@param {type:"integer"}
# Splitting strategy; the split cell compares this against
# "Fast (without re-encoding)" and treats any other value as re-encoding.
split_method = "Precise (with re-encoding)" #@param ["Fast (without re-encoding)", "Precise (with re-encoding)"]

# Echo the chosen settings so they are visible in the cell output.
print(f"Video will be split into segments of {segment_duration} seconds each")
print(f"Method: {split_method}")

In [None]:
#@title ##**Split Video and Create Archive** { display-mode: "form" }
import subprocess
import zipfile
import shutil
import json
from pathlib import Path
from IPython.display import HTML, display, clear_output
import time

def show_progress(current, total, text=""):
    """Redraw a text progress bar in the cell output.

    The previous cell output is cleared first, so only the latest bar is
    visible.

    Args:
        current: Number of items processed so far.
        total: Total number of items. A non-positive total renders an empty
            bar at 0% instead of dividing by zero.
        text: Caption printed above the bar.
    """
    bar_length = 50

    if total > 0:
        # Clamp so an out-of-range ``current`` cannot overflow the bar
        done = min(max(current, 0), total)
        # Multiply before dividing: avoids float rounding such as 29/50 -> 57%
        percent = int(100 * done // total)
        filled_length = int(bar_length * done // total)
    else:
        percent = 0
        filled_length = 0

    bar = '█' * filled_length + '░' * (bar_length - filled_length)

    clear_output(wait=True)
    print(f"\n{text}")
    print(f"Progress: |{bar}| {percent}% ({current}/{total})")

def get_video_info(video_path):
    """Return ``(duration, fps)`` of the first video stream, as reported by ffprobe.

    The duration is taken from the video stream and, when the stream does not
    carry one, from the container-level ``format`` section instead.

    Args:
        video_path: Path of the media file to inspect.

    Returns:
        Tuple ``(duration, fps)``: the duration in seconds as a float and the
        frame rate computed from ffprobe's ``r_frame_rate`` fraction
        (``0.0`` when that fraction is missing or malformed).

    Raises:
        RuntimeError: If ffprobe exits with an error, prints unparsable
            output, finds no video stream, or reports no usable duration.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'v:0',
        # ":format=duration" additionally requests the container duration as a fallback
        '-show_entries', 'stream=duration,r_frame_rate,nb_frames:format=duration',
        '-of', 'json',
        video_path
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed for {video_path!r}: {result.stderr.strip()}")

    try:
        info = json.loads(result.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError(f"ffprobe returned unparsable output for {video_path!r}") from err

    streams = info.get('streams') or []
    if not streams:
        raise RuntimeError(f"No video stream found in {video_path!r}")
    stream = streams[0]

    # Prefer the stream duration, then fall back to the container duration
    duration = None
    for raw_duration in (stream.get('duration'), (info.get('format') or {}).get('duration')):
        try:
            duration = float(raw_duration)
        except (TypeError, ValueError):
            continue
        break
    if duration is None:
        raise RuntimeError(f"ffprobe reported no duration for {video_path!r}")

    # r_frame_rate is a fraction such as "30000/1001"
    fps_num, _, fps_den = str(stream.get('r_frame_rate', '')).partition('/')
    try:
        fps = int(fps_num) / int(fps_den or 1)
    except (ValueError, ZeroDivisionError):
        fps = 0.0

    return duration, fps

def split_video_fast_accurate(video_path, segment_duration, output_dir):
    """Split a video into fixed-length segments using stream copy (no re-encoding).

    Each segment is written as ``segment_NNNN.mp4`` into *output_dir*. After
    every successful ffmpeg run the segment is probed, and its actual duration
    is recorded next to the expected one in ``segments_info.json``.

    Args:
        video_path: Path of the source video.
        segment_duration: Target length of each segment in seconds; must be > 0.
        output_dir: Directory for the segments and the info file (created if
            missing).

    Returns:
        Tuple ``(output_files, info_file)``: the paths of the segments that
        were created successfully, and the path of ``segments_info.json``.

    Raises:
        ValueError: If *segment_duration* is not positive.
    """
    import math  # local import: only needed for the segment count

    if segment_duration <= 0:
        raise ValueError(f"segment_duration must be positive, got {segment_duration!r}")

    os.makedirs(output_dir, exist_ok=True)

    # Probe the source video
    duration, fps = get_video_info(video_path)

    print(f"Video duration: {duration:.2f} seconds")
    print(f"FPS: {fps:.2f}")

    # Number of segments, counting a shorter final one
    num_segments = max(0, math.ceil(duration / segment_duration))

    print(f"\nTotal segments to create: {num_segments}")

    output_files = []
    segments_info = []

    for i in range(num_segments):
        start_time = i * segment_duration
        end_time = min((i + 1) * segment_duration, duration)
        duration_seg = end_time - start_time

        output_file = os.path.join(output_dir, f"segment_{i:04d}.mp4")

        show_progress(i + 1, num_segments, f"Splitting video into {num_segments} segments")

        # NOTE(review): -async/-vsync are kept from the original command; they
        # presumably have no effect together with "-c copy" — confirm.
        cmd = [
            'ffmpeg', '-y',
            '-ss', str(start_time),
            '-i', video_path,
            '-t', str(duration_seg),
            '-c', 'copy',
            '-avoid_negative_ts', 'make_zero',
            '-async', '1',
            '-vsync', '0',
        ]
        # The first segment is written without -copyts
        if i != 0:
            cmd.append('-copyts')
        cmd.append(output_file)

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0 and os.path.exists(output_file):
            # Measure what was actually written
            seg_duration, _ = get_video_info(output_file)

            segments_info.append({
                'num': i,
                'filename': os.path.basename(output_file),
                'start': start_time,
                'end': end_time,
                'expected_duration': duration_seg,
                'actual_duration': seg_duration
            })

            output_files.append(output_file)

            print(f"\n  Segment {i+1}: {start_time:.2f}s - {end_time:.2f}s")
            print(f"  Expected: {duration_seg:.3f}s, Actual: {seg_duration:.3f}s")
        else:
            print(f"\n  ❌ Error processing segment {i+1}")
            if result.stderr:
                # ffmpeg prints its banner first; the actual error is at the end
                print(f"  Error: ...{result.stderr.strip()[-300:]}")

    # Final summary
    clear_output(wait=True)
    print(f"✅ Successfully created {len(output_files)} out of {num_segments} segments")

    # Persist the metadata used later for reassembly and verification
    info_file = os.path.join(output_dir, 'segments_info.json')
    with open(info_file, 'w') as f:
        json.dump({
            'original_video': os.path.basename(video_path),
            'duration': duration,
            'fps': fps,
            'segment_duration': segment_duration,
            'segments': segments_info,
            'method': 'fast_copy'
        }, f, indent=2)

    return output_files, info_file

def split_video_precise_reencoded(video_path, segment_duration, output_dir):
    """Split a video into fixed-length segments, re-encoding each one.

    Video is encoded with libx264 (preset ``fast``, CRF 18) and audio with AAC
    at 192k. Each segment is written as ``segment_NNNN.mp4`` into
    *output_dir*, and its probed duration is recorded in
    ``segments_info.json``.

    Args:
        video_path: Path of the source video.
        segment_duration: Target length of each segment in seconds; must be > 0.
        output_dir: Directory for the segments and the info file (created if
            missing).

    Returns:
        Tuple ``(output_files, info_file)``: the paths of the segments that
        were created successfully, and the path of ``segments_info.json``.

    Raises:
        ValueError: If *segment_duration* is not positive.
    """
    import math  # local import: only needed for the segment count

    if segment_duration <= 0:
        raise ValueError(f"segment_duration must be positive, got {segment_duration!r}")

    os.makedirs(output_dir, exist_ok=True)

    duration, fps = get_video_info(video_path)

    print(f"Video duration: {duration:.2f} seconds")
    print(f"FPS: {fps:.2f}")

    # Number of segments, counting a shorter final one
    num_segments = max(0, math.ceil(duration / segment_duration))

    print(f"\nTotal segments to create: {num_segments}")

    output_files = []
    segments_info = []

    for i in range(num_segments):
        start_time = i * segment_duration
        end_time = min((i + 1) * segment_duration, duration)
        duration_seg = end_time - start_time

        output_file = os.path.join(output_dir, f"segment_{i:04d}.mp4")

        show_progress(i + 1, num_segments, f"Re-encoding and splitting video into {num_segments} segments")

        cmd = [
            'ffmpeg', '-y',
            '-ss', str(start_time),
            '-i', video_path,
            '-t', str(duration_seg),
            '-c:v', 'libx264',
            '-preset', 'fast',
            '-crf', '18',
            '-c:a', 'aac',
            '-b:a', '192k',
            output_file
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0 and os.path.exists(output_file):
            # Measure what was actually written
            seg_duration, _ = get_video_info(output_file)

            segments_info.append({
                'num': i,
                'filename': os.path.basename(output_file),
                'start': start_time,
                'end': end_time,
                'expected_duration': duration_seg,
                'actual_duration': seg_duration
            })

            output_files.append(output_file)

            print(f"\n  Segment {i+1}: {start_time:.2f}s - {end_time:.2f}s")
            print(f"  Duration: {seg_duration:.3f}s")
        else:
            # Report the failure instead of silently dropping the segment
            print(f"\n  ❌ Error processing segment {i+1}")
            if result.stderr:
                # ffmpeg prints its banner first; the actual error is at the end
                print(f"  Error: ...{result.stderr.strip()[-300:]}")

    clear_output(wait=True)
    print(f"✅ Successfully created {len(output_files)} segments")

    # Persist the metadata used later for reassembly and verification
    info_file = os.path.join(output_dir, 'segments_info.json')
    with open(info_file, 'w') as f:
        json.dump({
            'original_video': os.path.basename(video_path),
            'duration': duration,
            'fps': fps,
            'segment_duration': segment_duration,
            'segments': segments_info,
            'method': 'reencoded'
        }, f, indent=2)

    return output_files, info_file

# Main flow: split the selected video and store the segments as a zip archive on Google Drive
if file_name and os.path.exists(file_name):
    video_basename = os.path.splitext(os.path.basename(file_name))[0]
    # Scratch directory for the segments; removed again in the ``finally`` below
    temp_dir = f"/content/temp_segments_{video_basename}"
    drive_root = '/content/drive/MyDrive/'

    print(f"Processing: {video_basename}")
    print(f"Method: {split_method}")
    print("-" * 50)

    # Drop leftovers of an interrupted earlier run so they cannot mix with new segments
    shutil.rmtree(temp_dir, ignore_errors=True)

    try:
        # Pick the splitting function from the form parameter
        if split_method == "Fast (without re-encoding)":
            segment_files, info_file = split_video_fast_accurate(file_name, segment_duration, temp_dir)
        else:
            segment_files, info_file = split_video_precise_reencoded(file_name, segment_duration, temp_dir)

        if segment_files:
            # The "Upload from PC" option never mounts Drive, so the target
            # folder may not exist yet
            if not os.path.isdir(drive_root):
                drive.mount('/content/drive')

            archive_name = f"{video_basename}.zip"
            archive_path = os.path.join(drive_root, archive_name)

            print("\n" + "="*50)
            print("Creating archive...")

            total_files = len(segment_files) + 1  # +1 for the info file

            with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Segments are stored flat, under their base names
                for current, segment_file in enumerate(segment_files, start=1):
                    show_progress(current, total_files, f"Adding files to archive: {archive_name}")
                    zipf.write(segment_file, os.path.basename(segment_file))

                # The info file goes in last
                show_progress(total_files, total_files, f"Adding files to archive: {archive_name}")
                zipf.write(info_file, 'segments_info.json')

            clear_output(wait=True)
            print(f"✅ Archive created successfully!")
            print(f"📁 Location: {archive_path}")
            print(f"📊 Total segments: {len(segment_files)}")
            print(f"⚙️  Method used: {split_method}")

            # Report the archive size in MB
            archive_size = os.path.getsize(archive_path) / (1024 * 1024)
            print(f"💾 Archive size: {archive_size:.2f} MB")
        else:
            print("❌ Failed to split video")
    finally:
        # Remove the scratch directory on success, on failure, and on errors
        shutil.rmtree(temp_dir, ignore_errors=True)
else:
    print("❌ Please select a video file first!")

In [None]:
#@title ##**Test: Reassemble Video from Segments** { display-mode: "form" }
import tempfile

def reassemble_video_precise(archive_path, output_path):
    """Rebuild a video from a segment archive and print a duration check.

    The archive is extracted into a temporary directory, the segments listed
    in its ``segments_info.json`` are joined with ffmpeg's concat demuxer
    (stream copy), and the resulting duration is compared with the recorded
    original duration. All results are printed; nothing is returned.

    Args:
        archive_path: Zip archive containing the segments and
            ``segments_info.json``.
        output_path: Destination of the reassembled video.
    """
    # ffmpeg runs with the temp directory as cwd, so a relative destination
    # would end up inside it and be deleted together with it
    output_path = os.path.abspath(output_path)

    with tempfile.TemporaryDirectory() as temp_dir:
        print("Extracting archive...")

        # Unpack the archive
        with zipfile.ZipFile(archive_path, 'r') as zipf:
            file_list = zipf.namelist()
            total_files = len(file_list)

            for i, member in enumerate(file_list):
                show_progress(i + 1, total_files, "Extracting files from archive")
                zipf.extract(member, temp_dir)

        # Load the metadata written by the split step
        info_path = os.path.join(temp_dir, 'segments_info.json')
        with open(info_path, 'r') as f:
            info = json.load(f)

        clear_output(wait=True)
        print(f"Original video: {info['original_video']}")
        print(f"Total segments: {len(info['segments'])}")
        print(f"Method: {info.get('method', 'unknown')}")
        print("-" * 50)

        # Build the concat list from the recorded filenames: segment numbers
        # are not contiguous when a segment failed during splitting
        concat_file = os.path.join(temp_dir, 'concat_list.txt')
        with open(concat_file, 'w') as f:
            for i, seg in enumerate(info['segments']):
                segment_name = seg.get('filename', f"segment_{i:04d}.mp4")
                f.write(f"file '{segment_name}'\n")

        print("\nReassembling video...")

        cmd = [
            'ffmpeg', '-y',
            '-f', 'concat',
            '-safe', '0',
            '-i', concat_file,
            '-c', 'copy',
            output_path
        ]

        total_duration = info['duration']
        stderr_tail = []  # last lines of ffmpeg output, kept for error reporting

        # cwd=temp_dir lets the relative names in the concat list resolve;
        # stdout is discarded because nothing reads it
        with subprocess.Popen(cmd, cwd=temp_dir,
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.PIPE,
                              universal_newlines=True) as process:
            for line in process.stderr:
                stderr_tail.append(line)
                del stderr_tail[:-20]

                if 'time=' not in line or total_duration <= 0:
                    continue
                # Progress lines carry "time=HH:MM:SS.ms"
                try:
                    time_str = line.split('time=')[1].split()[0]
                    hours, minutes, seconds = time_str.split(':')
                    current_time = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
                except (IndexError, ValueError):
                    continue  # e.g. "time=N/A"
                progress = int((current_time / total_duration) * 100)
                print(f"\rProgress: {progress}%", end='', flush=True)

        if process.returncode == 0:
            print("\n✅ Video reassembled successfully!")

            # Compare the overall duration with the recorded original
            reassembled_duration, _ = get_video_info(output_path)
            original_duration = info['duration']

            print(f"\nVerification:")
            print(f"Original duration: {original_duration:.3f} seconds")
            print(f"Reassembled duration: {reassembled_duration:.3f} seconds")
            print(f"Difference: {abs(original_duration - reassembled_duration):.3f} seconds")

            if abs(original_duration - reassembled_duration) < 0.5:
                print("✅ Duration match - reassembly successful!")
            else:
                print("⚠️ Duration mismatch detected")

            # Per-segment details
            print("\nSegment details:")
            total_expected = 0
            total_actual = 0

            for seg in info['segments']:
                expected = seg.get('expected_duration', seg.get('duration', 0))
                actual = seg.get('actual_duration', seg.get('duration', 0))
                total_expected += expected
                total_actual += actual

                diff = abs(expected - actual)
                status = "✓" if diff < 0.1 else "⚠"
                print(f"  Segment {seg['num']+1}: {actual:.3f}s (expected: {expected:.3f}s) {status}")

            print(f"\nTotal expected: {total_expected:.3f}s")
            print(f"Total actual: {total_actual:.3f}s")
            print(f"Sum difference: {abs(total_expected - total_actual):.3f}s")

        else:
            print("\n❌ Error reassembling video")
            # stderr was consumed by the progress loop above, so report the
            # lines collected there
            error_output = ''.join(stderr_tail).strip()
            if error_output:
                print(f"Error details: ...{error_output[-500:]}")

# Self-test: rebuild the video from the archive and compare it with the original
if file_name and os.path.exists(file_name):
    video_basename = os.path.splitext(os.path.basename(file_name))[0]
    archive_path = os.path.join('/content/drive/MyDrive/', f"{video_basename}.zip")

    if os.path.exists(archive_path):
        print(f"Testing reassembly of: {archive_path}")
        print("="*50)

        test_output = f"/content/reassembled_{video_basename}.mp4"
        # A stale file from an aborted run must not be mistaken for a fresh result
        if os.path.exists(test_output):
            os.remove(test_output)

        try:
            reassemble_video_precise(archive_path, test_output)

            # Compare with the original
            if os.path.exists(test_output):
                print("\n" + "="*50)
                print("Final comparison with original:")

                orig_duration, orig_fps = get_video_info(file_name)
                reasm_duration, reasm_fps = get_video_info(test_output)

                print(f"\nOriginal video:")
                print(f"  Duration: {orig_duration:.3f}s")
                print(f"  FPS: {orig_fps:.2f}")

                print(f"\nReassembled video:")
                print(f"  Duration: {reasm_duration:.3f}s")
                print(f"  FPS: {reasm_fps:.2f}")

                # File sizes in MB
                orig_size = os.path.getsize(file_name) / (1024 * 1024)
                reasm_size = os.path.getsize(test_output) / (1024 * 1024)

                print(f"\nFile sizes:")
                print(f"  Original: {orig_size:.2f} MB")
                print(f"  Reassembled: {reasm_size:.2f} MB")
                print(f"  Size difference: {abs(orig_size - reasm_size):.2f} MB")

                # Final verdict: tolerances of 0.5 s and 0.1 fps
                duration_match = abs(orig_duration - reasm_duration) < 0.5
                fps_match = abs(orig_fps - reasm_fps) < 0.1

                print("\n" + "="*50)
                print("FINAL VERDICT:")
                if duration_match and fps_match:
                    print("✅ Video successfully split and reassembled!")
                    print("   The reassembled video matches the original.")
                else:
                    print("⚠️ Some differences detected:")
                    if not duration_match:
                        print(f"   - Duration difference: {abs(orig_duration - reasm_duration):.3f}s")
                    if not fps_match:
                        print(f"   - FPS difference: {abs(orig_fps - reasm_fps):.2f}")
                    print("\n   Consider using the 'Precise (with re-encoding)' method for better accuracy.")
        finally:
            # Remove the test file even when the comparison above raised
            if os.path.exists(test_output):
                os.remove(test_output)
                print("\n✅ Test file cleaned up")
    else:
        print(f"❌ Archive not found: {archive_path}")
        print("   Please run the split operation first.")
else:
    print("❌ Please select a video file first!")
