In [14]:
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from pydub.silence import detect_silence

# Initialize video capture
video_path = 'test_videos/567.mp4'
video = VideoFileClip(video_path)
audio = video.audio.to_soundarray(fps=44100)

# Constants
RATE = 44100  # Audio sample rate (Hz)
CHANNELS = 2  # Number of audio channels
AUDIO_CHUNK_MILLIS = 200  # 1 second
SILENCE_THRESHOLD = -10.0  # Silence threshold in dB

# Initialize audio stream and other variables
audio_stream = AudioSegment.silent(duration=0, frame_rate=RATE)

current_millis = 0
audio_pointer = 0  # Pointer for where we are in the audio array

# Video capture with OpenCV for synchronization
cap = cv2.VideoCapture(video_path)

# Calculate how many audio frames correspond to one video frame
audio_frames_per_video_frame = int(RATE / video.fps)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Extract corresponding audio for this video frame
    audio_chunk_data = audio[audio_pointer:audio_pointer + audio_frames_per_video_frame]
    audio_pointer += audio_frames_per_video_frame

    # Convert numpy array to pydub AudioSegment
    audio_chunk = AudioSegment(
        audio_chunk_data.tobytes(),
        frame_rate=RATE,
        sample_width=2,
        channels=CHANNELS
    )

    # Append to ongoing audio stream
    audio_stream += audio_chunk
    current_millis += int(1000 / video.fps)

    # Process the audio_stream if it's long enough
    if len(audio_stream) >= AUDIO_CHUNK_MILLIS:
        # Detect silence in the 1-second audio chunk
        silence_ranges = detect_silence(audio_stream, min_silence_len=100, silence_thresh=SILENCE_THRESHOLD)

        if silence_ranges:
            print(f'Pause detected at video timestamp: {current_millis // 1000} seconds')

        # Remove this chunk from the ongoing audio stream
        audio_stream = audio_stream[AUDIO_CHUNK_MILLIS:]

# Release the video capture object
cap.release()


Pause detected at video timestamp: 0 seconds
Pause detected at video timestamp: 36 seconds
Pause detected at video timestamp: 36 seconds
Pause detected at video timestamp: 36 seconds
Pause detected at video timestamp: 36 seconds
Pause detected at video timestamp: 36 seconds
Pause detected at video timestamp: 51 seconds
Pause detected at video timestamp: 51 seconds
Pause detected at video timestamp: 66 seconds
Pause detected at video timestamp: 66 seconds
Pause detected at video timestamp: 66 seconds
Pause detected at video timestamp: 80 seconds
Pause detected at video timestamp: 80 seconds
Pause detected at video timestamp: 81 seconds
Pause detected at video timestamp: 95 seconds
Pause detected at video timestamp: 126 seconds
Pause detected at video timestamp: 127 seconds
Pause detected at video timestamp: 127 seconds
Pause detected at video timestamp: 128 seconds
Pause detected at video timestamp: 128 seconds
Pause detected at video timestamp: 129 seconds
Pause detected at video times