In [1]:
import cv2, os, pandas as pd, sys

# --- Auto-detect environment for proper tqdm ---
try:
    if 'ipykernel' in sys.modules:
        from tqdm.notebook import tqdm  # Kaggle / Colab / Jupyter
    else:
        from tqdm import tqdm           # Terminal / script
except ImportError:
    from tqdm import tqdm

# --- Define input and output paths ---
BASE_INPUT = "/kaggle/input/iitm-road-safety-clips"
BASE_OUTPUT = "/kaggle/working/frames"

# Create base output folders
os.makedirs(f"{BASE_OUTPUT}/base", exist_ok=True)
os.makedirs(f"{BASE_OUTPUT}/present", exist_ok=True)

def get_video_duration(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"‚ùå Error opening video file: {video_path}")
        return 0
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    duration = total_frames / fps
    cap.release()
    return duration, fps, int(total_frames)

def extract_frames(video_path, output_dir, prefix, fps_extract=2, resize_dim=(1280, 720), max_seconds=None):
    """
    Extracts frames up to `max_seconds` if provided.
    Ensures both base and present videos output equal number of frames.
    """
    os.makedirs(output_dir, exist_ok=True)
    log_path = os.path.join(output_dir, f"{prefix}_frame_log.csv")

    # Setup video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("‚ùå Error opening video file:", video_path)
        return

    video_fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = total_frames / video_fps

    # Restrict duration to max_seconds
    if max_seconds is not None:
        total_frames = int(min(duration, max_seconds) * video_fps)
        print(f"‚è±Ô∏è Limiting {prefix} video to {max_seconds:.2f}s ({total_frames} frames)")

    frame_interval = max(1, int(video_fps / fps_extract))
    total_expected = int(total_frames // frame_interval)

    # ---- Setup progress bar ----
    progress = tqdm(
        total=total_expected,
        desc=f"Extracting {os.path.basename(video_path)} ({prefix})",
        unit="frame",
        dynamic_ncols=True,
        leave=False
    )

    # Create / reset log
    with open(log_path, "w") as f:
        f.write("frame_no,timestamp_sec\n")

    saved = 0
    frame_no = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame_no >= total_frames:
            break

        if frame_no % frame_interval == 0:
            if resize_dim:
                frame = cv2.resize(frame, resize_dim)
            filename = f"{prefix}_frame_{saved:04d}.jpg"
            save_path = os.path.join(output_dir, filename)
            cv2.imwrite(save_path, frame)

            timestamp = frame_no / video_fps
            with open(log_path, "a") as f:
                f.write(f"{saved},{timestamp:.2f}\n")

            saved += 1
            progress.update(1)

        frame_no += 1

    cap.release()
    progress.close()
    print(f"üéØ Extraction complete for {prefix} ‚Üí Total frames saved: {saved}/{total_expected}\n")

# === Determine shortest duration between both videos ===
base_duration, base_fps, base_frames = get_video_duration(f"{BASE_INPUT}/Trial_1_Video.MP4")
present_duration, present_fps, present_frames = get_video_duration(f"{BASE_INPUT}/Trial_2_Video.mp4")
common_duration = min(base_duration, present_duration)

print(f"‚è≥ Base video: {base_duration:.2f}s ({base_frames} frames)")
print(f"‚è≥ Present video: {present_duration:.2f}s ({present_frames} frames)")
print(f"üîÅ Common duration to extract: {common_duration:.2f}s")

# === RUN EXTRACTION ===
extract_frames(f"{BASE_INPUT}/Trial_1_Video.MP4", f"{BASE_OUTPUT}/base", prefix="base", fps_extract=2, max_seconds=common_duration)
extract_frames(f"{BASE_INPUT}/Trial_2_Video.mp4", f"{BASE_OUTPUT}/present", prefix="present", fps_extract=2, max_seconds=common_duration)


‚è≥ Base video: 73.66s (1766 frames)
‚è≥ Present video: 42.11s (1262 frames)
üîÅ Common duration to extract: 42.11s
‚è±Ô∏è Limiting base video to 42.11s (1009 frames)


Extracting Trial_1_Video.MP4 (base):   0%|          | 0/91 [00:00<?, ?frame/s]

üéØ Extraction complete for base ‚Üí Total frames saved: 92/91

‚è±Ô∏è Limiting present video to 42.11s (1262 frames)


Extracting Trial_2_Video.mp4 (present):   0%|          | 0/90 [00:00<?, ?frame/s]

üéØ Extraction complete for present ‚Üí Total frames saved: 91/90



In [2]:
import os

BASE_OUTPUT = "/kaggle/working/frames"

base_frames = [f for f in os.listdir(f"{BASE_OUTPUT}/base") if f.endswith(".jpg")]
present_frames = [f for f in os.listdir(f"{BASE_OUTPUT}/present") if f.endswith(".jpg")]

print(f"Base video frames   : {len(base_frames)}")
print(f"Present video frames: {len(present_frames)}")


Base video frames   : 92
Present video frames: 91
