In [1]:
import cv2, os, pandas as pd, sys

# --- Auto-detect environment for proper tqdm ---
try:
    if 'ipykernel' in sys.modules:
        from tqdm.notebook import tqdm  # Kaggle / Colab / Jupyter
    else:
        from tqdm import tqdm           # Terminal / script
except ImportError:
    from tqdm import tqdm

# --- Define input and output paths ---
BASE_INPUT = "/kaggle/input/iitm-road-safety-clips"
BASE_OUTPUT = "/kaggle/working/frames"

# Create base output folders
os.makedirs(f"{BASE_OUTPUT}/base", exist_ok=True)
os.makedirs(f"{BASE_OUTPUT}/present", exist_ok=True)


def extract_frames(video_path, output_dir, prefix, fps_extract=2, resize_dim=(1280, 720)):
    """
    Frame extractor with progress bar, prefix naming, and resume functionality.
    Saves logs but does not duplicate progress bars in Kaggle/Colab.
    """
    os.makedirs(output_dir, exist_ok=True)
    log_path = os.path.join(output_dir, f"{prefix}_frame_log.csv")

    # ---- Load checkpoint if exists ----
    if os.path.exists(log_path):
        df_log = pd.read_csv(log_path)
        saved_frames = set(df_log['frame_no'].tolist())
        start_index = max(saved_frames) + 1 if len(saved_frames) > 0 else 0
        print(f"üü° Resuming from frame {start_index} ... ({len(saved_frames)} already done)")
    else:
        with open(log_path, "w") as f:
            f.write("frame_no,timestamp_sec\n")
        saved_frames = set()
        start_index = 0
        print(f"üü¢ Starting fresh extraction for {prefix} ...")

    # ---- Setup video ----
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("‚ùå Error opening video file:", video_path)
        return

    video_fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, int(video_fps / fps_extract))
    total_expected = int(total_frames // frame_interval)

    # ---- Setup progress bar ----
    progress = tqdm(
        total=total_expected,
        desc=f"Extracting {os.path.basename(video_path)} ({prefix})",
        unit="frame",
        initial=len(saved_frames),
        dynamic_ncols=True,
        leave=False  # prevents duplicate progress bars in notebooks
    )

    frame_no, saved = 0, len(saved_frames)

    # ---- Main frame extraction loop ----
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        current_frame_id = frame_no // frame_interval

        # Skip already processed frames
        if current_frame_id in saved_frames:
            frame_no += 1
            continue

        if frame_no % frame_interval == 0:
            if resize_dim:
                frame = cv2.resize(frame, resize_dim)

            # Save with prefix (base_ / present_)
            filename = f"{prefix}_frame_{current_frame_id:04d}.jpg"
            save_path = os.path.join(output_dir, filename)
            cv2.imwrite(save_path, frame)

            timestamp = frame_no / video_fps
            with open(log_path, "a") as f:
                f.write(f"{current_frame_id},{timestamp:.2f}\n")

            saved += 1
            progress.update(1)

        frame_no += 1

    cap.release()
    progress.close()
    print(f"üéØ Extraction complete for {prefix} ‚Üí Total frames saved: {saved}/{total_expected}\n")


# === RUN EXTRACTION ===
extract_frames(f"{BASE_INPUT}/Trial_1_Video.MP4", f"{BASE_OUTPUT}/base", prefix="base", fps_extract=2)
extract_frames(f"{BASE_INPUT}/Trial_2_Video.mp4", f"{BASE_OUTPUT}/present", prefix="present", fps_extract=2)


üü¢ Starting fresh extraction for base ...


Extracting Trial_1_Video.MP4 (base):   0%|          | 0/160 [00:00<?, ?frame/s]

üéØ Extraction complete for base ‚Üí Total frames saved: 161/160

üü¢ Starting fresh extraction for present ...


Extracting Trial_2_Video.mp4 (present):   0%|          | 0/90 [00:00<?, ?frame/s]

üéØ Extraction complete for present ‚Üí Total frames saved: 91/90



In [2]:
import os

BASE_OUTPUT = "/kaggle/working/frames"

base_frames = [f for f in os.listdir(f"{BASE_OUTPUT}/base") if f.endswith(".jpg")]
present_frames = [f for f in os.listdir(f"{BASE_OUTPUT}/present") if f.endswith(".jpg")]

print(f"Base video frames   : {len(base_frames)}")
print(f"Present video frames: {len(present_frames)}")


Base video frames   : 161
Present video frames: 91
