In [None]:
import os
import cv2
import random

In [None]:
# Mount Google Drive at /content/drive so the video folders below are reachable
# as ordinary filesystem paths. Requires a Google Colab runtime (google.colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# CHANGE THIS: Google Drive folder that contains the input videos
GDRIVE_VIDEO_FOLDER = '/content/drive/MyDrive/darker_skin_BCH'
# Destination folder for the extracted frame images
OUTPUT_FRAMES_DIR = '/content/drive/MyDrive/darker_skin_BCH_random'

# Create the output directory up front; exist_ok=True keeps this cell safe to re-run
os.makedirs(OUTPUT_FRAMES_DIR, exist_ok=True)


In [None]:
def extract_n_random_frames_across_videos(
    video_folder: str,
    output_folder: str,
    n: int = 50,
    seed: "int | None" = None,
) -> None:
    """Save ``n`` frames sampled uniformly at random across all videos in a folder.

    Every frame of every readable video is treated as one slot in a single
    "global" frame sequence (videos concatenated in sorted filename order).
    ``n`` distinct global indices are drawn without replacement, mapped back
    to ``(video, local frame index)`` and written to ``output_folder`` as
    ``<video stem>_global<G>_local<L>.jpg``.

    Args:
        video_folder: Directory containing the videos. Only files ending in
            ``.mp4``, ``.mov`` or ``.avi`` (case-insensitive) are considered.
        output_folder: Directory the JPEG frames are written to; created if
            it does not exist.
        n: Number of frames to sample. If it exceeds the total number of
            frames reported by OpenCV it is reduced to that total.
        seed: Optional seed for a private random generator, making the
            selection reproducible. When ``None`` (default) the module-level
            ``random`` state is used, as before.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    os.makedirs(output_folder, exist_ok=True)

    # Sort the listing: os.listdir order is arbitrary, and the global frame
    # numbering (and therefore seeded sampling) depends on the video order.
    video_files = sorted(
        f for f in os.listdir(video_folder)
        if f.lower().endswith(('.mp4', '.mov', '.avi'))
    )

    print(f"Found {len(video_files)} videos in {video_folder}.")

    # Collect (video_path, total_frames) for every video that reports frames.
    video_info = []
    for filename in video_files:
        path = os.path.join(video_folder, filename)
        cap = cv2.VideoCapture(path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            cap.release()
        # Unreadable files report a non-positive count and are skipped.
        if frame_count > 0:
            video_info.append((path, frame_count))

    total_frames_available = sum(count for _, count in video_info)
    if n > total_frames_available:
        print(f"⚠️ Requested {n} frames, but only {total_frames_available} available. Reducing to {total_frames_available}.")
        n = total_frames_available

    # Nothing to draw (no usable videos, or n == 0): stop here instead of
    # indexing into an empty sample list.
    if n == 0:
        print(f"⚠️ Nothing to sample; no frames written to {output_folder}.")
        return

    # Sample n distinct frame indices globally across all videos. Sorting lets
    # us walk the videos once, in order, consuming indices as we go.
    rng = random if seed is None else random.Random(seed)
    global_frame_indices = sorted(rng.sample(range(total_frames_available), n))

    print(f"Sampling {n} frames across all videos...")

    saved_count = 0
    pointer = 0        # position in global_frame_indices
    frame_offset = 0   # global index of the current video's first frame
    for path, frame_count in video_info:
        if pointer >= n:
            break

        video_end = frame_offset + frame_count
        # Skip videos that received no sampled frame without opening them.
        if global_frame_indices[pointer] >= video_end:
            frame_offset = video_end
            continue

        filename = os.path.basename(path)
        stem = os.path.splitext(filename)[0]
        cap = cv2.VideoCapture(path)
        try:
            while pointer < n and global_frame_indices[pointer] < video_end:
                global_idx = global_frame_indices[pointer]
                local_frame_idx = global_idx - frame_offset
                cap.set(cv2.CAP_PROP_POS_FRAMES, local_frame_idx)
                success, frame = cap.read()

                if not success:
                    print(f"❌ Failed to read frame {local_frame_idx} from {filename}")
                else:
                    frame_name = f'{stem}_global{global_idx}_local{local_frame_idx}.jpg'
                    frame_path = os.path.join(output_folder, frame_name)
                    # imwrite signals failure through its return value.
                    if cv2.imwrite(frame_path, frame):
                        saved_count += 1
                        print(f"✅ Saved: {frame_name}")
                    else:
                        print(f"❌ Failed to write {frame_path}")

                pointer += 1
        finally:
            # Release the capture even if reading or writing raised.
            cap.release()

        frame_offset = video_end

    # Report only frames that were actually written to disk.
    print(f"🎉 Done! Saved {saved_count} frames to {output_folder}.")
    if saved_count < n:
        print(f"⚠️ {n - saved_count} of {n} sampled frames could not be saved.")


In [None]:
# Draw 25 random frames across all videos in the Drive folder and save them to the output folder.
extract_n_random_frames_across_videos(GDRIVE_VIDEO_FOLDER, OUTPUT_FRAMES_DIR, 25)

Found 14 videos in /content/drive/MyDrive/darker_skin_BCH.
Sampling 25 frames across all videos...
‚úÖ Saved: 2025-04-17_12-08-25_global1655_local1655.jpg
‚úÖ Saved: 2025-04-17_12-08-25_global2177_local2177.jpg
‚úÖ Saved: 2025-04-17_12-11-39_global5643_local1970.jpg
‚úÖ Saved: 2025-04-17_12-11-39_global5664_local1991.jpg
‚úÖ Saved: 2025-04-17_12-19-49_global9424_local1117.jpg
‚úÖ Saved: 2025-04-17_12-19-49_global10581_local2274.jpg
‚úÖ Saved: 2025-04-17_12-29-50_global20649_local1853.jpg
‚úÖ Saved: 2025-04-17_12-29-50_global21754_local2958.jpg
‚úÖ Saved: 2025-04-17_17-20-39_global25023_local1133.jpg
‚úÖ Saved: 2025-04-17_17-27-44_global31561_local1031.jpg
‚úÖ Saved: 2025-04-17_17-27-44_global31812_local1282.jpg
‚úÖ Saved: 2025-04-17_17-27-44_global32213_local1683.jpg
‚úÖ Saved: 2025-04-17_17-27-44_global34695_local4165.jpg
‚úÖ Saved: 2025-04-17_17-37-18_global35792_local244.jpg
‚úÖ Saved: 2025-04-17_17-33-25_global39976_local340.jpg
‚úÖ Saved: 2025-04-17_17-33-25_global40227_local591.j