In [None]:
import os
import random
import cv2 as cv
from tqdm import tqdm

In [None]:
# Directory layout, built relative to the current working directory.
# The project root is the parent of the directory the notebook runs from.
ROOT_DIR = os.path.join(os.getcwd(), os.pardir)
DATA_DIR = os.path.join(ROOT_DIR, 'data')

# input videos and output screenshots both live directly under the data directory
TRAIN_VIDEOS_DIR, OUTPUT_SCREENSHOTS_DIR = (
    os.path.join(DATA_DIR, subdir) for subdir in ('train_videos', 'screenshots')
)

# create the output directory up front; no error if it is already there
os.makedirs(OUTPUT_SCREENSHOTS_DIR, exist_ok=True)

In [None]:
# Collect the full path of every video file in the training videos directory.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the order of video_paths (and anything that iterates over it)
# reproducible from run to run and machine to machine.
video_paths = [
    os.path.join(TRAIN_VIDEOS_DIR, filename)
    for filename in sorted(os.listdir(TRAIN_VIDEOS_DIR))
    # ignore txt files and anything that is not a regular file (e.g. subdirectories)
    if not filename.endswith('.txt')
    and os.path.isfile(os.path.join(TRAIN_VIDEOS_DIR, filename))
]

In [None]:
# parameters
n_screenshots = 1000  # target number of screenshots across all videos
if not video_paths:
    # fail with a clear message instead of a ZeroDivisionError on the next line
    raise ValueError('video_paths is empty: no videos to take screenshots from')
# equal share per video; floor division, so the total can fall short of n_screenshots
n_random_screenshots_from_video = n_screenshots // len(video_paths)
random.seed(42)  # fixed seed so the sampled frame indices are repeatable

# get screenshots
with tqdm(total=n_screenshots, desc="Capturing screenshots") as pbar:
    screenshots_counter = 0 # counts screenshots successfully written to disk
    for input_video_path in video_paths:
        # capture video
        cap = cv.VideoCapture(input_video_path)
        try:
            # skip files that could not be opened as a video
            if not cap.isOpened():
                continue

            total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

            # get n random screenshots; never ask for more distinct frames than
            # the video reports, otherwise random.sample raises ValueError
            n_samples = min(n_random_screenshots_from_video, max(total_frames, 0))
            random_indices = random.sample(range(total_frames), n_samples)
            for idx in random_indices:
                # set capture to random position and read it
                cap.set(cv.CAP_PROP_POS_FRAMES, idx)
                status_ok, frame = cap.read()

                # frames that could not be read are skipped silently
                if not status_ok:
                    continue

                # write screenshot to output dir; count it only if the write succeeded
                filename = os.path.join(OUTPUT_SCREENSHOTS_DIR, f'{screenshots_counter}.jpg')
                if cv.imwrite(filename, frame):
                    screenshots_counter += 1
                    pbar.update(1)
        finally:
            # always free the capture handle, even if reading or writing raised
            cap.release()