In [None]:
import os
os.chdir('cyclegan')

!pip install -r requirements_dev.txt

In [None]:
import hashlib
import cv2
import logging
import shutil
import random
import math

from tqdm.notebook import tqdm_notebook

# Logging: show INFO-level messages emitted by this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Number of hex characters kept when a random frame file name is generated.
RANDOM_FILENAME_LENGTH = 12

# Extension of the frame images written to disk.
OUTPUT_IMAGE_EXT = ".png"

# Default for whether extracted frames get random (True) or sequential (False) names.
SHOULD_RANDOMISE_FILENAMES = True

# Dataset location
ROOT_DATASET_FOLDER = "datasets"
DATASET_NAME = "threeD2twoD"

# Domain A (3D): source video and the folder that receives its frames
A_3D_VIDEO_FILE = "A_3D_256.mp4"
A_3D_OUTPUT_FOLDER = "A_3D_256_frames"
A_3D_VIDEO_PATH = os.path.join(ROOT_DATASET_FOLDER, DATASET_NAME, A_3D_VIDEO_FILE)
A_3D_RAW_DATASET_PATH = os.path.join(ROOT_DATASET_FOLDER, DATASET_NAME, A_3D_OUTPUT_FOLDER)

# Domain B (2D): source video and the folder that receives its frames
B_2D_VIDEO_FILE = "B_2D_256.mp4"
B_2D_OUTPUT_FOLDER = "B_2D_256_frames"
B_2D_VIDEO_PATH = os.path.join(ROOT_DATASET_FOLDER, DATASET_NAME, B_2D_VIDEO_FILE)
B_2D_RAW_DATASET_PATH = os.path.join(ROOT_DATASET_FOLDER, DATASET_NAME, B_2D_OUTPUT_FOLDER)


In [None]:
def extract_frames(
    input_file_path: str,
    output_folder_path: str,
    should_randomise: bool = SHOULD_RANDOMISE_FILENAMES,
):
    """Write every frame of a video into a folder as individual image files.

    Args:
        input_file_path: Path of the video file to read.
        output_folder_path: Folder that receives the frames; it is created
            (including missing parent folders) when it does not exist.
        should_randomise: When True each frame gets a random hex file name of
            ``RANDOM_FILENAME_LENGTH`` characters; otherwise frames are named
            after their zero-padded index. Both use ``OUTPUT_IMAGE_EXT``.

    Raises:
        OSError: If the video file cannot be opened.
    """
    os.makedirs(output_folder_path, exist_ok=True)

    vidcap = cv2.VideoCapture(input_file_path)
    try:
        # Fail loudly instead of silently producing an empty frames folder.
        if not vidcap.isOpened():
            raise OSError(f"Could not open video file `{input_file_path}`")

        frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        logger.info(f"Extracting {frame_count} frames from `{input_file_path}`")

        for count in tqdm_notebook(range(frame_count)):
            # The reported frame count is not guaranteed to match the number of
            # frames that can actually be decoded, so stop at the first failed
            # read rather than handing an empty image to cv2.imwrite.
            success, image = vidcap.read()
            if not success:
                logger.warning(
                    f"Stopped after {count} of {frame_count} frames: "
                    f"no more frames could be read from `{input_file_path}`"
                )
                break

            raw_frame_file_name = (
                f"{hashlib.sha1(os.urandom(32)).hexdigest()[:RANDOM_FILENAME_LENGTH]}{OUTPUT_IMAGE_EXT}"
                if should_randomise
                else f"{count:03d}{OUTPUT_IMAGE_EXT}"
            )
            frame_file_path = os.path.join(output_folder_path, raw_frame_file_name)

            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(frame_file_path, image):
                logger.warning(f"Failed to write frame {count} to `{frame_file_path}`")
    finally:
        # Always free the underlying video handle, even when extraction fails.
        vidcap.release()


In [None]:
# Extract the frames of the A (3D) video into its raw frames folder.
extract_frames(
    input_file_path=A_3D_VIDEO_PATH,
    output_folder_path=A_3D_RAW_DATASET_PATH,
)


In [None]:
remove_dup_cmd = f"image-cleaner {A_3D_RAW_DATASET_PATH}"
!{remove_dup_cmd}

In [None]:
# Extract the frames of the B (2D) video into its raw frames folder.
extract_frames(
    input_file_path=B_2D_VIDEO_PATH,
    output_folder_path=B_2D_RAW_DATASET_PATH,
)

In [None]:
remove_dup_cmd = f"image-cleaner {B_2D_RAW_DATASET_PATH}"
!{remove_dup_cmd}

In [None]:
# Train/test split configuration

# Fraction of the images that goes to the training set, and its complement.
TRAIN_SPLIT = 0.9
TEST_SPLIT = 1 - TRAIN_SPLIT

# Destination folders for domain A
TRAIN_A_PATH = os.path.join(
    ROOT_DATASET_FOLDER, DATASET_NAME, "trainA"
)
TEST_A_PATH = os.path.join(
    ROOT_DATASET_FOLDER, DATASET_NAME, "testA"
)

# Destination folders for domain B
TRAIN_B_PATH = os.path.join(
    ROOT_DATASET_FOLDER, DATASET_NAME, "trainB"
)
TEST_B_PATH = os.path.join(
    ROOT_DATASET_FOLDER, DATASET_NAME, "testB"
)

In [None]:
def split_train_test(
    raw_dataset_path: str,
    train_path: str,
    test_path: str,
    seed: "int | None" = None,
):
    """Move the files of a raw dataset folder into train and test folders.

    A random ``floor(n * TRAIN_SPLIT)`` of the ``n`` files found in
    ``raw_dataset_path`` is moved to ``train_path``; every other file is moved
    to ``test_path``. Sub-directories inside ``raw_dataset_path`` are left
    untouched.

    Args:
        raw_dataset_path: Folder containing the files to split.
        train_path: Destination folder for the training share; created
            (including missing parent folders) when it does not exist.
        test_path: Destination folder for the test share; created likewise.
        seed: Optional seed for a reproducible split. When None the global
            ``random`` module state is used.
    """
    # os.listdir returns entries in arbitrary order; sorting makes a seeded
    # split reproducible. Only regular files are considered so that stray
    # sub-directories are not moved into the dataset.
    files = sorted(
        entry
        for entry in os.listdir(raw_dataset_path)
        if os.path.isfile(os.path.join(raw_dataset_path, entry))
    )
    total_file_count = len(files)
    logger.info(f"Found total de-duplicated {total_file_count} images.")
    if total_file_count == 0:
        logger.warning(f"No files found in `{raw_dataset_path}`; nothing to split.")

    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    # Train
    no_of_files = math.floor(total_file_count * TRAIN_SPLIT)
    rng = random if seed is None else random.Random(seed)
    train_files = rng.sample(files, no_of_files)
    logger.info(f"Moving {no_of_files} files to training set...")
    for file_name in tqdm_notebook(train_files):
        shutil.move(os.path.join(raw_dataset_path, file_name), train_path)

    # Test: everything that was not sampled for training.
    train_file_set = set(train_files)
    remaining_files = [name for name in files if name not in train_file_set]
    logger.info(f"Moving {len(remaining_files)} files to test set...")
    for file_name in tqdm_notebook(remaining_files):
        shutil.move(os.path.join(raw_dataset_path, file_name), test_path)


In [None]:
# Split the A (3D) frames into the trainA / testA folders.
split_train_test(
    raw_dataset_path=A_3D_RAW_DATASET_PATH,
    train_path=TRAIN_A_PATH,
    test_path=TEST_A_PATH,
)

In [None]:
# Split the B (2D) frames into the trainB / testB folders.
split_train_test(
    raw_dataset_path=B_2D_RAW_DATASET_PATH,
    train_path=TRAIN_B_PATH,
    test_path=TEST_B_PATH,
)