In [3]:
from glob import glob
import os
import numpy as np
import random

# Dataset locations. Each one can be overridden through an environment variable so
# the notebook is not tied to a single machine; the defaults are the original paths.
data_dir = os.environ.get("SERP_DATA_DIR", "/home/dilith/Projects/SerpSeg/data-serp")
target_dir = os.environ.get("SERP_TARGET_DIR", "/home/dilith/Projects/DCAMA/datasets/Serp")

# Both lists are sorted because later cells pair image_paths[i] with mask_paths[i]
# purely by position.
image_paths = sorted(glob(os.path.join(data_dir, "images", "*.npy")))
mask_paths = sorted(glob(os.path.join(data_dir, "masks", "*.npy")))

# Positional pairing is only meaningful when both lists have the same length.
if len(image_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(image_paths)} images but {len(mask_paths)} masks under {data_dir}"
    )

In [4]:
def eval_expression(exp: list, image: np.ndarray = None):
    """Build a Python expression from a token list and evaluate it against ``image``.

    Token handling:
      * ``"c<N>"``  -> ``(image[N] + 0.0001)``; the offset prevents divide by zero.
      * ``"sq"``    -> ``**2``
      * ``"sqrt"``  -> ``**0.5``
      * ``"="``     -> stop; this token and everything after it is ignored.
      * any other token is appended to the expression verbatim.

    Args:
        exp: List of string tokens describing the expression.
        image: Array indexed along its first axis by the channel tokens. It must
            be provided whenever ``exp`` contains a channel token.

    Returns:
        Whatever the assembled expression evaluates to.

    Raises:
        ValueError: If a channel token does not carry a valid integer after "c".
        SyntaxError: If the assembled expression is not valid Python (for
            example when ``exp`` is empty).
    """
    parts = []

    for token in exp:
        if token == "=":
            break
        # startswith() (rather than token[0]) tolerates an empty-string token.
        if token.startswith("c"):
            # Parse the channel number with int() instead of eval(): no code
            # execution on the token, and "c01" is accepted as channel 1.
            channel = int(token[1:])
            parts.append(f"(image[{channel}] + 0.0001)")  # To prevent divide by zero
        elif token == "sq":
            parts.append("**2")
        elif token == "sqrt":
            parts.append("**0.5")
        else:
            parts.append(token)

    # NOTE(review): verbatim tokens are still executed by eval(); only pass token
    # lists that come from a trusted source.
    return eval("".join(parts))

def get_updated_image(exp: list, img: np.ndarray, max_z: float = 3, n_channels: int = 10):
    """Append a normalised index channel, computed from ``exp``, to ``img``.

    The expression is evaluated on ``img``, standardised to z-scores, clipped to
    ``[-max_z, max_z]`` and rescaled linearly to ``[0, 1]``. The result is stacked
    after the first ``n_channels`` channels of ``img``.

    Args:
        exp: Token list understood by ``eval_expression``.
        img: 3-D array whose first axis is the channel axis.
        max_z: Clipping bound for the z-scores (default 3, as before).
        n_channels: Number of leading channels of ``img`` to keep (default 10,
            as before).

    Returns:
        Array with ``min(n_channels, img.shape[0]) + 1`` channels along axis 0.
    """
    idx = eval_expression(exp, img)

    centered = idx - idx.mean()
    spread = idx.std()
    if spread == 0:
        # A constant index map would give 0/0 = NaN; treat it as z-score 0 so it
        # maps to the midpoint (0.5) instead of poisoning the saved array.
        z_scores = np.zeros_like(centered)
    else:
        z_scores = centered / spread

    idx = (np.clip(z_scores, -max_z, max_z) + max_z) / (2 * max_z)
    return np.concatenate([img[:n_channels, :, :], idx[None, :, :]], axis=0)

In [3]:
# Sequential (file-order) cross-validation export.
# Fold k takes the next fold_sizes[k] samples, in sorted file order, as its
# validation set; every other sample goes to that fold's training set.
expression = []  # token list for an extra index channel; empty -> images copied as-is
fold_sizes = [6, 4, 4, 6, 4]
size = sum(fold_sizes)

# The fold boundaries only partition the dataset when they cover it exactly, and
# images/masks are paired by position, so both counts must match `size`.
if len(image_paths) != size or len(mask_paths) != size:
    raise ValueError(
        f"fold_sizes cover {size} samples, but found {len(image_paths)} images "
        f"and {len(mask_paths)} masks"
    )

l_idx = 0
for cur_fold, cur_size in enumerate(fold_sizes):
    r_idx = l_idx + cur_size
    fold_dir = os.path.join(target_dir, str(cur_fold))

    # (source paths, destination directory, whether `expression` applies)
    export_jobs = [
        (image_paths[:l_idx] + image_paths[r_idx:], os.path.join(fold_dir, "train"), True),
        (image_paths[l_idx:r_idx], os.path.join(fold_dir, "val"), True),
        (mask_paths[:l_idx] + mask_paths[r_idx:], os.path.join(fold_dir, "annotations", "train"), False),
        (mask_paths[l_idx:r_idx], os.path.join(fold_dir, "annotations", "val"), False),
    ]

    for src_paths, out_dir, is_image in export_jobs:
        os.makedirs(out_dir, exist_ok=True)
        # Remove arrays left by a previous export with a different split;
        # otherwise the same sample could sit in both train and val.
        for stale in glob(os.path.join(out_dir, "*.npy")):
            os.remove(stale)

        for path in src_paths:
            arr = np.load(path)
            if is_image and expression:
                arr = get_updated_image(expression, arr)
            np.save(os.path.join(out_dir, os.path.basename(path)), arr)

    l_idx = r_idx

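Shuffled variant: build the cross-validation folds from a random permutation of the samples instead of their sorted file order.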

In [7]:
# Shuffled cross-validation export.
# Samples are permuted once, then cut into n_folds consecutive validation chunks.
expression = []  # token list for an extra index channel; empty -> images copied as-is
n_folds = 5
seed = 0  # fixed so that re-running the cell reproduces the same folds

# Images and masks are paired by position, so the two lists must line up.
if len(image_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(image_paths)} images but {len(mask_paths)} masks"
    )
size = len(image_paths)

indices = list(range(size))
random.Random(seed).shuffle(indices)

# Spread the remainder over the first folds so that every sample lands in exactly
# one validation set (plain size // n_folds would leave size % n_folds samples
# that are never validated).
base_size, remainder = divmod(size, n_folds)
shuffled_fold_sizes = [base_size + (1 if k < remainder else 0) for k in range(n_folds)]

l_idx = 0
for cur_fold, cur_size in enumerate(shuffled_fold_sizes):
    r_idx = l_idx + cur_size
    train_indices = indices[:l_idx] + indices[r_idx:]
    val_indices = indices[l_idx:r_idx]
    fold_dir = os.path.join(target_dir, str(cur_fold))

    # (source list, sample indices, destination directory, whether `expression` applies)
    export_jobs = [
        (image_paths, train_indices, os.path.join(fold_dir, "train"), True),
        (image_paths, val_indices, os.path.join(fold_dir, "val"), True),
        (mask_paths, train_indices, os.path.join(fold_dir, "annotations", "train"), False),
        (mask_paths, val_indices, os.path.join(fold_dir, "annotations", "val"), False),
    ]

    for src_list, sample_ids, out_dir, is_image in export_jobs:
        os.makedirs(out_dir, exist_ok=True)
        # Remove arrays left by a previous export with a different split;
        # otherwise the same sample could sit in both train and val.
        for stale in glob(os.path.join(out_dir, "*.npy")):
            os.remove(stale)

        for i in sample_ids:
            path = src_list[i]
            arr = np.load(path)
            if is_image and expression:
                arr = get_updated_image(expression, arr)
            np.save(os.path.join(out_dir, os.path.basename(path)), arr)

    l_idx = r_idx