In [None]:
# USER SETTINGS -- edit these values before running the notebook

# path to folder of TIFF videos
SRC = None
# path to save folder of datasets
DST = None
# length of each segment/observation (consecutive frames per datum)
N_CHANNELS = 2
# number of segments per dataset
N_SEGMENTS = 10000
# path to vanishing-field reference segment
RULER = 'preprocessing/ruler.tif'

In [None]:
import os
import shutil
import numpy as np
from skimage import io
from pathlib import Path

In [None]:
def segment_sim(in_path, out_path, channels, N, ruler_path=None):
    '''
    Splits one simulation stack into N overlapping substacks saved as TIFFs.

    Substack i is the slice im[i:i+channels] of the loaded simulation and is
    written to out_path as '<i>.tif', with i zero-padded to the number of
    digits in N. If ruler_path is given, that file is copied into out_path
    under index N, i.e. right after the last substack.

    in_path: str, input path to simulation
    out_path: str, output path to dataset (created here; must not exist yet)
    channels: int, number of channels/consecutive frames per datum
    N: int, number of data per output dataset
    ruler_path: str, path to ruler/reference datum (optional, default None)

    Raises ValueError if the simulation holds fewer than N + channels - 1
    frames, because the trailing substacks would then be truncated or empty.
    '''
    im = io.imread(in_path)

    # Validate before touching the filesystem so that a too-short simulation
    # does not leave a partially filled dataset folder behind.
    # NOTE(review): assumes frames run along the first axis of the stack.
    n_frames = len(im)
    n_required = N + channels - 1 if N > 0 else 0
    if n_frames < n_required:
        raise ValueError(
            f'{in_path} has {n_frames} frames but {n_required} are needed '
            f'for N={N} substacks of {channels} frames each'
        )

    os.mkdir(out_path)

    # Zero-pad to the width of N so the ruler (index N) sorts after the data.
    width = len(str(N))

    def tif_path(index):
        return os.path.join(out_path, f'{index:0{width}d}.tif')

    if ruler_path is not None:
        shutil.copyfile(ruler_path, tif_path(N))

    for i in range(N):
        io.imsave(tif_path(i), im[i:i + channels])

In [None]:
def mk_datasets(sims_path, dsets_path, channels, N, r_path=None):
    '''
    Makes one dataset folder for each TIFF simulation file in sims_path.

    Every file in sims_path whose extension is .tif or .tiff (any case) is
    passed to segment_sim, in sorted filename order. Each dataset is written
    to a subfolder of dsets_path named after the file without its extension.

    sims_path: str, input path to folder of simulations
    dsets_path: str, output path to folder of datasets (created here,
        including missing parents; must not exist yet)
    channels: int, number of channels/consecutive frames per datum
    N: int, number of data per output dataset
    r_path: str, path to ruler/reference datum (optional, default None)
    '''
    # Match on the real extension instead of a substring test, so names such
    # as 'x.tif.bak' are skipped and '.tiff' files are handled correctly.
    simulations = sorted(
        name for name in os.listdir(sims_path)
        if os.path.splitext(name)[1].lower() in ('.tif', '.tiff')
    )
    Path(dsets_path).mkdir(parents=True)
    for sim in simulations:
        in_path = os.path.join(sims_path, sim)
        # Strip only the trailing extension; str.replace would also remove
        # '.tif' from the middle of a name and turn 'a.tiff' into 'af'.
        out_path = os.path.join(dsets_path, os.path.splitext(sim)[0])
        segment_sim(in_path, out_path, channels, N, ruler_path=r_path)

In [None]:
# Build one dataset per simulation using the settings from the config cell.
mk_datasets(
    sims_path=SRC,
    dsets_path=DST,
    channels=N_CHANNELS,
    N=N_SEGMENTS,
    r_path=RULER,
)