## Marker Model Training Dataset Generator

In [1]:
import os
import os.path as osp
import glob
import celldom
import pandas as pd

In [7]:
# Source directory #1: images that already come with annotations
dir1 = osp.join(
    celldom.get_dataset_dir(),
    'dataset03',
    'MarkerTraining',
)
dir1

'/lab/data/celldom/dataset/dataset03/MarkerTraining'

In [2]:
# Source directory #2: raw images, a subset of which gets copied out and annotated
dir2 = osp.join(
    celldom.get_dataset_dir(),
    'dataset05',
)
dir2

'/lab/data/celldom/dataset/dataset05'

In [13]:
# List every path under dir2 that matches the grep pattern '.tif', then draw a
# reproducible random sample of 250 of them (fixed random_state) and preview
# the first three.
# NOTE(review): '.tif' is an unanchored regex ('.' matches any character and the
# match may occur anywhere in the path) -- confirm only TIFF images are selected.
files = !find $dir2 | grep '.tif'
files = pd.Series(files).sample(n=250, random_state=1).tolist()
files[:3]

['/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Yellow 1 0.3uM/_2018.06.17 Yellow 1 0.3uM 120 hr/BFF_16X_St_009_Apt_030_201806221839.tif',
 '/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Blue 3 Control/_2018.06.17 Blue 3 Control 48 hr/BFF_16X_St_015_Apt_018_201806192240.tif',
 '/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Yellow 3 0.3uM/_2018.06.17 Yellow 3 0.3uM 48 hr/BFF_16X_St_000_Apt_018_201806192258.tif']

In [11]:
# Output directory that receives the assembled marker training set
dest = osp.join(
    celldom.get_training_dataset_dir(),
    'marker',
    'r0.6',
)
dest

'/lab/data/celldom/dataset/training/marker/r0.6'

### Copy Files

In [23]:
# Move all data in pre-annotated dir
!cp -r $dir1/* $dest/

In [5]:
# Sanity check: look at the pixel dtype of the first sampled image
from skimage import io
first_image = io.imread(files[0])
first_image.dtype

dtype('uint8')

In [14]:
# Copy the sampled images from the second dir into dest (the sources are not modified)
# * reflect the results to make them compatible with dir1 (which has flipped annotations)
from skimage import io
import numpy as np

# Make sure the output directory exists before anything is written to it
os.makedirs(dest, exist_ok=True)

# Images are written under their basename only, so two sampled files that share
# a basename (but live in different source directories) would silently overwrite
# each other; fail up front, before any file is written, if that would happen
names = [osp.basename(f) for f in files]
assert len(set(names)) == len(names), \
    'Sampled files contain duplicate basenames; copies would overwrite each other in dest'

for f, name in zip(files, names):
    img = io.imread(f)
    # Only single-channel 8-bit images are expected here
    assert img.dtype == np.uint8, 'Unexpected dtype {} for {}'.format(img.dtype, f)
    assert img.ndim == 2, 'Expected a 2D image, got ndim={} for {}'.format(img.ndim, f)
    # Reverse the second axis, i.e. mirror the image left-to-right
    img = img[:,::-1]
    path = osp.join(dest, name)
    io.imsave(path, img)

Now all files in `dest` are ready for review and annotation via RectLabel