## Marker Model Training Dataset Generator

In [13]:
import os
import os.path as osp
import glob
import celldom
import pandas as pd
from shutil import copyfile

In [2]:
# Contains pre-annotated images
dir1 = osp.join(celldom.get_dataset_dir(), 'dataset03', 'MarkerTraining')
dir1

'/lab/data/celldom/dataset/dataset03/MarkerTraining'

In [3]:
# Contains raw images to copy and then annotate
dir2 = osp.join(celldom.get_dataset_dir(), 'dataset05')
dir2

'/lab/data/celldom/dataset/dataset05'

In [26]:
# Randomly sample 250 images from second dataset
d2_files = !find $dir2 | grep '.tif'
d2_files = pd.Series(d2_files).sample(n=250, random_state=1).tolist()
d2_files[:3]

['/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Yellow 1 0.3uM/_2018.06.17 Yellow 1 0.3uM 120 hr/BFF_16X_St_009_Apt_030_201806221839.tif',
 '/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Blue 3 Control/_2018.06.17 Blue 3 Control 48 hr/BFF_16X_St_015_Apt_018_201806192240.tif',
 '/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Yellow 3 0.3uM/_2018.06.17 Yellow 3 0.3uM 48 hr/BFF_16X_St_000_Apt_018_201806192258.tif']

In [17]:
# Contains raw (G1) images to copy and then annotate
dir3 = osp.join(celldom.get_dataset_dir(), 'dataset06')
dir3

'/lab/data/celldom/dataset/dataset06'

In [18]:
# Randomly sample 250 images from third dataset
d3_files = !find $dir3 | grep '.tif'
d3_files = pd.Series(d3_files).sample(n=250, random_state=1).tolist()
d3_files[:3]

['/lab/data/celldom/dataset/dataset06/2018.02.17 G1 35 K562 Test 2 41 hr 201802181700/BF_16X_St_012_Apt_028_F_000.tif',
 '/lab/data/celldom/dataset/dataset06/2018.02.17 G1 35 K562 Test 2 88 hr 201802201600/BF_16X_St_030_Apt_028_F_000.tif',
 '/lab/data/celldom/dataset/dataset06/2018.02.17 G1 35 K562 Test 2 41 hr 201802181700/BF_16X_St_018_Apt_004_F_000.tif']

In [19]:
# Set destination
dest = osp.join(celldom.get_training_dataset_dir(), 'marker', 'r0.6')
dest

'/lab/data/celldom/dataset/training/marker/r0.6'

### Copy Files

##### G2 Images

In [14]:
# Move all data in pre-annotated dir but prefix filenames by chip type (G2 in this case)
ct = 0
for f in os.listdir(dir1):
    if not f.endswith('.tif'):
        continue
    ct += 1
    psrc = osp.join(dir1, f)
    pdest = osp.join(dest, 'G02_' + f)
    #print(psrc, pdest)
    #copyfile(psrc, pdest)
print('Finished copy for {} files'.format(ct))

Finished copy for 70 files


##### G3 Images

In [30]:
len(d2_files)

250

In [33]:
d2_files[0]

'/lab/data/celldom/dataset/dataset05/_2018.06.17 EXP SUM Control 0.3uM 0.5uM with 5mL gravity/_2018.06.17 Yellow 1 0.3uM/_2018.06.17 Yellow 1 0.3uM 120 hr/BFF_16X_St_009_Apt_030_201806221839.tif'

In [16]:
# Move selected images from second dir
# * Reflect the results to make them compatible with G2 images (which has flipped annotations as well)
# and add chip type (G3) to filenames
from skimage import io
import numpy as np
for f in d2_files:
    img = io.imread(f)
    assert img.dtype == np.uint8
    assert img.ndim == 2
    img = img[:,::-1]
    path = osp.join(dest, 'G03_' + osp.basename(f))
    print(path, img.dtype, img.shape)
    #io.imsave(path, img)

##### G1 Images

In [29]:
# Note that the names are not unique enough unless you include the date in the path
d3_files[0]

'/lab/data/celldom/dataset/dataset06/2018.02.17 G1 35 K562 Test 2 41 hr 201802181700/BF_16X_St_012_Apt_028_F_000.tif'

In [32]:
# Move selected images from third dir
# * Also reflect these images to conform to G2 images
from skimage import io
import numpy as np
for f in d3_files:
    img = io.imread(f)
    assert img.dtype == np.uint8
    assert img.ndim == 2
    img = img[:,::-1]
    dt = f.split('/')[-2].split()[-1]
    # Add date + time to filename
    path = osp.join(dest, 'G01_' + osp.basename(f).replace('.tif', '') + '_' + dt + '.tif')
    #print(path, img.dtype, img.shape)
    io.imsave(path, img)

Now all files in `dest` are ready for review and annotation via RectLabel