In [None]:
import glob
import os.path

import numpy

import deepometry.parse

# Parse TIFs

Use the `deepometry.parse` module to transform .TIF files into NumPy arrays. In this example, the .TIF files are stored at `/data/raw/` in subdirectories named after the class labels. Each filename must contain a channel tag made of the prefix "Ch" followed by the channel number (e.g. `foo_Ch3.tif` is channel 3 of object `foo`).

    /data/raw/
        positive/
            foo_Ch3.tif
            foo_Ch4.tif
            foo_Ch6.tif
            bar_Ch3.tif
            bar_Ch4.tif
            bar_Ch6.tif
            ...
        negative/
            foo_Ch3.tif
            foo_Ch4.tif
            foo_Ch6.tif
            bar_Ch3.tif
            bar_Ch4.tif
            bar_Ch6.tif
            ...
        ...

We parse the images of the selected channels of each object into a single NumPy array, i.e. one cell yields one NumPy array that contains multiple channels. The arrays are stored at `/data/parsed/` in subdirectories named after the class labels. Each array filename consists of the patient prefix (e.g. `foo`), followed by a hexadecimal string.

    /data/parsed/
        positive/
            foo__32e88e1ac3a8f44bf8f77371155553b9.npy
            bar__3dc56a0c446942aa0da170acfa922091.npy
            ...
        negative/
            foo__8348deaa70dfc95c46bd02984d28b873.npy
            bar__c1ecbca7bd98c01c1d3293b64cd6739a.npy
            ...
        ...

In [None]:
# Root directory holding one subdirectory of .tif files per class label.
src = "/data/raw/"
# Root directory under which the parsed arrays are written, again one
# subdirectory per class label.
dest = "/data/parsed/"
# Class labels; each one is joined onto `src` and `dest` to form the
# per-label input and output directories.
labels = ["positive", "negative"]

In [None]:
# Channel numbers to keep; passed to `deepometry.parse.parse`. They correspond
# to the "Ch<N>" tag in the .tif filenames (e.g. 3 -> "foo_Ch3.tif").
channels = [3, 6]

In [None]:
# Size argument passed to `deepometry.parse.parse`.
# NOTE(review): presumably the side length, in pixels, of the square output
# images -- confirm against the deepometry documentation.
image_size = 48

In [None]:
# For every class label, collect the .tif files in its source directory and
# hand them to deepometry, which writes the parsed arrays to the matching
# destination directory.
for label in labels:
    src_dir = os.path.join(src, label)
    print("Parsing directory: {}".format(src_dir))

    dest_dir = os.path.join(dest, label)
    # exist_ok=True replaces the check-then-create pair: it is free of the
    # race between the existence test and the creation, and it keeps the cell
    # safe to re-run when the directory is already there.
    os.makedirs(dest_dir, exist_ok=True)

    # Only files directly inside the label directory are matched (no
    # recursion). On case-sensitive filesystems "*.tif" does not match ".TIF".
    pathnames = glob.glob(os.path.join(src_dir, "*.tif"))

    deepometry.parse.parse(pathnames, dest_dir, image_size, channels)

print('Done')

If you need to delete a folder that contains a very large number of files, run the following commands in a terminal:

- `mkdir empty_dir`

- `rsync -a --delete empty_dir/ yourdirectory/`