# Generate XML annotation files and JPEGImages from TIFF files.

* This notebook assumes that datasets have been stored locally on this machine and bounding regions annotated within Fiji/ImageJ.
* If this is not the case, then you may want to use the OMERO version of this script instead:
dset01_create_anno_from_OMERO.ipynb  (This other notebook allows you to import ROI from tiff files directly).  
* Creating a dataset forms the foundation of training material used to train one of many object detection algorithms.  
* N.B. Once finished here, it is also important to add the information about this dataset to the config/dataset_spec.txt file.  
* N.B. The next step after this is to run the notebook dset02_create_anno_formats. This next notebook is used to create the  
configuration files associated with the dataset and also allows you to group datasets together for larger training.

In [1]:
import getpass
import matplotlib.pylab as plt
import os
import numpy as np
from scipy import ndimage
import ijroi
from ijroi.ij_roi import Roi
from ijroi.ijpython_decoder import decode_ij_roi
import sys
sys.path.append('../src')
import convert_voc_to_other as cvto

### Creating Folder structure for new dataset
First we need to create a folder structure on the file-system.

In [2]:
#Create the folder structure.
dataset_home_dir = "/Users/dominicwaithe/Documents/collaborators/WaitheD/micro_vision/cell_datasets/"
dataset_name = "USO2_rfp_pcna_live_class" #e.g. erythroblast_dapi_glycophorinA_FOXO3_class
year_acquisition = "2021"
class_name = "cell - USO2 rfp pcna live" #Classes to have in this dataset.
###########
## The above has to be added to config/dataset_spec.txt file.
###########

#Output locations: <home>/<dataset>/<year>/Annotations and <home>/<dataset>/<year>/JPEGImages.
xml_path = dataset_home_dir+dataset_name+'/'+year_acquisition+'/Annotations'
jpg_path = dataset_home_dir+dataset_name+'/'+year_acquisition+'/JPEGImages'
#Create both leaf folders together with any missing parents.
#exist_ok=True makes this cell safe to re-run and avoids the check-then-create race of
#os.path.exists followed by os.mkdir.
for dirm in [xml_path,jpg_path]:
    os.makedirs(dirm, exist_ok=True)


### Specify folder containing the tiff files.
Then collect those files that carry ImageJ metadata (ij_metadata) rather than OME metadata.

In [3]:

from tifffile import TiffFile


dir_to_read = "/Users/dominicwaithe/Documents/collaborators/WaitheD/micro_vision/acquisitions/2020_11_13_RFP-PCNA_DAPI/2020_03_28_RFP_extension/"
dirFiles = os.listdir(dir_to_read) #list of directory files
#Lexicographic sort. This matches numeric order only when the filenames are zero-padded
#to the same width (e.g. 00000.tif, 00001.tif, ...).
dirFiles.sort()

#Keep only the TIFF files (case-sensitive match on the extension).
myimages = [fname for fname in dirFiles if fname.endswith(('.tiff', '.tif'))]
print (len(myimages))
print (myimages)

#Sort the files by the kind of metadata they carry. OME metadata takes precedence;
#files with neither kind of metadata are left out of both lists.
ij_tiff =[]
ome_tiff = []
for tiff in myimages:
    img_to_open = dir_to_read+tiff
    #The context manager closes the file even if reading the metadata raises.
    with TiffFile(img_to_open) as tf_img:
        if tf_img.ome_metadata is not None:
            print('I think this is an OME tiff.')
            ome_tiff.append(tiff)
        elif tf_img.imagej_metadata is not None:
            print('I think this is an IJ tiff.')
            ij_tiff.append(tiff)

168
['00000.tif', '00001.tif', '00002.tif', '00003.tif', '00004.tif', '00005.tif', '00006.tif', '00007.tif', '00008.tif', '00009.tif', '00010.tif', '00011.tif', '00012.tif', '00013.tif', '00014.tif', '00015.tif', '00016.tif', '00017.tif', '00018.tif', '00019.tif', '00020.tif', '00021.tif', '00022.tif', '00023.tif', '00024.tif', '00025.tif', '00026.tif', '00027.tif', '00028.tif', '00029.tif', '00036.tif', '00037.tif', '00038.tif', '00039.tif', '00040.tif', '00041.tif', '00042.tif', '00043.tif', '00044.tif', '00045.tif', '00046.tif', '00047.tif', '00048.tif', '00049.tif', '00050.tif', '00051.tif', '00052.tif', '00053.tif', '00054.tif', '00055.tif', '00056.tif', '00057.tif', '00058.tif', '00059.tif', '00060.tif', '00061.tif', '00062.tif', '00063.tif', '00064.tif', '00065.tif', '00066.tif', '00067.tif', '00068.tif', '00069.tif', '00070.tif', '00071.tif', '00072.tif', '00073.tif', '00074.tif', '00075.tif', '00076.tif', '00077.tif', '00078.tif', '00079.tif', '00080.tif', '00081.tif', '00082.

### Generate XML annotation files and JPEGImages from ImageJ Tifffiles.
This cell takes a folder of ImageJ tiff images and annotations located on the local computer.
This script assumes that images have been annotated in Fiji/ImageJ and the cell class has been used to label the ROI.


In [4]:
#If you want to name your output images with a sequence (recommended), then set rename_with_seq=True
rename_with_seq = True
start_index = 2500 #Choose a sensible unique start.
#If rename_with_seq = False, then the input name will be reused, with .jpg ending.
annotator_name = "Waithe" 
override = True 
scale_factor = 1 #Zoom factor applied to the 8-bit image before saving; also passed on to write_xml.
ch_to_take = 0 #Index along the first axis to keep when the image has more than 2 dimensions.


ct = 0 #Number of files processed so far; added to start_index to build the output name.
for tiff in ij_tiff:
    if rename_with_seq:
        output_name = str(start_index+ct).zfill(6)
    else:
        #Strip only the final extension so that names containing extra dots stay intact.
        output_name = os.path.splitext(tiff)[0]
    
    print('input name:',tiff,'output name: '+str(output_name)+'.jpg')
    #Decode the pixels and metadata once, and release the file handle straight away.
    with TiffFile(dir_to_read+tiff) as tfile:
        raw_img = tfile.asarray()
        ij_metadata = tfile.imagej_metadata
    #Shape of the full array (before any channel is selected), as handed to the ROI decoder.
    img_shape = raw_img.shape

    #Decode every overlay stored in the ImageJ metadata.
    overlay_arr = []
    if 'Overlays' in ij_metadata:
        overlays = ij_metadata['Overlays']
        if isinstance(overlays, list):
            #Multiple overlays and so iterate.
            for overlay in overlays:
                overlay_arr.append(decode_ij_roi(overlay,img_shape))
        else:
            #One overlay.
            overlay_arr.append(decode_ij_roi(overlays,img_shape))
    else:
        print('no Overlays present in file.')
    
    #Collect [x, y, width, height, class name] for each overlay; falsy decode results are skipped.
    roi_list = []
    for roi in overlay_arr:
        if roi:
            #ROI names can carry NUL bytes, which are stripped here.
            roi_class_name = str(roi.name).replace("\x00", "")
            roi_list.append([roi.x, roi.y, roi.width, roi.height, roi_class_name])
    
    #Reduce images with more than 2 dimensions to a single 2-D plane.
    if raw_img.ndim > 2:
        raw_img = raw_img[ch_to_take,:,:]
    print(raw_img.shape)
    
    #Explicit raise rather than assert, so the check survives running python with -O.
    if raw_img.ndim != 2:
        raise ValueError("image should only have 2-dimensions.")
    
    #Contrast stretch: find the intensities that clip sat_fac/2 percent of the pixels at each end.
    sorted_img = np.sort(raw_img.flatten())
    sat_fac = 0.3 #Matches Fiji/ImageJ saturation factor of 0.3%
    img_min = int(np.ceil(sorted_img.shape[0]*((sat_fac/2.)/100.)))
    img_max = int(np.floor(sorted_img.shape[0]*((100.-(sat_fac/2.))/100.)))

    #Convert to plain Python ints. With NumPy scalars (e.g. uint16) the expression
    #2**16 - upper_bound can overflow under the newer NumPy promotion rules.
    lower_bound = int(sorted_img[img_min])
    upper_bound = int(sorted_img[img_max])

    #This is very similar to the ImageJ/Fiji methodology when saving JPEGs but isn't exactly the same.
    #The lookup table has 2**16 entries and is indexed directly by pixel value, so the
    #input has to be unsigned integer data of at most 16 bits.
    lut = np.concatenate([
            np.zeros(lower_bound, dtype=np.uint16),
            np.linspace(0, 255, upper_bound - lower_bound).astype(np.uint16),
            np.ones(2**16 - upper_bound, dtype=np.uint16) * 255
        ])

    bit_img = lut[raw_img].astype(np.uint8)
    #ndimage.zoom is the public entry point; the ndimage.interpolation namespace is deprecated.
    corr_img = ndimage.zoom(bit_img,scale_factor)

    #Replicate the grey plane into three identical channels to get an RGB image.
    out_img = np.stack([corr_img, corr_img, corr_img], axis=-1).astype(np.uint8)

    #assert raw_img.shape[0] == 1024, "input image is unexpected size"
    #assert out_img.shape[0] == 512, "output image is unexpected size"
    
    jpg_file = str(output_name)+".jpg"
    #Save the JPEG image out to the folder
    plt.imsave(jpg_path+'/'+jpg_file, out_img)
    #Save the XML annotation out.
    cvto.write_xml(xml_path, roi_list, output_name, dataset_name, class_name,override, year_acquisition, out_img.shape[1], out_img.shape[0], scale_factor)
    ct+=1

input name: 00000.tif output name: 002500.jpg
(512, 512)
input name: 00001.tif output name: 002501.jpg
(512, 512)
input name: 00002.tif output name: 002502.jpg
(512, 512)
input name: 00003.tif output name: 002503.jpg
(512, 512)
input name: 00004.tif output name: 002504.jpg
(512, 512)
input name: 00005.tif output name: 002505.jpg
(512, 512)
input name: 00006.tif output name: 002506.jpg
(512, 512)
input name: 00007.tif output name: 002507.jpg
(512, 512)
input name: 00008.tif output name: 002508.jpg
(512, 512)
input name: 00009.tif output name: 002509.jpg
(512, 512)
input name: 00010.tif output name: 002510.jpg
(512, 512)
input name: 00011.tif output name: 002511.jpg
(512, 512)
input name: 00012.tif output name: 002512.jpg
(512, 512)
input name: 00013.tif output name: 002513.jpg
(512, 512)
input name: 00014.tif output name: 002514.jpg
(512, 512)
input name: 00015.tif output name: 002515.jpg
(512, 512)
input name: 00016.tif output name: 002516.jpg
(512, 512)
input name: 00017.tif output na