# Generate XML annotation files and JPEGImages from OMERO.

* This notebook assumes that datasets have been collected and bounding regions annotated within OMERO.
* If this is not the case, you can either annotate the images by logging into the OMERO Webclient, or,  
if you want to create the dataset directly from TIFF files, use the notebook:  
dset01_create_anno_from_TIFF.ipynb  (this other notebook allows you to import ROIs from TIFF files directly).  
* Creating a dataset forms the foundation of training material used to train one of many object detection algorithms.  
* N.B. Once finished here, it is also important to add the information about this dataset to the config/dataset_spec.txt file.  
* N.B. The next step after this is to run the notebook dset02_create_anno_formats. That notebook is used to create the  
configuration files associated with the dataset and also allows you to group datasets together for larger training.

In [1]:
#!pip3 install omero-py
from omero.gateway import BlitzGateway
import omero
import getpass
import matplotlib.pylab as plt
import os
import numpy as np
from scipy import ndimage
import sys
sys.path.append('../src')
import convert_voc_to_other as cvto
import omero_interaction as om_i 

### Creating Folder structure for new dataset
First we need to create a folder structure on the file-system.

In [11]:
# Create the folder structure for the new dataset:
#   <dataset_home_dir>/<dataset_name>/<year_acquisition>/Annotations
#   <dataset_home_dir>/<dataset_name>/<year_acquisition>/JPEGImages
dataset_home_dir = "/Users/dominicwaithe/Documents/collaborators/WaitheD/micro_vision/cell_datasets/"
dataset_name = "erythroid_dapi_all_scale_0p25_class" #e.g. erythroblast_dapi_glycophorinA_FOXO3_class
year_acquisition = "2019"
class_name = "cell - erythroid dapi all" #Classes to have in this dataset.
###########
## The above has to be added to config/dataset_spec.txt file.
###########

# dataset_home_dir already ends with '/', so plain concatenation is used to
# keep the resulting path strings exactly as later cells expect them.
dataset_year_dir = dataset_home_dir + dataset_name + '/' + year_acquisition
xml_path = dataset_year_dir + '/Annotations'
jpg_path = dataset_year_dir + '/JPEGImages'

# os.makedirs creates any missing parent directories (including
# dataset_home_dir itself) and, with exist_ok=True, is a no-op when the
# directory is already there, so the cell can be re-run safely.
for dirm in [xml_path, jpg_path]:
    os.makedirs(dirm, exist_ok=True)


### Connect to OMERO
Requires an OMERO instance, either locally or on a network.

In [3]:
# Prompt for the password interactively so it is never stored in the notebook.
PASSWORD = getpass.getpass('Enter your password')

# Connection details of the OMERO server to pull images and ROIs from.
USERNAME = "dwaithe"
HOST = "cbomero.imm.ox.ac.uk"
PORT = 4064

conn = BlitzGateway(USERNAME, PASSWORD, host=HOST, port=PORT)
connected = conn.connect()
# Fail here, with a clear message, rather than in a later cell when the
# unusable connection is first queried.
if not connected:
    raise ConnectionError(
        "Could not connect to OMERO at %s:%d as user '%s'; "
        "check the password, host and port." % (HOST, PORT, USERNAME)
    )



Enter your password ·················


### Generate XML annotation files and JPEGImages from OMERO.
This cell takes a particular OMERO dataset id and downloads the images and annotations located on OMERO server.  
This script assumes that images have been annotated in OMERO and the cell class has been used to label the ROI.
You can find the OMERO Id by looking at the dataset in either the OMERO Insight client or Webclient.


In [12]:
OMERO_dataset_num = 4379

# Ids of every image in the chosen OMERO dataset.
out_list = om_i.rtn_img_ids_from_dataset(OMERO_dataset_num,conn)

annotator_name = "Waithe" #If you have a different annotator, this is where to change it (not referenced further in this cell).
scale_factor = 0.25 #The networks take images close to 512, this scale-factor takes it near to this size.
override = True #True means the classes in the OMERO will be ignored (so defined above) rather than taken from OMERO.
sat_fac = 0.3 #Matches Fiji/ImageJ saturation factor of 0.3%

#Loop which creates JPEGImage files and also XML annotations for training object detection networks.
for imageId in out_list:
    raw_img = om_i.rtn_raw_image(imageId,conn).astype(np.uint16)
    roi_list = om_i.rtn_roi(imageId,conn)

    #Here we stretch the pixel information across the available intensity range.
    #This is very similar to the ImageJ/Fiji function: half of sat_fac percent
    #of the pixels is allowed to saturate at each end of the histogram.
    sorted_img = np.sort(raw_img.flatten())
    img_min = int(np.ceil(sorted_img.shape[0]*((sat_fac/2.)/100.)))
    img_max = int(np.floor(sorted_img.shape[0]*((100.-(sat_fac/2.))/100.)))

    #Cast to plain Python ints: arithmetic such as 2**16 - np.uint16(x) raises
    #OverflowError under NumPy >= 2 because 65536 does not fit in uint16.
    lower_bound = int(sorted_img[img_min])
    upper_bound = int(sorted_img[img_max])

    #Look-up table with one entry per possible 16-bit value:
    #0 below lower_bound, a linear ramp 0..255 between the bounds, 255 above.
    #This is very similar to the ImageJ/Fiji methodology when saving JPEGs but isn't exactly the same.
    lut = np.concatenate([
            np.zeros(lower_bound, dtype=np.uint16),
            np.linspace(0, 255, upper_bound - lower_bound).astype(np.uint16),
            np.full(2**16 - upper_bound, 255, dtype=np.uint16)
        ])

    bit_img = lut[raw_img].astype(np.uint8)
    #ndimage.zoom is the supported entry point; the ndimage.interpolation
    #namespace is deprecated in recent SciPy releases.
    corr_img = ndimage.zoom(bit_img,scale_factor)

    #Replicate the grey-scale plane into three identical channels (H, W, 3).
    out_img = np.stack([corr_img, corr_img, corr_img], axis=-1).astype(np.uint8)

    #assert raw_img.shape[0] == 1024, "input image is unexpected size"
    #assert out_img.shape[0] == 512, "output image is unexpected size"
    jpg_file = str(imageId)+".jpg"
    #Save the JPEG image out to the folder
    plt.imsave(jpg_path+'/'+jpg_file, out_img)
    #Save the XML annotation out (image width = shape[1], height = shape[0]).
    cvto.write_xml(xml_path, roi_list, imageId, dataset_name, class_name,override, year_acquisition, out_img.shape[1], out_img.shape[0], scale_factor)