In [20]:
from PIL import Image
from PIL.PngImagePlugin import PngInfo
import numpy as np
import os
from pathlib import Path
import nibabel as nib
import pandas as pd

## General Utility Functions for File Operations

In [21]:
import multiprocessing
from functools import partial
from tqdm.auto import tqdm
import functools

In [22]:
def walk_directory_to_file_list(directory, condition_function=lambda x: True):
    """Recursively collect the paths of files found under ``directory``.

    Args:
        directory: Root directory handed to ``os.walk``.
        condition_function: Predicate called with each bare file name (not the
            joined path). Only files for which it returns a truthy value are
            kept. The default accepts every file.

    Returns:
        list[str]: ``os.path.join(root, name)`` for each accepted file, in the
        order ``os.walk`` yields them.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if condition_function(name)
    ]

In [23]:
def multiprocess(function, iterable, **kwargs):
    """Apply ``function`` to every item of ``iterable`` in a process pool.

    Args:
        function: Callable invoked as ``function(item, **kwargs)``. It is sent
            to worker processes, so it must be picklable (e.g. a module-level
            function).
        iterable: Items to process. Sized containers give the progress bar a
            total; unsized iterables such as generators are also accepted and
            simply show a bar without a total.
        **kwargs: Extra keyword arguments bound to ``function`` for every call.

    Returns:
        list: One result per input item, in input order (``Pool.imap``
        preserves ordering).
    """
    one_arg_fnx = partial(function, **kwargs)
    # The total is only cosmetic for tqdm; generators have no len(), so fall
    # back to an open-ended bar instead of raising TypeError.
    total = len(iterable) if hasattr(iterable, '__len__') else None
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        # list() drains the iterator before the pool is torn down on exit.
        return list(tqdm(pool.imap(one_arg_fnx, iterable), total=total))

## ETL Functions

In [5]:
def resize(file_path, new_size=(512,512), output_dir='.', algorithm=Image.BICUBIC):
    """Resize one image and save it as a PNG that records its original size.

    The output name is the input file name up to its first ``.``, with the
    literal ``'Olme '`` removed, plus ``.png``. The original width and height
    are stored as PNG text chunks ``old_size_x`` / ``old_size_y``.

    Args:
        file_path: Path of the image to read.
        new_size: ``(width, height)`` passed to ``Image.resize``.
        output_dir: Directory the PNG is written to.
        algorithm: Resampling filter passed to ``Image.resize``.

    Returns:
        dict | None: ``{'filename', 'old_size_x', 'old_size_y'}`` (sizes as
        strings) on success. On any error the error is printed and ``None`` is
        returned, so one bad file does not abort a batch run.
    """
    try:
        filename = Path(file_path).name.split('.')[0].replace('Olme ', '')
        # Close the source file promptly; this runs over many files in workers.
        with Image.open(file_path) as img:
            old_size_x = str(img.size[0])
            old_size_y = str(img.size[1])
            resized = img.resize(new_size, resample=algorithm)

        png_info = PngInfo()
        png_info.add_text('old_size_x', old_size_x)
        png_info.add_text('old_size_y', old_size_y)
        resized.save(os.path.join(str(output_dir), filename + '.png'), 'PNG', optimize=True, pnginfo=png_info)
        return {'filename': filename, 'old_size_x': old_size_x, 'old_size_y': old_size_y}
    except Exception as e:
        # Best-effort by design: report which file failed and keep going.
        print('resize failed for {}: {}'.format(file_path, e))
        return None
        

def resize_directory(file_list, **kwargs):
    """Run ``resize`` on every path in ``file_list`` through ``multiprocess``.

    Keyword arguments are forwarded to ``multiprocess``, which binds them to
    each ``resize`` call. Returns whatever ``multiprocess`` returns: the list
    of per-file ``resize`` results.
    """
    return multiprocess(resize, file_list, **kwargs)

In [7]:
def convert(file_path, new_mode='LA', output_directory='.'):
    """Convert the image at ``file_path`` to a PIL mode and save it.

    The result is written to ``output_directory`` under the input's base file
    name, so the output format follows the original extension.

    Args:
        file_path: Path of the image to read (``str`` or path-like).
        new_mode: Target PIL mode passed to ``Image.convert``.
        output_directory: Directory the converted image is written to.

    Returns:
        None. Errors are printed rather than raised so that one bad file does
        not abort a batch run.
    """
    try:
        # basename handles path-like objects and the platform's separators,
        # unlike splitting on '/'.
        file_name = os.path.basename(file_path)
        with Image.open(file_path) as img:
            img_converted = img.convert(new_mode)
        img_converted.save(os.path.join(output_directory, file_name))
    except Exception as e:
        # Best-effort by design: report which file failed and keep going.
        print('convert failed for {}: {}'.format(file_path, e))

def convert_directory(file_list, **kwargs):
    """Run ``convert`` on every path in ``file_list`` through ``multiprocess``.

    Keyword arguments are forwarded to ``multiprocess``, which binds them to
    each ``convert`` call. Returns whatever ``multiprocess`` returns.
    """
    return multiprocess(convert, file_list, **kwargs)

In [8]:
def la_rgb_stack(file_path, output_directory='.'):
    """Save a 3-channel RGB image built by repeating the input's first channel.

    Index 0 along the last axis of the image array is taken and stacked three
    times. The function name suggests the input is an 'LA' image, whose first
    channel would be luminance (assumption; the mode is not checked here).

    Args:
        file_path: Path of the image to read (``str`` or path-like).
        output_directory: Directory the RGB image is written to, under the
            input's base file name.

    Returns:
        None. Errors are printed rather than raised so that one bad file does
        not abort a batch run.
    """
    try:
        # basename handles path-like objects and the platform's separators,
        # unlike splitting on '/'.
        file_name = os.path.basename(file_path)
        with Image.open(file_path) as img:
            first_channel = np.array(img)[..., 0]
        img_arr_stack = np.stack([first_channel] * 3, axis=-1)
        Image.fromarray(img_arr_stack, mode='RGB').save(os.path.join(output_directory, file_name))
    except Exception as e:
        # Best-effort by design: report which file failed and keep going.
        print('la_rgb_stack failed for {}: {}'.format(file_path, e))

def la_rgb_stack_directory(file_list, **kwargs):
    """Run ``la_rgb_stack`` on every path in ``file_list`` through ``multiprocess``.

    Keyword arguments are forwarded to ``multiprocess``, which binds them to
    each ``la_rgb_stack`` call. Returns whatever ``multiprocess`` returns.
    """
    return multiprocess(la_rgb_stack, file_list, **kwargs)

Extract image size information from the OME-XML metadata files.

In [14]:
import xml.etree.ElementTree as ET

In [16]:
def extract_size_information(file_path):
    """Read pixel-size attributes from an OME-XML metadata file.

    Looks up the ``Pixels`` element of the image with ``ID='Image:0'`` in the
    OME 2016-06 namespace and copies selected attributes from it.

    Args:
        file_path: Path of the XML metadata file.

    Returns:
        dict: Keys ``PhysicalSizeX``, ``PhysicalSizeY``, ``PhysicalSizeXUnit``,
        ``PhysicalSizeYUnit``, ``SizeX``, ``SizeY`` (attribute strings, or
        ``None`` where the attribute is absent) and ``filename`` (the file name
        up to its first ``.``, with ``'__metadata'`` removed).

    Raises:
        ValueError: If the file contains no matching ``Pixels`` element.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    filename = Path(file_path).name.split('.')[0].replace('__metadata', '')
    ns = {'ome': 'http://www.openmicroscopy.org/Schemas/OME/2016-06'}
    pixels = ET.parse(file_path).find(".//ome:Image[@ID='Image:0']/ome:Pixels", ns)

    # Compare against None explicitly: a childless Element is falsy.
    if pixels is None:
        raise ValueError(
            "No ome:Pixels element for Image:0 found in {}".format(file_path)
        )

    wanted = (
        'PhysicalSizeX',
        'PhysicalSizeY',
        'PhysicalSizeXUnit',
        'PhysicalSizeYUnit',
        'SizeX',
        'SizeY',
    )
    size_info = {key: pixels.get(key) for key in wanted}
    size_info['filename'] = filename
    return size_info

def extract_size_information_directory(file_list):
    """Run ``extract_size_information`` on every path in ``file_list``.

    The work is dispatched through ``multiprocess``; its return value (the
    list of per-file results) is returned unchanged.
    """
    return multiprocess(extract_size_information, file_list)

In [17]:
def crop_dim_from_mask_array(mask_array, buffer=0.1):
    """Compute a padded bounding box around the non-zero region of a 2-D mask.

    The box is returned as slice bounds: ``top_left`` is inclusive and
    ``bottom_right`` is exclusive, so
    ``array[tl[0]:br[0], tl[1]:br[1]]`` contains every non-zero mask element.

    Args:
        mask_array: 2-D array; non-zero entries mark the region of interest.
        buffer: Fraction of the region's row/column span (max index minus min
            index) added as padding on each side, truncated to an integer.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(top_left, bottom_right)``, each a
        length-2 integer array in ``(row, col)`` order, clamped to the bounds
        of ``mask_array``.

    Raises:
        ValueError: If ``mask_array`` has no non-zero element.
    """
    mask = np.asarray(mask_array)
    non_zero_rows, non_zero_cols = np.nonzero(mask)

    # Fail with a clear message instead of numpy's zero-size reduction error.
    if non_zero_rows.size == 0:
        raise ValueError('mask_array has no non-zero elements; cannot derive a crop box')

    row_min, row_max = non_zero_rows.min(), non_zero_rows.max()
    col_min, col_max = non_zero_cols.min(), non_zero_cols.max()

    height_buffer = int((row_max - row_min) * buffer)
    width_buffer = int((col_max - col_min) * buffer)

    top_left = np.array([row_min - height_buffer, col_min - width_buffer])
    # +1 turns the inclusive max indices into exclusive slice ends; without it
    # the last mask row/column is cut off (and a one-pixel mask crops to nothing).
    bottom_right = np.array([row_max + 1 + height_buffer, col_max + 1 + width_buffer])

    # Keep the padded box inside the array.
    top_left = np.maximum(top_left, 0)
    bottom_right = np.minimum(bottom_right, mask.shape)

    return top_left, bottom_right

In [16]:
def crop_resize_based_on_mask(image_path, mask_path, target_dir, resize_dim, buffer=0.1):
    """Crop an image to its mask's bounding box, resize it, and save as PNG.

    The crop box comes from ``crop_dim_from_mask_array`` applied to the mask
    and is used as slice bounds on the image array. The size of the crop
    before resizing is stored in the PNG text chunks ``original_x`` (columns)
    and ``original_y`` (rows).

    Args:
        image_path: Path of the image to crop.
        mask_path: Path of the mask image. It is presumably the same height
            and width as the image (not checked here).
        target_dir: Directory the result is written to, under the image's own
            file name.
        resize_dim: ``(width, height)`` passed to ``Image.resize``.
        buffer: Padding fraction forwarded to ``crop_dim_from_mask_array``.

    Returns:
        None.
    """
    # Read both files into arrays and release the handles right away.
    with Image.open(image_path) as image, Image.open(mask_path) as mask:
        img_array = np.array(image)
        msk_array = np.array(mask)

    tl, br = crop_dim_from_mask_array(msk_array, buffer=buffer)
    cropped_img = img_array[tl[0]:br[0], tl[1]:br[1]]

    png_info = PngInfo()
    png_info.add_text('original_x', str(cropped_img.shape[1]))
    png_info.add_text('original_y', str(cropped_img.shape[0]))

    resized = Image.fromarray(cropped_img).resize(resize_dim, resample=Image.BICUBIC)
    out_path = Path(target_dir) / Path(image_path).name
    resized.save(out_path, 'PNG', optimize=True, pnginfo=png_info)
    

In [81]:
def png_to_niigz(file_path, out_dir):
    """Write the first three channels of an image as separate NIfTI files.

    For channel index ``i`` in 0..2 a file ``<stem>_<i zero-padded to 4>.nii.gz``
    is written to ``out_dir``, where ``<stem>`` is the input file name without
    its final extension. Each file holds one channel with a trailing singleton
    axis added, and uses an identity affine.

    Args:
        file_path: Path of the image to read. The decoded array must have at
            least three channels on its last axis.
        out_dir: Directory the ``.nii.gz`` files are written to.

    Returns:
        None.
    """
    affine = np.eye(4)
    with Image.open(file_path) as img:
        img_array = np.array(img)

    # splitext strips only the real extension, whatever its case; a plain
    # str.replace('.png', ...) misses '.PNG' and rewrites '.png' mid-name.
    stem = os.path.splitext(os.path.basename(file_path))[0]

    for channel in range(3):
        plane = np.expand_dims(img_array[:, :, channel], axis=-1)
        nifti_img = nib.Nifti1Image(plane, affine)
        new_filename = '{}_{:04d}.nii.gz'.format(stem, channel)
        nib.save(nifti_img, os.path.join(out_dir, new_filename))