In [2]:
import numpy as np
import pandas as pd
import os
import cv2
import h5py
import matplotlib.pyplot as plt

In [2]:
def read_image(fn):
    """Load wavelength metadata and image data from one HDF5-based file.

    Parameters
    ----------
    fn : str
        Path of the file, opened read-only through ``h5py.File``.

    Returns
    -------
    dict
        ``'data'``: the contents of ``HAC_Image/imageStruct/data`` read into
        memory; ``'wavelengths'``: a list of ``(excitation, emission)``
        integer tuples, one per channel entry; ``'image'``: the base name of
        ``fn`` up to its first ``'.'``.
    """
    with h5py.File(fn, 'r') as f:
        print('Read metadata about excitation/emission lambdas')
        channel = f['HAC_Image']['imageStruct']['protocol']['channel']
        wls = {}
        for key in ('excitationWavelength', 'emission'):
            refs = channel[key]
            n_colors = refs.size
            values = []
            for i in range(n_colors):
                # Each entry is resolved through two lookups in the file
                # before the actual character codes are reached.
                # NOTE(review): presumably nested HDF5 object references
                # produced by a MATLAB v7.3 export -- confirm.
                codes = f[f[refs[i, 0]][0, 0]][:, 0]
                # The codes spell the wavelength as decimal text.
                text = codes.astype(dtype=np.uint8).tobytes().decode()
                values.append(int(text))
            wls[key] = values

        # Pair excitation and emission position by position; the count is
        # the size of the last metadata entry read above.
        combo = [
            (wls['excitationWavelength'][i], wls['emission'][i])
            for i in range(n_colors)
        ]

        print('Read image data')
        img = f['HAC_Image']['imageStruct']['data'][:, :, :]

    image_name = os.path.basename(fn).split('.')[0]
    return {
        'data': img,
        'wavelengths': combo,
        'image': image_name,
    }

### Convert mat files to npz files

In [6]:
# Convert every .mat file found in the sub-folders of the imaging root into
# a compressed .npz archive, storing one transposed image plane per
# excitation/emission combination under the key 'wls_<excitation>_<emission>'.
fdn = '/home/yike/phd/cancer_cells_img_seq/data/202306_imaging/'

# Sub-folders are recognised by having no '.' in their name; the additional
# isdir check skips extension-less regular files, on which os.listdir below
# would raise.
sub_fdns = [
    i for i in os.listdir(fdn)
    if '.' not in i and os.path.isdir(os.path.join(fdn, i))
]

for sub_fdn in sub_fdns:
    fdn_new = os.path.join(fdn, sub_fdn)
    # Test the extension itself so that names merely containing '.mat'
    # (e.g. backups such as 'x.mat.bak') are not picked up.
    fns = [i for i in os.listdir(fdn_new) if i.endswith('.mat')]

    for fn in fns:
        # The output archive is named after the part before the first '.'.
        grid = fn.split('.')[0]
        data = read_image(os.path.join(fdn_new, fn))
        wls = data['wavelengths']
        img = data['data'].astype(np.uint16)

        img_data = {}
        for i, wl in enumerate(wls):
            # Plane i belongs to the i-th (excitation, emission) pair.
            img_data['wls_{}_{}'.format(wl[0], wl[1])] = img[i].T

        np.savez_compressed(os.path.join(fdn_new, grid + '.npz'), **img_data)

Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission lambdas
Read image data
Read metadata about excitation/emission 

In [8]:
# Inspect the channel keys of `img_data` as left in the namespace by the most
# recent iteration of the conversion loop.
img_data.keys()

dict_keys(['wls_325_414', 'wls_343_414', 'wls_370_414', 'wls_343_451', 'wls_370_451', 'wls_373_451', 'wls_343_575', 'wls_393_575', 'wls_406_575', 'wls_441_575', 'wls_400_594', 'wls_406_594', 'wls_431_594', 'wls_480_594', 'wls_339_575'])

### Add brightfield images into each npz file

In [6]:
# For every .png in the sub-folders of the imaging root: strip the white
# padding, upscale the picture by a factor of 4 and store it under the key
# 'brightfield' in the .npz archive that shares the file's base name.
fdn = '/home/yike/phd/cancer_cells_img_seq/data/202306_imaging/'

# Sub-folders are recognised by having no '.' in their name; the additional
# isdir check skips extension-less regular files, on which os.listdir below
# would raise.
sub_fdns = [
    i for i in os.listdir(fdn)
    if '.' not in i and os.path.isdir(os.path.join(fdn, i))
]

for sub_fdn in sub_fdns:
    fdn_new = os.path.join(fdn, sub_fdn)
    # Test the extension itself rather than a substring of the name.
    fns = [i for i in os.listdir(fdn_new) if i.endswith('.png')]

    for fn in fns:
        grid = fn.split('.')[0]

        sample = plt.imread(os.path.join(fdn_new, fn))
        # Pixels whose first channel equals 1 are counted as white padding.
        is_white = sample[:, :, 0] == 1
        # bottom width: the smallest number of white pixels in any column
        v_white = is_white.sum(axis=0)
        bottom_w = int(v_white.min())
        # left or right width: half the smallest number of white pixels in any row
        h_white = is_white.sum(axis=1)
        side_w = int(h_white.min() / 2)
        # cropped sample -- explicit end indices are used because a slice such
        # as sample[:-0] is empty, which would break images without padding
        height, width = sample.shape[:2]
        cropped_sample = sample[:height - bottom_w, side_w:width - side_w, :]
        # resize the image by a factor of 4 (gives (4096, 4096) for a 1024 x 1024 crop)
        cropped_sample_4 = cv2.resize(cropped_sample, None, fx=4, fy=4)

        npz_path = os.path.join(fdn_new, '{}.npz'.format(grid))
        # Read every stored array into memory and close the archive before
        # the same path is overwritten below.
        with np.load(npz_path) as img:
            raw = dict(img.items())

        # add the brightfield image to the arrays of this grid
        raw['brightfield'] = cropped_sample_4
        # save the npz file with brightfield image
        np.savez_compressed(npz_path, **raw)