In [9]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import napari

In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Data root: the `data` folder that sits one level above the current working directory.
data_dir = Path.cwd().parents[0].joinpath('data').absolute()
# Folder holding the stitched 3D images; `data_raw` and `save_dir` are two names for the same path.
save_dir = data_dir.joinpath('OCT mouse', '3D_new', 'stitched_3D')
data_raw = save_dir

# Read data

In [12]:
# Lookup table: imaging cycle -> channel -> marker name.
# It is the default `marker_dict` of `get_info`, which skips any file whose
# cycle/channel pair is not listed here.
# Channel 'ch0' is 'Hoeschst' in every cycle; cycles 1-5 and 9 list two
# channels, cycles 6-8 list four.
# NOTE(review): 'Hoeschst' and 'CylinE' look like misspellings (Hoechst /
# Cyclin E). They are left untouched because these strings end up in the
# `Markers` column that is written to the cached info.csv - confirm before
# renaming.
markers_map = {
    'cycle1': {
        'ch0': 'Hoeschst', 
        'ch1': 'TEAD1 & YAP1'
    },
    'cycle2': {
        'ch0': 'Hoeschst', 
        'ch1': 'CylinE & CDK2'
    },
    'cycle3': {
        'ch0': 'Hoeschst', 
        'ch1': 'P-ERK & c-MYC'
    },
    'cycle4': {
        'ch0': 'Hoeschst', 
        'ch1': 'p-AKT & mTOR'
    },
    'cycle5': {
        'ch0': 'Hoeschst', 
        'ch1': 'Mcl-1 & BAK'
    },
    # Cycles 6-8 carry three marker channels in addition to ch0.
    'cycle6': {
        'ch0': 'Hoeschst',
        'ch1': 'p-EGFR',
        'ch2': 'Tom20',
        'ch3': 'Ki67'
    },
    'cycle7': {
        'ch0': 'Hoeschst',
        'ch1': 'Pan-cytokeratin',
        'ch2': 'Golph4',
        'ch3': 'Bim'
    },
    'cycle8': {
        'ch0': 'Hoeschst',
        'ch1': 'Concanavalin A',
        'ch2': 'Phalloidin',
        'ch3': 'WGA'
    },
    'cycle9': {
        'ch0': 'Hoeschst',
        'ch1': 'NBD-C6'
    },
}

def get_info(data_raw, marker_dict = markers_map):
    """Index the TIFF images below ``data_raw`` into a metadata table.

    File names are split on ``'_'`` and read positionally as
    ``<timepoint>_<resolution>_<fov>_<z>_<cycle>_<channel...>``; the channel
    key is the first three characters of the sixth field (e.g. ``'ch0'``).

    Parameters
    ----------
    data_raw : str or os.PathLike
        Root folder that is walked recursively.
    marker_dict : dict
        Mapping ``cycle -> channel -> marker name``. Files whose cycle or
        channel is not present in this mapping are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per accepted image with the columns ``Timepoint``,
        ``Resolution``, ``FOV``, ``Z``, ``Cycle``, ``Channels``, ``Markers``
        and ``Path``. All values are strings as parsed from the file name.
        The columns are present even when no file matched.
    """
    columns = [
        "Timepoint", "Resolution", "FOV", "Z",
        "Cycle", "Channels", "Markers", "Path",
    ]
    records = []

    for dirpath, dirnames, filenames in os.walk(data_raw):
        # Sorting in place makes os.walk visit sub-folders in a stable order,
        # so the row order does not depend on the filesystem.
        dirnames.sort()
        for name in sorted(filenames):
            # Test the real extension rather than a substring, so names that
            # merely contain "tif" are not mistaken for images.
            if os.path.splitext(name)[1].lower() not in (".tif", ".tiff"):
                continue

            fields = name.split("_")
            # Names that do not follow the six-field convention are skipped
            # instead of raising IndexError.
            if len(fields) < 6:
                continue

            time, res, fov, z, cycle = fields[:5]
            ch = fields[5][:3]

            # Only an unknown cycle/channel means "not a marker image";
            # any other error is a real problem and must surface.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append(
                (time, res, fov, z, cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(records, columns=columns)

In [13]:
# Cached per-image metadata table; it is built once by get_info and reused afterwards.
df_meta_path = data_dir / 'OCT mouse' / '3D_new' / 'metadata' / 'info.csv'

# exist_ok=True tolerates an existing folder but still raises if the path
# exists and is not a directory, which the previous try/except reported as
# "Folder is already there".
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if not df_exist:
    print('Created df')
    get_info(data_raw).to_csv(df_meta_path, index=False)
else:
    print('Loaded df')

# Always load from the CSV so a fresh run and a cached run give the same
# dtypes (get_info yields Z as strings, read_csv parses it as integers).
df = pd.read_csv(df_meta_path)

Folder is already there
Loaded df


In [14]:
df

Unnamed: 0,Timepoint,Resolution,FOV,Z,Cycle,Channels,Markers,Path
0,1M,40X,FW1,0,cycle1,ch0,Hoeschst,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
1,1M,40X,FW1,0,cycle1,ch1,TEAD1 & YAP1,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
2,1M,40X,FW1,0,cycle2,ch0,Hoeschst,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
3,1M,40X,FW1,0,cycle2,ch1,CylinE & CDK2,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
4,1M,40X,FW1,0,cycle3,ch0,Hoeschst,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
...,...,...,...,...,...,...,...,...
2275,1W,40X,FW3,9,cycle8,ch1,Concanavalin A,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
2276,1W,40X,FW3,9,cycle8,ch2,Phalloidin,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
2277,1W,40X,FW3,9,cycle8,ch3,WGA,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...
2278,1W,40X,FW3,9,cycle9,ch0,Hoeschst,Y:\coskun-lab\Thomas\15_PLA\data\OCT mouse\3D_...


# Convert to hdf5

In [15]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed dataset called ``name`` into an HDF5 file.

    Parameters
    ----------
    path : str
        Location of the HDF5 file; it is passed unchanged to ``h5py.File``
        (this notebook also passes ``pathlib.Path`` objects).
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store. Chunks span one index of the first axis and the full
        extent of the remaining axes.
    attr_dict : dict, optional
        Key/value pairs stored as attributes of the new dataset.
    mode : str
        File mode handed to ``h5py.File`` (default ``"a"``).

    Notes
    -----
    If a dataset named ``name`` already exists, nothing is written and a
    message is printed instead.
    """
    # The context manager closes the file even when dataset creation or the
    # write fails; the previous explicit close() was skipped on any exception.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        data_shape = data.shape
        dset = hf.create_dataset(
            name,
            shape=data_shape,
            maxshape=data_shape,
            # One chunk per index along the first axis.
            chunks=(1,) + data_shape[1:],
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Load the image stored at ``path`` with scikit-image, requesting grayscale output."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every element of ``pics`` through joblib and return the results.

    Parameters
    ----------
    task : callable
        Function called with one element of ``pics`` at a time.
    pics : iterable
        Items to process.
    n_jobs : int
        Number of joblib workers. The default of 20 is the value that was
        previously hard-coded; lower it on machines with fewer cores.

    Returns
    -------
    list
        The value returned by ``Parallel(...)`` for the submitted calls.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

def get_min(imgs):
    """Return, per axis, the smallest extent found among the shapes of ``imgs``."""
    shape_table = np.array([img.shape for img in imgs])
    # Reduce over the image axis so one minimum remains per dimension.
    return shape_table.min(axis=0)

In [16]:
# Index of the generated HDF5 files (one row per Timepoint/Resolution/FOV/Z group).
df_imgs_path = data_dir / 'OCT mouse' / '3D_new' / 'metadata' / 'imgs.csv'
# Output folder for the per-group HDF5 files.
temp_path = data_dir / 'OCT mouse' / '3D_new' / 'hdf5' / 'raw'

# exist_ok=True tolerates existing folders but still raises if either path
# exists and is not a directory.
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()

if not df_exist:
    print('Created df')
    df_z = df[df.Resolution == '40X']
    group = df_z.groupby(['Timepoint', 'Resolution', 'FOV', 'Z'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(map(str, name)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name) + [file_path])

        # Files from an earlier run are kept and not recomputed.
        if file_path.exists():
            continue

        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()

        imgs = joblib_loop(read_img, paths)
        # Crop every image to the smallest height/width in the group so they
        # can be stacked into a single array.
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        # Per-image labels, in the same order as the first axis of the stack.
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # Write to a scratch file and move it into place only after a complete
        # write, so an interrupted run cannot leave a truncated .hdf5 that the
        # exists() check above would later treat as finished. mode='w'
        # overwrites any stale scratch file from a previous attempt.
        partial_path = file_path.with_name(file_path.name + '.part')
        save_hdf5(partial_path, 'imgs', imgs_cropped, info, mode='w')
        os.replace(partial_path, file_path)

    pd.DataFrame(
        rows, columns=['Timepoint', 'Resolution', 'FOV', 'Z', 'Path']
    ).to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')

# Always load from the CSV so `Path` holds plain strings on both a fresh run
# and a cached run (the in-memory rows hold pathlib.Path objects).
df_imgs = pd.read_csv(df_imgs_path)

Folder is already there
Folder is already there
Created df


  0%|          | 0/95 [00:00<?, ?it/s]