# Convert ND2 to H5 format

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
from pathlib import Path
import re
import pandas as pd

from nd2reader import ND2Reader
import h5py

## Specify source and target folders

It is expected that all nd2 files in a folder should be stitched together. It is also expected that file names conform to ```XXX_ind.nd2``` where ```ind``` is a numeric index. By default **only channels that appear in all nd2 files are exported as h5 stacks**.

### Source

Point to folder containing datasets

In [2]:
# Folder that is searched for the '*.nd2' files to convert.
main_folder = Path('../test_folders/test_nd2_good/')

### Target

If you leave ```save_folder = None```, by default a subfolder called H5 is created in the ```main_folder```. If you prefer to store in another place, provide a path.

In [3]:
# Output folder for the H5 files. Leave as None to use an 'H5' subfolder of
# main_folder, or uncomment the line below and point it to another location.
save_folder = None
#save_folder = Path('/path/where/to/save')

## Run nd2 to h5 conversion

One h5 file per channel is created. By default only the channels that appear in **all** ND2 files are exported. You can also manually indicate a series of channels to export.

In [26]:
# If desired, you can manually specify the set of channels to export.
# Leave as None to export every channel that appears in all ND2 files.
tokeep = None
#tokeep = ['DMD-mCherry', 'DMD640']

# Find all nd2 files (hidden files are skipped) and sort them by index.
nd2files = [x for x in main_folder.glob('*.nd2') if not x.name.startswith('.')]
if not nd2files:
    # Fail early with a clear message instead of an opaque numpy error below.
    raise FileNotFoundError(f'No .nd2 files found in {main_folder}')

if len(nd2files) > 1:
    # File names must look like XXX_<index>.nd2; the index defines the
    # order in which the files are appended to the output stack.
    index_pattern = re.compile(r'_(\d+)\.nd2$', re.IGNORECASE)
    numbering = []
    for x in nd2files:
        match = index_pattern.search(x.name)
        if match is None:
            raise ValueError(
                f'Cannot extract a numeric index from {x.name!r}; '
                'expected a name of the form XXX_<index>.nd2'
            )
        numbering.append(int(match.group(1)))
    nd2files = [x for _, x in sorted(zip(numbering, nd2files), key=lambda pair: pair[0])]

# Find which channels appear in all files and keep only those.
all_ch = []
for file in nd2files:
    # The context manager closes the file handle once the metadata is read.
    with ND2Reader(file.as_posix()) as nd2file:
        all_ch.append(nd2file.metadata['channels'])
unique_ch = np.unique(np.concatenate(all_ch))
if tokeep is None:
    in_all_files = np.array([all(x in c for c in all_ch) for x in unique_ch], dtype=bool)
    tokeep = unique_ch[in_all_files]

# Create the target folder (including missing parents).
if save_folder is None:
    save_folder = main_folder.joinpath('H5')
save_folder.mkdir(parents=True, exist_ok=True)

# Characters that are replaced by "_" when a channel name is used in a file name.
special_chars = {ord(c): "_" for c in "!@#$%^&*()[]{};:,./<>?\\|`~-=+ "}

# Export each channel to its own h5 file.
for ch_name in tokeep:
    safe_ch_name = str(ch_name).translate(special_chars)
    # Name the output after the first nd2 file: <stem>_<channel>.h5
    h5_name = save_folder.joinpath(nd2files[0].stem + '_' + safe_ch_name + '.h5')

    with h5py.File(h5_name, "w") as f_out:
        print(h5_name)
        # Created lazily on the first frame actually read, so an nd2 file
        # without time points cannot leave it undefined or stale.
        dset = None
        for file in nd2files:
            with ND2Reader(file.as_posix()) as nd2file:
                # Kept from the original workflow: override the z-level
                # metadata before frames are requested.
                nd2file.metadata["z_levels"] = range(0)

                # The channel position can differ between files, but it is
                # constant within one file: look it up once per file.
                ch_nd2_index = nd2file.metadata["channels"].index(ch_name)

                # Load and save each image individually to avoid RAM problems
                # with large nd2 files; the buffer is reused for every frame.
                image = np.zeros(
                    (1, nd2file.metadata['height'], nd2file.metadata['width']),
                    dtype=np.uint16,
                )
                for i in range(nd2file.sizes['t']):
                    frame = nd2file.get_frame_2D(
                        x=0, y=0, z=0, c=ch_nd2_index, t=i, v=0
                    )
                    # Gap frames are returned as NaN-filled arrays; turn them
                    # into zeros before the cast to uint16.
                    image[0, :, :] = np.nan_to_num(frame)
                    if dset is None:
                        dset = f_out.create_dataset(
                            "volume",
                            data=image,
                            chunks=True,
                            compression="gzip",
                            compression_opts=1,
                            maxshape=(None, None, None),
                        )
                    else:
                        # Grow the stack by one frame and write it at the end.
                        dset.resize(dset.shape[0] + image.shape[0], axis=0)
                        dset[-image.shape[0]:] = image

../test_folders/test_nd2_good/H5/cluster_5_FAM_pH_9_0.h5
../test_folders/test_nd2_good/H5/cluster_FM_4_64_2__CHAPS.h5


  "ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. np.nan_to_num(array)")
