In [1]:
from nd2reader import ND2Reader
import numpy as np
import pandas as pd
import datetime
import json
import re
from pathlib import Path
from typing import List, Union

In [2]:
def split_acquisition_metadata_planes(l):
    """Split a list of metadata lines into one sub-list per "Plane" section.

    A section starts at every line whose text, once leading whitespace is
    stripped, begins with ``Plane``. Each section runs up to (but not
    including) the next section start; the last one runs to the end of the
    list. Lines that precede the first section start are dropped.

    Parameters
    ----------
    l : list of str
        Metadata lines.

    Returns
    -------
    list of list of str
        Consecutive slices of ``l``, one per section; an empty list when no
        line starts a section.
    """
    # Approach adapted from
    # https://stackoverflow.com/questions/69832116/split-a-list-into-sublists-based-on-the-value-of-an-element
    plane_header = re.compile("^Plane")

    starts = []
    for position, line in enumerate(l):
        if plane_header.search(line.lstrip()):
            starts.append(position)

    # Every section ends where the next one begins; the final one ends at len(l).
    stops = starts[1:] + [len(l)]
    return [l[begin:end] for begin, end in zip(starts, stops)]


def parse_additional_metadata(acq_metadata):
    r"""Build one flat string per "Plane" section of ``TextInfoItem_5``.

    The value of ``acq_metadata['TextInfoItem_5']`` is split on ``'\r\n'``
    and grouped into sections by ``split_acquisition_metadata_planes``. The
    lines of each section are then joined with a literal backslash followed
    by ``n`` (two characters, not a newline), and every comma is replaced by
    a semicolon.

    Parameters
    ----------
    acq_metadata : dict
        Mapping that contains the key ``'TextInfoItem_5'`` with a string value.

    Returns
    -------
    list of str
        One string per section, in the order the sections appear.
    """
    raw_lines = acq_metadata['TextInfoItem_5'].split('\r\n')

    plane_strings = []
    for plane_lines in split_acquisition_metadata_planes(raw_lines):
        # presumably kept free of real newlines and commas so the value
        # survives as a single CSV cell; confirm against downstream readers
        flattened = '\\n'.join(plane_lines)
        plane_strings.append(flattened.replace(',',';'))
    return plane_strings


def get_start_time_abs(metadata_dict,common_metadata):
    """Return the absolute acquisition start time.

    ``metadata_dict['date']`` is returned unchanged when it is not ``None``.
    Otherwise ``common_metadata['TextInfoItem_9']`` is parsed with the format
    ``'%d/%m/%Y  %I:%M:%S %p'`` and the resulting ``datetime`` is returned.

    Parameters
    ----------
    metadata_dict : dict
        Mapping with a ``'date'`` entry (possibly ``None``).
    common_metadata : dict
        Mapping with a ``'TextInfoItem_9'`` string; only read when the
        ``'date'`` entry is ``None``.

    Returns
    -------
    The ``'date'`` entry, or a ``datetime.datetime`` parsed from the text field.
    """
    parsed_date = metadata_dict['date']
    if parsed_date is not None:
        return parsed_date

    # No parsed date available: fall back to the free-text timestamp.
    text_timestamp = common_metadata['TextInfoItem_9']
    return datetime.datetime.strptime(text_timestamp, '%d/%m/%Y  %I:%M:%S %p')
    

def get_standard_field_id_mapping(df):
    """Map each ``field_id`` to a position-based ``standard_field_id``.

    For every ``field_id`` the mean ``stage_x_abs`` / ``stage_y_abs`` is
    computed and rounded to a whole number. The distinct rounded positions
    are ranked by descending y, then ascending x, and numbered from 1. The
    rank is stored as a plain decimal string (``'1'``, ``'2'``, ...).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with columns ``field_id``, ``stage_x_abs`` and ``stage_y_abs``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ``field_id`` with columns ``field_id`` and
        ``standard_field_id``.
    """
    position_columns = ['stage_x_abs','stage_y_abs']

    # One row per field: mean stage position, rounded to whole units.
    per_field = df[['field_id'] + position_columns].groupby('field_id').mean()
    per_field[position_columns] = per_field[position_columns].round()
    per_field['XYCoordinates'] = per_field[position_columns].apply(tuple, axis=1)
    per_field = per_field.reset_index()

    # Number fields from top-left to bottom-right (increase x first):
    # largest y comes first, ties broken by smallest x.
    ranked_positions = sorted(
        set(per_field['XYCoordinates']),
        key=lambda xy: (-xy[1], xy[0]),
    )
    position_to_id = {
        xy: "%0d" % rank
        for rank, xy in enumerate(ranked_positions, start=1)
    }

    # keep this as StandardFieldID
    per_field['standard_field_id'] = per_field['XYCoordinates'].map(position_to_id)
    return per_field[['field_id','standard_field_id']]
    
    
def extract_metadata_and_save(
    in_file_path : Union[str,Path],
    out_path : Union[str,Path]) -> pd.DataFrame:
    """Extract per-frame metadata from an ND2 file and write it to ``out_path``.

    Three files are written into ``out_path``, all named after the stem of
    ``in_file_path``:

    * ``<stem>.json``: the decoded ``SLxImageTextInfo`` dictionary,
    * ``<stem>_metadata.csv``: the metadata table (without the index),
    * ``<stem>_metadata.pkl``: the same table, pickled.

    The table combines file-wide values (image size, objective name, pixel
    size), per-frame values (stage x/y/z, relative acquisition time) and
    generated ``stage_z_id`` / ``field_id`` / ``timepoint_id`` columns. When a
    start time is available an ``acquisition_time_abs`` column is added. A
    ``standard_field_id`` column and one ``metadata_string_acquisition_<i>``
    column per parsed "Plane" section are appended.

    Parameters
    ----------
    in_file_path : str or pathlib.Path
        ND2 file to read. It is handed to ``ND2Reader`` unchanged.
    out_path : str or pathlib.Path
        Existing directory that receives the three output files.

    Returns
    -------
    pandas.DataFrame
        The metadata table that was written to disk.
    """
    file_stem = Path(in_file_path).stem
    out_dir = Path(out_path)

    # The context manager closes the reader (and its file handle) even when
    # metadata parsing raises; previously the reader was never closed.
    with ND2Reader(in_file_path) as nd2_file:
        raw_metadata = nd2_file.parser._raw_metadata
        acquisition_times = list(raw_metadata.acquisition_times)
        common_metadata = {
            key.decode(): val.decode()
            for key, val in raw_metadata.image_text_info[b'SLxImageTextInfo'].items()
        }

        # save 'SLxImageTextInfo' as JSON
        with open(out_dir / (file_stem + '.json'), "w") as outfile:
            json.dump(common_metadata, outfile)

        # parse metadata
        metadata_dict = raw_metadata.__dict__
        additional_metadata = parse_additional_metadata(common_metadata)

        # NOTE(review): sizes[...] raises KeyError if the reader does not
        # define that axis; confirm single-field / single-plane files.
        n_timepoints = nd2_file.sizes['t']
        n_fields = nd2_file.sizes['v']
        n_planes = nd2_file.sizes['z']

        # combine into dataframe
        # The generated id columns assume the per-frame arrays are ordered
        # timepoint -> field -> z-plane; TODO confirm against the reader.
        metadata_df = pd.DataFrame(
            data={
                'n_pixels_y' : metadata_dict['height'],
                'n_pixels_x' : metadata_dict['width'],
                'objective_name' : common_metadata['TextInfoItem_13'],
                'pixel_size_microns' : metadata_dict['pixel_microns'],
                'stage_x_abs' : raw_metadata.x_data,
                'stage_y_abs' : raw_metadata.y_data,
                'stage_z_abs' : raw_metadata.z_data,
                'acquisition_time_rel' : acquisition_times,
                'stage_z_id' : list(metadata_dict['z_levels'])*(n_timepoints*n_fields),
                'field_id' : list(np.repeat(range(1,1 + n_fields),n_planes))*n_timepoints,
                'timepoint_id' : list(np.repeat(range(n_timepoints),n_planes*n_fields))})

    metadata_df['filename_ome_tiff'] = [
        file_stem + '_' + str(f).zfill(4) + '.ome.tiff'
        for f in metadata_df['field_id']
    ]

    start_time_abs = get_start_time_abs(metadata_dict,common_metadata)
    if start_time_abs is not None:
        metadata_df['acquisition_time_abs'] = [
            start_time_abs + datetime.timedelta(seconds=x)
            for x in metadata_df['acquisition_time_rel']
        ]

    # standardise field id (top-left to bottom-right)
    standard_field_id_mapping = get_standard_field_id_mapping(metadata_df)
    metadata_df = pd.merge(metadata_df, standard_field_id_mapping,on = 'field_id', how = 'left')

    # add additional metadata as columns
    # A cross join with an empty frame returns zero rows, which would discard
    # the whole table, so it is skipped when no "Plane" section was parsed.
    if additional_metadata:
        additional_metadata_df = pd.DataFrame(additional_metadata).T
        additional_metadata_df.columns = [
            'metadata_string_acquisition_' + str(i)
            for i in range(0,len(additional_metadata))
        ]
        metadata_df = pd.merge(metadata_df, additional_metadata_df,how='cross')

    # write metadata to file
    # Plain paths are used: pathlib.Path is not a context manager on
    # Python 3.13+, and `with Path(...)` never opened or closed anything.
    metadata_df.to_csv(out_dir / (file_stem + '_metadata.csv'), index=False)
    metadata_df.to_pickle(out_dir / (file_stem + '_metadata.pkl'))

    return metadata_df

In [3]:
# Input ND2 file and output directory used by the cells that follow.
# NOTE(review): these are absolute, user-specific paths; adjust them before
# running the notebook on another machine.
in_file_path = '/srv/scratch/berrylab/z3532965/Nikon_AX_QPI/20221010_BleachChase_POLR2A/20221010_144534_824/WellE05_ChannelGFP,AF647_Seq0001.nd2'
# Alternative input file (currently disabled):
#in_file_path = '/srv/scratch/berrylab/z3536241/NikonNSTORM/221216/221212_Pbody_DDXImmuno/20221216_140317_611/Well01_ChannelAG_647_FISH,AG_568_FISH,AG_488NHS,AG_DAPI_Seq0000.nd2'
out_path = '/srv/scratch/berrylab/z3532965/Nikon_AX_QPI/'

In [4]:
# Run the notebook-local extractor: writes the JSON, CSV and pickle files into
# out_path and keeps the returned metadata table in `tmp`.
tmp = extract_metadata_and_save(in_file_path,out_path)

In [5]:
# Open the same file again and grab the parsed metadata dictionary at notebook
# level (presumably for interactive inspection).
# NOTE(review): this reader is not closed in any cell visible here.
nd2_file = ND2Reader(in_file_path)
metadata_dict = nd2_file.parser._raw_metadata.__dict__

In [6]:
# NOTE(review): this import sits mid-notebook rather than in the import cell at
# the top of the notebook.
from blimp.preprocessing import nd2_parse_metadata

# Same input and output paths, but through the packaged blimp function with
# mip=True; this overwrites the `tmp` produced by the notebook-local function.
tmp = nd2_parse_metadata.nd2_extract_metadata_and_save(in_file_path,out_path,mip=True)

In [7]:
# Display the metadata table currently bound to `tmp` (the result of the
# blimp call in the preceding cell).
tmp

Unnamed: 0,n_pixels_y,n_pixels_x,objective_name,pixel_size_microns,field_id,timepoint_id,filename_ome_tiff,acquisition_time_rel,stage_y_abs,stage_x_abs,stage_z_n,acquisition_time_abs,standard_field_id,metadata_string_acquisition_0,metadata_string_acquisition_1
0,1024,1024,Plan Apo VC 20x DIC N2,0.428823,1,0,"WellE05_ChannelGFP,AF647_Seq0001_0001.ome.tiff",133.585174,3281.833333,12347.144444,9,2022-10-10 15:00:33.585174,16,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
1,1024,1024,Plan Apo VC 20x DIC N2,0.428823,2,0,"WellE05_ChannelGFP,AF647_Seq0001_0002.ome.tiff",141.514562,3285.533333,11907.9,9,2022-10-10 15:00:41.514562,15,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
2,1024,1024,Plan Apo VC 20x DIC N2,0.428823,3,0,"WellE05_ChannelGFP,AF647_Seq0001_0003.ome.tiff",149.598083,3289.5,11468.8,9,2022-10-10 15:00:49.598083,14,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
3,1024,1024,Plan Apo VC 20x DIC N2,0.428823,4,0,"WellE05_ChannelGFP,AF647_Seq0001_0004.ome.tiff",157.399006,3293.3,11029.5,9,2022-10-10 15:00:57.399006,13,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
4,1024,1024,Plan Apo VC 20x DIC N2,0.428823,5,0,"WellE05_ChannelGFP,AF647_Seq0001_0005.ome.tiff",165.338793,3732.577778,11033.522222,9,2022-10-10 15:01:05.338793,9,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
5,1024,1024,Plan Apo VC 20x DIC N2,0.428823,6,0,"WellE05_ChannelGFP,AF647_Seq0001_0006.ome.tiff",173.42684,3728.788889,11473.1,9,2022-10-10 15:01:13.426840,10,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
6,1024,1024,Plan Apo VC 20x DIC N2,0.428823,7,0,"WellE05_ChannelGFP,AF647_Seq0001_0007.ome.tiff",181.458264,3724.8,11911.9,9,2022-10-10 15:01:21.458264,11,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
7,1024,1024,Plan Apo VC 20x DIC N2,0.428823,8,0,"WellE05_ChannelGFP,AF647_Seq0001_0008.ome.tiff",189.343981,3720.7,12351.1,9,2022-10-10 15:01:29.343981,12,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
8,1024,1024,Plan Apo VC 20x DIC N2,0.428823,9,0,"WellE05_ChannelGFP,AF647_Seq0001_0009.ome.tiff",197.158972,4159.955556,12354.822222,9,2022-10-10 15:01:37.158972,8,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
9,1024,1024,Plan Apo VC 20x DIC N2,0.428823,10,0,"WellE05_ChannelGFP,AF647_Seq0001_0010.ome.tiff",204.953519,4163.8,11915.6,9,2022-10-10 15:01:44.953519,7,Plane #1:\n Name: GFP\n Component Count: 1\n M...,Plane #2:\n Name: AF647\n Component Count: 1\n...
