# Batch OIB to OME-Zarr Conversion and metadata integration

Performs batch conversion of OIB files to OME-Zarr, cleans the file names, and adds both image and ROI metadata to each OME-Zarr file.

## Define directory containing the files to be converted

In [1]:
# Folder that holds the .oib images and RoiSet .zip archives to convert.
directory_path = "/home/jovyan/LNMA/bravoa/data/New2 Fig para colocalizacion Manders-Mito"
# Alternative dataset (disabled):
# directory_path = "/home/jovyan/LNMA/bravoa/data/Seleccion ROIs circulares RE"

## Install Libraries

In [2]:
import subprocess
import sys

print("Installing necessary libraries...")
# Run pip through the kernel's own interpreter so the packages land in the
# environment this notebook imports from. check=True raises
# CalledProcessError when pip fails, so the success message below is only
# printed after a real success (the previous shell redirect hid every error).
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "--quiet",
        "natsort", "oiffile", "ome-zarr", "read_roi",
    ],
    check=True,
)
print("Libraries installed successfully.")

Installing necessary libraries...
Libraries installed successfully.


## Import Libraries

In [3]:
import os
import re
import shutil
import warnings

import matplotlib.pyplot as plt
import numpy as np
import oiffile
import pandas as pd
import zarr
from natsort import natsorted
from oiffile import imread
from ome_zarr.io import parse_url
from read_roi import read_roi_zip

## Functions

In [4]:
def clean_filenames(directory_path):
    """
    Cleans the names of all entries in the specified directory and renames
    them on disk.

    Spaces, '-', '/' and '+' are each replaced by '_', and every '_copy'
    substring is then removed. An entry is only renamed when the cleaned name
    differs from the current one AND nothing already exists under the cleaned
    name; otherwise a warning is issued and the entry keeps its current name,
    so an existing file is never silently overwritten.

    Parameters:
        directory_path (str): Path to the directory containing the files.

    Returns:
        list: The names of the directory entries after cleaning (entries that
            could not be renamed keep their original name), sorted naturally.

    Raises:
        OSError: If the directory cannot be listed or a rename fails.
    """
    # Every character in this table maps to a single underscore.
    to_underscore = str.maketrans({' ': '_', '-': '_', '/': '_', '+': '_'})
    cleaned_file_names = []

    for file_name in os.listdir(directory_path):
        cleaned_name = file_name.translate(to_underscore).replace('_copy', '')

        if cleaned_name != file_name:
            original_path = os.path.join(directory_path, file_name)
            cleaned_path = os.path.join(directory_path, cleaned_name)

            # os.rename would replace an existing file without notice, and an
            # empty cleaned name would point at the directory itself.
            if not cleaned_name or os.path.lexists(cleaned_path):
                warnings.warn(
                    f"Not renaming {file_name!r}: target name "
                    f"{cleaned_name!r} is empty or already exists.",
                    stacklevel=2,
                )
                cleaned_name = file_name
            else:
                os.rename(original_path, cleaned_path)

        cleaned_file_names.append(cleaned_name)

    # Natural sort keeps numeric prefixes in numeric order (2 before 10).
    return natsorted(cleaned_file_names)
    
def pair_oib_zip_files(files_names_cleaned):
    """
    Matches each .oib image with the RoiSet .zip archive that carries the same
    leading number.

    An image is recognised as '<number>_<anything>.oib' and a ROI archive as
    'RoiSet_<number>_<anything>.zip'. Names matching neither form are ignored,
    and when several names share a number the last one seen wins.

    Parameters:
        files_names_cleaned (list): List of cleaned filenames.

    Returns:
        list: Naturally sorted list of (oib_name, zip_name) tuples; numbers
            that appear on only one side are left out.
    """
    oib_regex = re.compile(r'^(\d+)_.*\.oib$')
    zip_regex = re.compile(r'^RoiSet_(\d+)_.*\.zip$')

    images_by_number = {}
    rois_by_number = {}

    for name in files_names_cleaned:
        found = oib_regex.match(name)
        if found is not None:
            images_by_number[found.group(1)] = name
            continue

        found = zip_regex.match(name)
        if found is not None:
            rois_by_number[found.group(1)] = name

    # Keep only the numbers present on both sides.
    matched = []
    for number, image_name in images_by_number.items():
        if number in rois_by_number:
            matched.append((image_name, rois_by_number[number]))

    return natsorted(matched)

def create_paths_from_pairs(paired_files, directory_path):
    """
    Generates file paths for each pair of .oib and .zip files.

    Parameters:
        paired_files (list): List of tuples with paired .oib and .zip filenames.
        directory_path (str): Path to the directory containing the files.

    Returns:
        list: One dictionary per pair with the keys
            'oib_file_path' (the image file),
            'rois_path' (the ROI archive) and
            'ome_zarr_path' (the image path with its extension replaced by
            '.zarr').
    """
    paired_paths = []

    for oib_name, rois_name in paired_files:
        oib_file_path = os.path.join(directory_path, oib_name)
        rois_path = os.path.join(directory_path, rois_name)

        # splitext removes the real extension, whatever its length, instead of
        # assuming it is exactly four characters long.
        stem, _extension = os.path.splitext(oib_file_path)

        paired_paths.append({
            'oib_file_path': oib_file_path,
            'rois_path': rois_path,
            'ome_zarr_path': f'{stem}.zarr',
        })

    return paired_paths

## Clean file names

In [5]:
# Rename the entries of `directory_path` on disk (this changes the file
# system) and display the resulting, naturally sorted names.
cleaned_files = clean_filenames(directory_path)
cleaned_files

['1_Cry11_2mg_2h_MitoT_750nM_60xmed_post_3.5_Z.oib',
 '1_Cry11_2mg_2h_MitoT_750nM_60xmed_post_3.5_Z.zarr',
 '2_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_bis.oib',
 '2_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_bis.zarr',
 '3_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_3.oib',
 '3_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_3.zarr',
 '4_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_4.oib',
 '4_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_4.zarr',
 '5_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_6.oib',
 '5_Cry11_2mg_2h_MitoT_750nM_60x__post_Z3.5_bis_6.zarr',
 '6_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant_bisbis_Z3.5.oib',
 '6_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant_bisbis_Z3.5.zarr',
 '7_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant_int2_Z3.5.oib',
 '7_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant_int2_Z3.5.zarr',
 '8_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant__bis4_int2_Z3.5.oib',
 '8_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant__bis4_int2_Z3.5.zarr',
 '9_Cry11Ba_1mg_3h_MitoTrack_750nM_60X_ant__bis5_int2_Z3.5.oib',

## Define File Paths Dictionary

In [6]:
# Match every .oib image with the RoiSet .zip that shares its leading number,
# then build the input and output paths used by the conversion step.
paired_files = pair_oib_zip_files(cleaned_files)
paired_paths = create_paths_from_pairs(paired_files, directory_path)
# Uncomment to inspect the generated paths:
#paired_paths

## Create OME-Zarr files with metadata and ROIs included

In [7]:
# Convert every (.oib, RoiSet .zip) pair into one Zarr store holding the pixel
# data as the "image_data" array, with the image and ROI metadata stored as
# attributes of the root group.
converted_count = 0

for paths in paired_paths:
    oib_file_path = paths['oib_file_path']
    rois_path = paths['rois_path']
    ome_zarr_path = paths['ome_zarr_path']

    ## Load image data and metadata
    with oiffile.OifFile(oib_file_path) as oib:
        image_data = oib.asarray()  # Load image data
        all_metadata = dict(oib.mainfile)  # Retrieve all metadata as a dictionary

    ## Load rois
    rois = read_roi_zip(rois_path)
    # Keep a fixed subset of fields per ROI; .get() yields None for fields a
    # given ROI does not define.
    roi_metadata = [
        {
            "name": roi_name,
            "type": roi_data.get("type"),
            "left": roi_data.get("left"),
            "top": roi_data.get("top"),
            "width": roi_data.get("width"),
            "height": roi_data.get("height"),
            "arc_size": roi_data.get("arc_size"),
            "position": roi_data.get("position"),
        }
        for roi_name, roi_data in rois.items()
    ]

    ## Format Image Data for OME-Zarr
    image_data = np.asarray(image_data)
    if image_data.ndim > 5:
        # Fail with a clear message before touching any existing store.
        raise ValueError(
            f"{oib_file_path}: expected at most 5 dimensions, got {image_data.ndim}"
        )

    # Pad with leading singleton axes until the array is 5-D.
    while image_data.ndim < 5:
        image_data = np.expand_dims(image_data, axis=0)

    # Swap axes 1 and 2 of the padded array. A 3-D input (A, Y, X) ends up as
    # (1, A, 1, Y, X); a 4-D input (A, B, Y, X) ends up as (1, B, A, Y, X).
    # NOTE(review): the intended layout is (T, C, Z, Y, X) - confirm against
    # `oib.axes` that this swap is also correct for multi-plane stacks.
    image_data = image_data.transpose(0, 2, 1, 3, 4)

    ## Remove Existing Zarr Store (if exists)
    if os.path.exists(ome_zarr_path):
        shutil.rmtree(ome_zarr_path)

    ## Save Image Data and Metadata to OME-Zarr
    store = parse_url(ome_zarr_path, mode="w").store
    root = zarr.group(store=store)
    dataset = root.create_dataset("image_data", data=image_data, chunks=True)

    # Save image metadata and ROI metadata as attributes
    root.attrs["image_metadata"] = all_metadata
    root.attrs["roi_metadata"] = roi_metadata

    converted_count += 1

# Report what actually happened instead of always claiming success.
if converted_count:
    print(f"Conversion complete. {converted_count} OME-Zarr file(s) saved with all metadata.")
else:
    print("No .oib/.zip pairs were found; nothing was converted.")


Conversion complete. The OME-Zarr files are saved with all metadata.
