In [57]:
import os
import re
import pandas as pd

In [58]:
# Notebook display setting for DataFrame column width.
# NOTE(review): presumably meant to stop pandas truncating the long path
# strings shown in the sample sheets; the pandas docs name None as the
# "unlimited" value, so confirm that 0 behaves as intended on this version.
pd.set_option('display.max_colwidth', 0)

In [59]:
def list_files(directory):
    """
    Recursively collect the path of every file below a directory.

    Args:
        directory (str): Root of the directory tree to walk.

    Returns:
        list: One path per file found, built by joining the folder that
            contains the file with the file's name. Empty when the walk
            yields no files.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]

def get_leaf_directory(path):
    """
    Return the name of the directory that directly contains a path.

    Args:
        path (str): The file path.

    Returns:
        str: Final component of the path's parent directory; an empty
            string when the path has no named parent.
    """
    parent_dir, _ = os.path.split(path)
    return os.path.basename(parent_dir)

def oldest_date_value(group):
    """
    Return the input path stored on the row with the earliest date.

    Args:
        group (pd.DataFrame): Rows to inspect; the 'date' and 'input_path'
            columns are read.

    Returns:
        The 'input_path' value at the index label where 'date' is smallest,
        or None when the group is empty.
    """
    if group.empty:
        return None
    earliest_label = group['date'].idxmin()
    return group.loc[earliest_label, 'input_path']

def get_base_directory_and_file(path):
    """
    Reduce a path to its immediate parent directory name plus file name.

    Args:
        path (str): The file path.

    Returns:
        str: '<parent directory name>/<file name>' joined with os.path.join;
            just the file name when the path has no named parent.
    """
    parent_dir, file_name = os.path.split(path)
    return os.path.join(os.path.basename(parent_dir), file_name)

def remove_extension(filename):
    """
    Strip every trailing dot-suffix from a name.

    A suffix is a '.' followed by one or more word characters (letters,
    digits, underscore). All consecutive suffixes at the end of the string
    are removed, so 'a.ome.tiff' becomes 'a'. Dotted stems are trimmed as
    well: 'x_2023.01.15.nd2' becomes 'x_2023'.

    Args:
        filename (str): File name or path to trim.

    Returns:
        str: The input without its trailing run of suffixes.
    """
    trailing_suffixes = re.compile(r'(\.\w+)+$')
    return trailing_suffixes.sub('', filename)

def generate_sample_sheet(input_dir, output_dir, input_ext='.nd2', output_ext='.nd2'):
    """
    Generate a sample sheet with input and output paths.

    Args:
        input_dir (str): The directory containing input files; it is searched
            recursively.
        output_dir (str): The directory to store output files. A trailing
            path separator is optional.
        input_ext (str): Only files whose path ends with this string are
            included.
        output_ext (str): Extension appended to each output path after the
            input file's trailing extensions have been stripped.

    Returns:
        pd.DataFrame: The generated sample sheet with the columns
            'patient_id', 'input_path' and 'output_path'.
    """
    input_paths = [path for path in list_files(input_dir) if path.endswith(input_ext)]
    # The patient id is the part of the file name before the first underscore.
    patient_ids = [os.path.basename(path).split('_', 1)[0] for path in input_paths]
    sample_sheet = pd.DataFrame({'patient_id': patient_ids, 'input_path': input_paths})

    # Outputs mirror the '<parent directory>/<file>' layout of the inputs.
    relative_paths = sample_sheet['input_path'].apply(get_base_directory_and_file)
    # os.path.join (rather than string concatenation) keeps the result a valid
    # path whether or not output_dir ends with a separator.
    joined_paths = relative_paths.apply(lambda rel: os.path.join(output_dir, rel))
    sample_sheet['output_path'] = joined_paths.apply(remove_extension) + output_ext
    return sample_sheet

def get_fixed_image(sample_sheet):
    """
    Add a 'fixed_image_path' column holding each patient's oldest image.

    The first 'YYYY.MM.DD' token found in every 'input_path' is parsed as a
    date, and rows without such a token are dropped. Each remaining row then
    receives the input path of the earliest-dated row that shares its
    'patient_id'.

    Args:
        sample_sheet (pd.DataFrame): Sheet with 'patient_id' and
            'input_path' columns. It is modified in place.

    Returns:
        pd.DataFrame: The same sheet, sorted by 'patient_id', with the
            temporary 'date' column removed again.
    """
    date_tokens = sample_sheet['input_path'].str.extract(r'(\d{4}\.\d{2}\.\d{2})')[0]
    sample_sheet['date'] = pd.to_datetime(date_tokens, format='%Y.%m.%d')
    sample_sheet.dropna(subset=['date'], inplace=True)
    sample_sheet.sort_values(by=['patient_id', 'date'], inplace=True)

    def _oldest_path_for(patient_paths):
        # Look the group's rows up in the full sheet so the 'date' column is
        # available to oldest_date_value.
        return oldest_date_value(sample_sheet.loc[patient_paths.index])

    paths_by_patient = sample_sheet.groupby('patient_id')['input_path']
    sample_sheet['fixed_image_path'] = paths_by_patient.transform(_oldest_path_for)

    sample_sheet.drop(columns=['date'], inplace=True)
    sample_sheet.sort_values(by=['patient_id'], inplace=True)
    return sample_sheet

In [None]:
def make_missing_dirs(sample_sheet, *, dry_run=True):
    """
    Report, and optionally create, the directories the output files need.

    Args:
        sample_sheet (pd.DataFrame): Sheet whose 'output_path' column lists
            the files that will be written.
        dry_run (bool): When True (the default) nothing is created and each
            missing directory is only reported. When False, missing
            directories are created, including intermediate ones.

    Returns:
        None
    """
    # Sorted so the report and the creation order are deterministic.
    output_subdirs = sorted(set(sample_sheet['output_path'].apply(os.path.dirname)))
    print(output_subdirs)
    for subdir in output_subdirs:
        if not subdir:
            # A bare file name has no directory part; there is nothing to create.
            continue
        if os.path.exists(subdir):
            print(f'Directory already exists: {subdir}')
        elif dry_run:
            print(f'Would create directory: {subdir}')
        else:
            os.makedirs(subdir, exist_ok=True)
            print(f'Created directory: {subdir}')

In [60]:
# Filesystem root that the data paths in the cells below are built on.
# Alternative root, left commented out for switching by hand
# (NOTE(review): presumably a local mount of the same share — confirm):
# root_dir = '/Volumes'
root_dir = '/hpcnfs'

In [67]:
# Sample sheet pairing every .nd2 file under input_dir with an .ome.tiff
# output path under output_dir.
input_dir = root_dir + '/techunits/imaging/PublicData/ImagingU/cborriero/nd2_images'
output_dir = root_dir + '/techunits/imaging/work/ATTEND/achiodin/ome_tiff_images/'

sample_sheet = generate_sample_sheet(input_dir, output_dir, input_ext='.nd2', output_ext='.ome.tiff')
# Keep only the rows whose output file does not exist on disk yet.
samples_to_process = sample_sheet[~sample_sheet['output_path'].apply(os.path.exists)]

In [65]:
# Sample sheet for registration: .ome.tiff inputs mapped to .ome.tiff outputs
# under the registered_stitched_images directory. This cell rebinds
# input_dir, output_dir, sample_sheet and samples_to_process.
input_dir = root_dir + '/techunits/imaging/work/ATTEND/achiodin/ome_tiff_images/'
output_dir = root_dir + '/techunits/imaging/work/ATTEND/achiodin/image_registration_pipeline/image_registration/output/registered_stitched_images/'

sample_sheet = generate_sample_sheet(input_dir, output_dir, input_ext='.ome.tiff', output_ext='.ome.tiff')
# Adds the 'fixed_image_path' column (oldest-dated image per patient).
sample_sheet = get_fixed_image(sample_sheet)
# Keep only the rows whose output file does not exist on disk yet.
samples_to_process = sample_sheet[~sample_sheet['output_path'].apply(os.path.exists)]

In [None]:
# df.to_csv('/hpcnfs/scratch/DIMA/chiodin/repositories/image_registration_pipeline/image_registration/output/df.csv')

In [123]:
# sample_sheet.to_csv('/hpcnfs/scratch/DIMA/chiodin/repositories/image_registration_pipeline/image_registration/output/sample_sheet.csv', index=False)
# sample_sheet = pd.read_csv(root_dir + '/scratch/DIMA/chiodin/repositories/image_registration_pipeline/image_registration/output/sample_sheet.csv')

In [None]:
import argparse
import os 
import pathlib
from skimage.io import imread 
from utils.image_cropping import estimate_overlap
from utils.image_cropping import crop_2d_array_grid
from utils.wrappers.create_checkpoint_dirs import create_checkpoint_dirs
from utils.wrappers.compute_mappings import compute_mappings
from utils.wrappers.apply_mappings import apply_mappings
from utils.wrappers.export_image import export_image
from utils.empty_folder import empty_folder

def register_images(sample_sheet, mappings_dir, registered_crops_dir,
                    crop_width_x, crop_width_y, overlap_factor=0.3, delete_checkpoints=False):
    """
    Register every image listed in a sample sheet against its fixed image.

    For each row, the fixed and moving images are read, an overlap is
    estimated, both images are cut into a crop grid, mappings are computed
    and applied to the moving crops, and the result is exported to the row's
    output path.

    Args:
        sample_sheet (pd.DataFrame): Rows providing 'input_path',
            'output_path' and 'fixed_image_path'.
        mappings_dir: Passed to create_checkpoint_dirs together with
            registered_crops_dir and the row's input path; presumably the
            parent directory for mapping checkpoints (confirm in utils).
        registered_crops_dir: Counterpart of mappings_dir for the registered
            crops.
        crop_width_x: Crop width forwarded to crop_2d_array_grid.
        crop_width_y: Crop height forwarded to crop_2d_array_grid.
        overlap_factor (float): Forwarded to estimate_overlap.
        delete_checkpoints (bool): When True, both per-image checkpoint
            directories are emptied after the image has been exported.

    Returns:
        None
    """
    for _, record in sample_sheet.iterrows():
        input_path = record['input_path']
        output_path = record['output_path']
        fixed_image_path = record['fixed_image_path']
        print(f'Output path: {output_path}')

        fixed_image = imread(fixed_image_path)
        moving_image = imread(input_path)

        # The same overlap and crop geometry is used for both images.
        overlap_x, overlap_y = estimate_overlap(
            fixed_image, moving_image, overlap_factor=overlap_factor
        )
        fixed_crops = crop_2d_array_grid(
            fixed_image, crop_width_x, crop_width_y, overlap_x, overlap_y
        )
        moving_crops = crop_2d_array_grid(
            moving_image, crop_width_x, crop_width_y, overlap_x, overlap_y
        )

        current_mappings_dir, current_registered_crops_dir = create_checkpoint_dirs(
            mappings_dir, registered_crops_dir, input_path
        )
        mappings = compute_mappings(
            fixed_crops=fixed_crops,
            moving_crops=moving_crops,
            checkpoint_dir=current_mappings_dir,
        )
        registered_crops = apply_mappings(
            mappings=mappings,
            moving_crops=moving_crops,
            checkpoint_dir=current_registered_crops_dir,
        )
        export_image(registered_crops, overlap_x, overlap_y, output_path)
        print('Image processed successfully.')

        if not delete_checkpoints:
            continue
        for checkpoint_dir in (current_mappings_dir, current_registered_crops_dir):
            empty_folder(checkpoint_dir)
            print(f'Content deleted successfully: {checkpoint_dir}')