In [10]:
import shutil
import pathlib

def copy_DP_files(
    project_path: pathlib.Path,
    config_name: str,
    checkpoint_name: str,
):
    """Copy the config and checkpoint files into a DeepProfiler (DP) project.

    Both files are read from the ``DP_files`` directory, resolved relative to
    the current working directory. Destination folders are created when they
    are missing, so the function can be re-run safely; existing destination
    files are overwritten.

    Args:
        project_path (pathlib.Path): path for DP project
        config_name (str): name of config file to copy; written to
            ``<project_path>/inputs/config/<config_name>``
        checkpoint_name (str): name of checkpoint file to copy; written to
            ``<project_path>/outputs/efn_pretrained/checkpoint/<checkpoint_name>``

    Raises:
        FileNotFoundError: if either source file is missing from ``DP_files``.
    """

    # copy config file to DP project
    config_load_path = pathlib.Path(f"DP_files/{config_name}")
    config_save_path = pathlib.Path(f"{project_path}/inputs/config/{config_name}")
    config_save_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(config_load_path, config_save_path)

    # copy checkpoint file to DP project
    checkpoint_load_path = pathlib.Path(f"DP_files/{checkpoint_name}")
    checkpoint_save_path = pathlib.Path(
        f"{project_path}/outputs/efn_pretrained/checkpoint/{checkpoint_name}"
    )
    # exist_ok=True keeps the cell idempotent: without it a second run fails
    # with FileExistsError because the checkpoint folder is already there.
    checkpoint_save_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(checkpoint_load_path, checkpoint_save_path)

In [11]:
# Inputs for the nuclei DP project: the project folder plus the names of the
# config and checkpoint files that copy_DP_files reads from DP_files/.
project_path = pathlib.Path("NF1_nuc_project-DP")
config_name = "NF1_nuc_config.json"
checkpoint_name = "efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment.h5"

copy_DP_files(
    project_path=project_path,
    config_name=config_name,
    checkpoint_name=checkpoint_name,
)

In [4]:
import pandas as pd
import os
import pathlib

def compile_index_csv(
    images_load_path: pathlib.Path,
    DP_images_path: pathlib.Path,
    annotations: pd.DataFrame,
    object: str,
) -> pd.DataFrame:
    """Compiles index csv file (image metadata, channel image locations, genotype)

    One row is produced per file in ``images_load_path`` whose name contains
    "DAPI". File names are read as underscore-separated fields in the order
    ``<well>_<plate>_<channel>_<site>_...``. Files are visited in sorted order
    so the resulting rows are reproducible between runs.

    For "cyto", the ER and Actin image names are derived from the DAPI file
    name by setting the channel field to 2 / 3 and swapping the "DAPI" token
    for "GFP" / "RFP". The derived files are not checked for existence.

    Args:
        images_load_path (pathlib.Path): Path to load illuminated corrected images from
        DP_images_path (pathlib.Path): Path to DP project images folder (DP_project/inputs/images);
            channel image locations are written relative to this folder
        annotations (pd.DataFrame): NF1 data annotations metadata; must provide
            "Plate", "Well" and "Genotype" columns
        object (str): Object to compile index csv for, either "nuc" (nucleus) or "cyto" (cytoplasm).
            (The name shadows the ``object`` builtin; it is kept because it is
            part of the call signature.)

    Returns:
        pd.DataFrame: index csv dataframe

    Raises:
        ValueError: if ``object`` is neither "nuc" nor "cyto".
        IndexError: if no annotation row matches an image's plate and well.
    """
    # Fail early: otherwise an unknown object type leaves the row dict unbound
    # (or silently re-appends the previous row) inside the loop.
    if object not in ("nuc", "cyto"):
        raise ValueError(f'object must be "nuc" or "cyto", got {object!r}')

    # channel number in the file name -> (index column, stain token in the file name)
    extra_cyto_channels = {2: ("ER", "GFP"), 3: ("Actin", "RFP")}

    index_csv_data = []

    for image_path in sorted(images_load_path.iterdir()):
        # Skip over files without DAPI in image path name
        if "DAPI" not in image_path.name:
            continue

        # Get image metadata from the underscore-separated file name fields
        name_fields = image_path.name.split("_")
        well = name_fields[0]
        plate = int(name_fields[1])
        site = name_fields[3]

        # Get genotype value for images from the matching annotations row
        image_annotations = annotations.loc[
            (annotations["Plate"] == plate) & (annotations["Well"] == well)
        ]
        if image_annotations.empty:
            raise IndexError(
                f"No annotation found for plate {plate}, well {well} ({image_path.name})"
            )
        genotype = image_annotations.iloc[0]["Genotype"]

        file_data = {
            "Metadata_Plate": plate,
            "Metadata_Well": well,
            "Metadata_Site": site,
            "Plate_Map_Name": f"{plate}_{well}_{site}",
            # The DAPI image itself is the DNA channel for both object types
            "DNA": os.path.relpath(image_path, DP_images_path),
        }

        if object == "cyto":
            for channel_number, (column, stain) in extra_cyto_channels.items():
                # Rewrite only the file name (never the parent folders), and do
                # it per field so the result does not depend on the plate number.
                channel_fields = list(name_fields)
                channel_fields[2] = str(channel_number)
                channel_name = "_".join(channel_fields).replace("DAPI", stain)
                file_data[column] = os.path.relpath(
                    image_path.with_name(channel_name), DP_images_path
                )

        file_data["Genotype"] = genotype
        file_data["Genotype_Replicate"] = 1

        index_csv_data.append(file_data)

    return pd.DataFrame(index_csv_data)

In [5]:
# Inputs for the nuclei index: corrected images, the DP project images folder
# (image locations in the index are made relative to it), and the annotations.
images_load_path = pathlib.Path("../1_preprocessing_data/Corrected_Images/")
DP_images_path = pathlib.Path("NF1_nuc_project-DP/inputs/images")
annotations_path = pathlib.Path("DP_files/NF1_annotations.csv")
annotations = pd.read_csv(annotations_path)
object = "nuc"

# NOTE(review): the index is only displayed here. Nothing visible in this
# notebook writes NF1_nuc_project-DP/inputs/metadata/index-nuc.csv, which a
# later cell reads -- confirm where that file gets saved.
compile_index_csv(
    images_load_path=images_load_path,
    DP_images_path=DP_images_path,
    annotations=annotations,
    object=object,
)

In [29]:
import pathlib
import pandas as pd

def compile_training_locations(
    index_csv_path: pathlib.Path,
    segmentation_data_path: pathlib.Path,
    save_path: pathlib.Path,
    object: str,
):
    """Compile per-field cell location files, saved as save_path/plate/well-site-Nuclei.csv

    For every row of the index csv, the matching segmentation file
    ``<well>_<site>_<object>-segmented.tsv`` is read from
    ``segmentation_data_path``. Its "Cell_ID", "Location_Center_X" and
    "Location_Center_Y" columns are kept, the two location columns are given a
    "Nuclei_" prefix, and the result is written as a csv. Progress and skipped
    fields are reported with ``print``.

    A field is skipped (and the loop continues) when its locations file already
    exists, when the segmentation file is missing or empty, or when the
    segmentation file lacks the expected columns. Any other error while reading
    a segmentation file is raised.

    Args:
        index_csv_path (pathlib.Path): Path to index.csv file for object (nuc or cyto) DeepProfiler project
        segmentation_data_path (pathlib.Path): Path to segmentation folder with .tsv locations files
        save_path (pathlib.Path): Path to save location files
        object (str): Object to find segmentation locations for, either "nuc" or "cyto".
            Only used to build the segmentation file name; the output file name
            and column prefix are "Nuclei" for both values.
    """
    # Reads in csv and iterate through the rows from the plate, well and site columns
    index_csv = pd.read_csv(index_csv_path)
    for _, row in index_csv.iterrows():
        plate = row["Metadata_Plate"]
        well = row["Metadata_Well"]
        site = row["Metadata_Site"]

        # Gets identifier string that matches identifier from segmented images tsvs:
        # well and site are the 1st and 4th underscore-separated fields of the DNA file name
        identifier_details = row["DNA"].split("/")[-1].split("_")[0:4]
        identifier_well = identifier_details[0]
        identifier_site = identifier_details[3]
        identifier = f"{identifier_well}_{identifier_site}"

        locations_save_path = pathlib.Path(
            f"{save_path}/{plate}/{well}-{site}-Nuclei.csv"
        )

        # Skips a field if the locations have already been found
        if locations_save_path.is_file():
            print(f"{plate} + {identifier} already has locations compiled!")
            continue

        print(f"Compiling locations for {plate} + {identifier}")
        frame_segmentations_path = pathlib.Path(
            f"{segmentation_data_path}/{identifier}_{object}-segmented.tsv"
        )

        # Only the two expected problems are handled; anything else (bad
        # permissions, malformed file, Ctrl-C) is raised instead of being
        # misreported as a missing file.
        try:
            frame_segmentations = pd.read_csv(frame_segmentations_path, delimiter="\t")
        except FileNotFoundError:
            print(f"No segmentation file for {frame_segmentations_path.name}")
            continue
        except pd.errors.EmptyDataError:
            print(f"No segmentation data within {frame_segmentations_path}")
            continue

        # A file without the expected columns is treated as having no data
        try:
            frame_segmentations = frame_segmentations[
                ["Cell_ID", "Location_Center_X", "Location_Center_Y"]
            ]
        except KeyError:
            print(f"No segmentation data within {frame_segmentations_path}")
            continue

        frame_segmentations = frame_segmentations.rename(
            columns={
                "Location_Center_X": "Nuclei_Location_Center_X",
                "Location_Center_Y": "Nuclei_Location_Center_Y",
            }
        )

        locations_save_path.parent.mkdir(parents=True, exist_ok=True)
        frame_segmentations.to_csv(locations_save_path, index=False)


# WORKS!!!!!!!!!!!!!!!

In [30]:
# Inputs for the nuclei locations: the project's index csv, the folder holding
# the segmentation .tsv files, and the project folder that receives the output.
index_csv_path = pathlib.Path("NF1_nuc_project-DP/inputs/metadata/index-nuc.csv")
segmentation_data_path = pathlib.Path("../2_segmenting_data/Segmented_Images/")
save_path = pathlib.Path("NF1_nuc_project-DP/inputs/locations/")
object = "nuc"

compile_training_locations(
    index_csv_path=index_csv_path,
    segmentation_data_path=segmentation_data_path,
    save_path=save_path,
    object=object,
)

Compiling locations for 1 + D6_3
Compiling locations for 1 + F6_2
Compiling locations for 1 + E7_3
Compiling locations for 1 + C6_2
Compiling locations for 1 + C7_1
Compiling locations for 1 + C7_4
Compiling locations for 1 + E6_3
Compiling locations for 1 + C6_1
Compiling locations for 1 + E7_4
Compiling locations for 1 + E7_1
Compiling locations for 1 + C6_4
Compiling locations for 1 + F7_1
Compiling locations for 1 + D7_1
Compiling locations for 1 + E6_1
Compiling locations for 1 + F6_4
Compiling locations for 1 + F7_4
Compiling locations for 1 + C6_3
Compiling locations for 1 + F6_3
Compiling locations for 1 + D6_2
Compiling locations for 1 + C7_2
Compiling locations for 1 + F7_2
Compiling locations for 1 + E6_4
Compiling locations for 1 + D6_4
Compiling locations for 1 + D6_1
Compiling locations for 1 + E6_2
Compiling locations for 1 + F6_1
Compiling locations for 1 + F7_3
Compiling locations for 1 + C7_3
Compiling locations for 1 + D7_2
Compiling locations for 1 + D7_4
Compiling 