This notebook finds random cells from each prediction category and displays them. The purpose is to get representative images examples of each category.

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tf  # write tiff files
from PIL import Image  # read tiff files
from tqdm import tqdm  # progress bar

In [2]:
# function that selects a random n images from the dataframe
def top_n_cell_select(
    df: pd.DataFrame,
    n: int = 1,
) -> pd.DataFrame:
    """
    Selects a random n cells from the dataframe

    This function assumes that the dataframe is sorted.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    n : int, optional
        Number of random cells to select, by default 1

    Returns
    -------
    pd.DataFrame
        The return dataframe with the random cell selected
    """

    # select the top n cells
    df = df.head(n)

    return df

In [3]:
# parameters
CELL_TYPE = "PBMC"

In [4]:
# Get the current working directory of the repository
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/4TB/data/Interstellar_Analysis')

In [5]:
image_out_dir_path = pathlib.Path(f"{root_dir}/8.cytopick_analysis/figures/PBMC/")


# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [6]:
# define directories
# where the images are on a local machine
# this is a hard coded path to the 1TB image directory

#####
# THIS PATH NEEDS TO BE CHANGED TO THE LOCAL IMAGE DIRECTORY ON YOUR MACHINE
#####

image_dir_path = pathlib.Path(
    "/home/lippincm/Desktop/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)

### Get single-cell probabilities

In [7]:
# define probability path
prob_df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/PBMC/probabilities.parquet"
).resolve(strict=True)

# read in the probability dataframe
df = pd.read_parquet(prob_df_path)

In [8]:
df.head()

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Treatment,Metadata_Dose,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,Metadata_Site,labels
0,0.04983,0.778777,0.171393,1,1,train,False,healthy,6135062.0,PBMC,...,LPS_Nigericin,1.000_µg_per_ml_10.000_µM,1493.544715,2062.132791,1507.0,2074.0,1484.0,2050.0,16.0,pyroptosis
1,0.060085,0.88906,0.050855,1,1,train,False,healthy,5358238.0,PBMC,...,Thapsigargin,1.000_µM,10.072917,510.194444,20.0,521.0,2.0,501.0,10.0,apoptosis
2,0.070094,0.858353,0.071552,1,1,train,False,healthy,3875296.0,PBMC,...,H2O2,100.000_µM,511.839344,31.760656,522.0,42.0,501.0,23.0,5.0,healthy
3,0.01629,0.92764,0.056071,1,1,train,False,healthy,7361184.0,PBMC,...,LPS_Nigericin,1.000_µg_per_ml_10.000_µM,1525.590476,310.028571,1531.0,317.0,1521.0,304.0,16.0,healthy
4,0.091443,0.537604,0.370953,1,1,train,False,healthy,4631111.0,PBMC,...,H2O2,100.000_nM,989.911932,405.008523,1003.0,416.0,979.0,395.0,12.0,healthy


In [9]:
# add column for if the prediction was correct
df["correct"] = df.apply(lambda x: x["label_true"] == x["label_pred"], axis=1)
# split the data into correct and incorrect
df_correct = df[df["correct"] == True]
df_incorrect = df[df["correct"] == False]
assert len(df_correct) + len(df_incorrect) == len(df)

In [10]:
# split the data into the different classes
pyroptosis_df = df_correct[df_correct["labels"] == "pyroptosis"]
apoptosis_df = df_correct[df_correct["labels"] == "apoptosis"]
control_df = df_correct[df_correct["labels"] == "healthy"]

# split the data classes by shuffled and unshuffled
pyroptosis_shuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == True]
pyroptosis_unshuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == False]
apoptosis_shuffled_df = apoptosis_df[apoptosis_df["shuffle"] == True]
apoptosis_unshuffled_df = apoptosis_df[apoptosis_df["shuffle"] == False]
control_shuffled_df = control_df[control_df["shuffle"] == True]
control_unshuffled_df = control_df[control_df["shuffle"] == False]

# split the shuffled/unshuffled data by the data splits
pyroptosis_shuffled_train_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "train"
]
pyroptosis_shuffled_test_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "testing"
]
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "validation"
]
pyroptosis_shuffled_treatment_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "holdout"
]

pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "train"
]
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "testing"
]
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "validation"
]
pyroptosis_unshuffled_treatment_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "holdout"
]

apoptosis_shuffled_train_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "train"
]
apoptosis_shuffled_test_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "testing"
]
apoptosis_shuffled_validation_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "validation"
]
apoptosis_shuffled_treatment_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_shuffled_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "holdout"
]

apoptosis_unshuffled_train_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "train"
]
apoptosis_unshuffled_test_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "testing"
]
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "validation"
]
apoptosis_unshuffled_treatment_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "holdout"
]

control_shuffled_train_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "train"
]
control_shuffled_test_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "testing"
]
control_shuffled_validation_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "validation"
]
control_shuffled_treatment_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "treatment_holdout"
]
control_shuffled_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "holdout"
]

control_unshuffled_train_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "train"
]
control_unshuffled_test_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "testing"
]
control_unshuffled_validation_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "validation"
]
control_unshuffled_treatment_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "treatment_holdout"
]
control_unshuffled_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "holdout"
]

# sort the dataframes by the probability of the correct class
pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_treatment_holdout_df = (
    pyroptosis_unshuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

pyroptosis_shuffled_train_df = pyroptosis_shuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_test_df = pyroptosis_shuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_treatment_holdout_df = (
    pyroptosis_shuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

apoptosis_unshuffled_train_df = apoptosis_unshuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_test_df = apoptosis_unshuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

apoptosis_shuffled_train_df = apoptosis_shuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_validation_df = apoptosis_shuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_test_df = apoptosis_shuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_holdout_df = apoptosis_shuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

control_unshuffled_train_df = control_unshuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_validation_df = control_unshuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_test_df = control_unshuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_holdout_df = control_unshuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)

control_shuffled_train_df = control_shuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_validation_df = control_shuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_test_df = control_shuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_holdout_df = control_shuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)


# add each df to a dictionary
dict_of_dfs = {}
dict_of_dfs["pyroptosis_shuffled_train_df"] = pyroptosis_shuffled_train_df
dict_of_dfs["pyroptosis_shuffled_test_df"] = pyroptosis_shuffled_test_df
dict_of_dfs["pyroptosis_shuffled_validation_df"] = pyroptosis_shuffled_validation_df
dict_of_dfs[
    "pyroptosis_shuffled_treatment_holdout_df"
] = pyroptosis_shuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_shuffled_holdout_df"] = pyroptosis_shuffled_holdout_df

dict_of_dfs["pyroptosis_unshuffled_train_df"] = pyroptosis_unshuffled_train_df
dict_of_dfs["pyroptosis_unshuffled_test_df"] = pyroptosis_unshuffled_test_df
dict_of_dfs["pyroptosis_unshuffled_validation_df"] = pyroptosis_unshuffled_validation_df
dict_of_dfs[
    "pyroptosis_unshuffled_treatment_holdout_df"
] = pyroptosis_unshuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_unshuffled_holdout_df"] = pyroptosis_unshuffled_holdout_df

dict_of_dfs["apoptosis_shuffled_train_df"] = apoptosis_shuffled_train_df
dict_of_dfs["apoptosis_shuffled_test_df"] = apoptosis_shuffled_test_df
dict_of_dfs["apoptosis_shuffled_validation_df"] = apoptosis_shuffled_validation_df
dict_of_dfs[
    "apoptosis_shuffled_treatment_holdout_df"
] = apoptosis_shuffled_treatment_holdout_df
dict_of_dfs["apoptosis_shuffled_holdout_df"] = apoptosis_shuffled_holdout_df

dict_of_dfs["apoptosis_unshuffled_train_df"] = apoptosis_unshuffled_train_df
dict_of_dfs["apoptosis_unshuffled_test_df"] = apoptosis_unshuffled_test_df
dict_of_dfs["apoptosis_unshuffled_validation_df"] = apoptosis_unshuffled_validation_df
dict_of_dfs[
    "apoptosis_unshuffled_treatment_holdout_df"
] = apoptosis_unshuffled_treatment_holdout_df
dict_of_dfs["apoptosis_unshuffled_holdout_df"] = apoptosis_unshuffled_holdout_df

dict_of_dfs["control_shuffled_train_df"] = control_shuffled_train_df
dict_of_dfs["control_shuffled_test_df"] = control_shuffled_test_df
dict_of_dfs["control_shuffled_validation_df"] = control_shuffled_validation_df
dict_of_dfs[
    "control_shuffled_treatment_holdout_df"
] = control_shuffled_treatment_holdout_df
dict_of_dfs["control_shuffled_holdout_df"] = control_shuffled_holdout_df

dict_of_dfs["control_unshuffled_train_df"] = control_unshuffled_train_df
dict_of_dfs["control_unshuffled_test_df"] = control_unshuffled_test_df
dict_of_dfs["control_unshuffled_validation_df"] = control_unshuffled_validation_df
dict_of_dfs[
    "control_unshuffled_treatment_holdout_df"
] = control_unshuffled_treatment_holdout_df
dict_of_dfs["control_unshuffled_holdout_df"] = control_unshuffled_holdout_df

# check the length of each df
for key, value in dict_of_dfs.items():
    if not len(dict_of_dfs[key]) == 0:
        pass
    else:
        print(key)

apoptosis_shuffled_test_df
apoptosis_shuffled_validation_df
apoptosis_shuffled_treatment_holdout_df
apoptosis_unshuffled_treatment_holdout_df
control_shuffled_treatment_holdout_df
control_unshuffled_treatment_holdout_df


In [11]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [12]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [13]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 10

In [14]:
dict_of_subset_dfs = {}
for key in tqdm(dict_of_dfs):
    df = dict_of_dfs[key]
    if len(df) == 0:
        pass
    else:
        # select n random cells from the dataframe
        df = top_n_cell_select(df, n)
        # add the df to the dictionary
        dict_of_subset_dfs[key] = df

100%|██████████| 30/30 [00:00<00:00, 52780.67it/s]


In [15]:
# create a blank df to append the data to
main_df = dict_of_subset_dfs["pyroptosis_shuffled_train_df"]
# drop all rows from the df
main_df = main_df.drop(main_df.index)

In [16]:
# loop through the dictionary of dataframes
for key in tqdm(dict_of_subset_dfs):
    # check if the dataframe is empty
    if len(dict_of_subset_dfs[key]) >= 1:
        # loop through the dataframe
        for cell in range(len(dict_of_subset_dfs[key])):
            # get the first row of the dataframe
            df = dict_of_subset_dfs[key].iloc[cell]
            image_id = df["Metadata_ImageNumber"].astype(int).astype(str)
            fov_id = df["Metadata_Site"].astype(int).astype(str)
            cell_id = df["Metadata_Cells_Number_Object_Number"]
            well_id = df["Metadata_Well"]
            row_id = well_id[0]
            column_id = well_id[1:]
            center_x = df["Metadata_Nuclei_Location_Center_X"].astype(int)
            center_y = df["Metadata_Nuclei_Location_Center_Y"].astype(int)
            # create a custom and contstant bounding box for the images
            # this is made from the extracted center_x and center_y of the cell (nucleus)
            min_x_box = center_x - radius
            max_x_box = center_x + radius
            min_y_box = center_y - radius
            max_y_box = center_y + radius
            print(cell + 1, key, row_id, column_id, fov_id, cell_id, center_x, center_y)

            # create the image paths for each channel of the image
            image_name1 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_1}"
            )
            image_path1 = image_dir_path.joinpath(image_name1)

            image_name2 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_2}"
            )
            image_path2 = image_dir_path.joinpath(image_name2)

            image_name3 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_3}"
            )
            image_path3 = image_dir_path.joinpath(image_name3)

            image_name4 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_4}"
            )
            image_path4 = image_dir_path.joinpath(image_name4)

            image_name5 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_5}"
            )
            image_path5 = image_dir_path.joinpath(image_name5)

            # crop all 5 channels of the image
            im1 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_UNCHANGED)

            im2 = cv2.imread(image_path2.as_posix(), cv2.IMREAD_UNCHANGED)

            im3 = cv2.imread(image_path3.as_posix(), cv2.IMREAD_UNCHANGED)

            im4 = cv2.imread(image_path4.as_posix(), cv2.IMREAD_UNCHANGED)

            im5 = cv2.imread(image_path5.as_posix(), cv2.IMREAD_UNCHANGED)

            # check for non-edge cells

            ### channels ###
            # * Channel 1: DAPI
            # * Channel 2: ER
            # * Channel 3: GasderminD
            # * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
            # * Channel 5: Mitochondria

            # prior to merging adjust the brightness of the image to make it easier to see
            # adjust the brightness of the image to make it easier to see
            alpha = 0.05  # Contrast control (1.0-3.0)
            beta = 0  # Brightness control (0-100)
            im3 = cv2.convertScaleAbs(im3, alpha=alpha, beta=beta)
            im4 = cv2.convertScaleAbs(im4, alpha=alpha, beta=beta)
            # blue channel does not need to be adjusted as it is the DAPI channel and is already bright

            blue_channel_stack = np.stack(im1, axis=-1)
            green_channel_stack = np.stack(im3, axis=-1)
            red_channel_stack = np.stack(im4, axis=-1)

            channel1 = "im1"
            channel2 = "im3"
            channel3 = "im4"

            # Scale the pixel values to fit within the 16-bit range (0-65535)
            blue_channel = (
                blue_channel_stack / np.max(blue_channel_stack) * 65535
            ).astype(np.uint16)
            green_channel = (
                green_channel_stack / np.max(green_channel_stack) * 65535
            ).astype(np.uint16)
            red_channel = (
                red_channel_stack / np.max(red_channel_stack) * 65535
            ).astype(np.uint16)

            # merge the channels together

            composite_image = cv2.merge(
                (red_channel, green_channel, blue_channel)
            ).astype(np.uint16)

            # The images end up being `wonky` so we need to do some post processing prior to saving
            # where wonky means that the image is not oriented correctly
            # the image is rotated 90 degrees clockwise and flipped vertically

            # this will ensure that the images are oriented correctly with X and Y centers prior to cropping
            # transformations of the image to fix the orientation post pixel scaling
            # flip the image vertically
            composite_image = cv2.flip(composite_image, 0)
            # rotate the image 90 degrees clockwise
            composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)

            composite_image_crop = composite_image[
                min_y_box:max_y_box, min_x_box:max_x_box
            ]

            if composite_image_crop.shape[0] == 0 or composite_image_crop.shape[1] == 0:
                print("Cell is on the edge of the image, skipping")
                continue

                # image_out_dir_path updated to include the feature name
            # write images
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.tiff"
                ),
                composite_image,
                compression=None,
            )
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
                ),
                composite_image_crop,
                compression=None,
            )

            composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)
            composite_image_crop = cv2.cvtColor(composite_image_crop, cv2.COLOR_BGR2RGB)

            print(composite_image.shape)

            # save the image as a png file
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png",
                composite_image,
            )
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png",
                composite_image_crop,
            )

            df = df.to_frame().T
            df[
                "image_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png"
            df[
                "image_crop_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png"
            main_df = pd.concat([main_df, df], ignore_index=True)

  0%|          | 0/24 [00:00<?, ?it/s]

1 pyroptosis_shuffled_train_df G 02 14 1116.0 1188 685
(2160, 2160, 3)


  main_df = pd.concat([main_df, df], ignore_index=True)


2 pyroptosis_shuffled_train_df B 11 4 1170.0 1628 785
(2160, 2160, 3)
3 pyroptosis_shuffled_train_df N 02 16 938.0 243 595
(2160, 2160, 3)
4 pyroptosis_shuffled_train_df H 09 2 1725.0 1101 1133
(2160, 2160, 3)
5 pyroptosis_shuffled_train_df H 09 3 2500.0 460 1496
(2160, 2160, 3)
6 pyroptosis_shuffled_train_df B 11 2 930.0 619 575
(2160, 2160, 3)
7 pyroptosis_shuffled_train_df D 11 12 2349.0 2024 2011
(2160, 2160, 3)
8 pyroptosis_shuffled_train_df N 09 5 1754.0 1261 1015
(2160, 2160, 3)
9 pyroptosis_shuffled_train_df H 08 16 2828.0 958 1679
(2160, 2160, 3)


  4%|▍         | 1/24 [00:04<01:45,  4.60s/it]

10 pyroptosis_shuffled_train_df E 02 13 62.0 1846 49
Cell is on the edge of the image, skipping
1 pyroptosis_shuffled_test_df B 02 8 230.0 1770 221
(2160, 2160, 3)
2 pyroptosis_shuffled_test_df M 08 7 2317.0 327 2050
(2160, 2160, 3)
3 pyroptosis_shuffled_test_df B 02 6 2448.0 1656 1842
(2160, 2160, 3)
4 pyroptosis_shuffled_test_df C 03 16 1748.0 1790 1826
(2160, 2160, 3)
5 pyroptosis_shuffled_test_df M 08 13 600.0 24 362
Cell is on the edge of the image, skipping
6 pyroptosis_shuffled_test_df G 03 15 49.0 1247 46
Cell is on the edge of the image, skipping
7 pyroptosis_shuffled_test_df D 11 16 2901.0 1959 1554
(2160, 2160, 3)
8 pyroptosis_shuffled_test_df B 02 16 3181.0 167 1722
(2160, 2160, 3)
9 pyroptosis_shuffled_test_df G 03 9 833.0 2006 544
(2160, 2160, 3)
10 pyroptosis_shuffled_test_df D 11 12 2361.0 1485 2024
(2160, 2160, 3)


  8%|▊         | 2/24 [00:08<01:35,  4.36s/it]

1 pyroptosis_shuffled_validation_df G 03 14 492.0 449 367
(2160, 2160, 3)
2 pyroptosis_shuffled_validation_df M 08 13 694.0 87 423
(2160, 2160, 3)
3 pyroptosis_shuffled_validation_df G 03 14 506.0 356 379
(2160, 2160, 3)
4 pyroptosis_shuffled_validation_df C 10 12 1586.0 2022 1348
(2160, 2160, 3)
5 pyroptosis_shuffled_validation_df B 04 16 1642.0 2139 888
(2160, 2160, 3)
6 pyroptosis_shuffled_validation_df M 08 13 563.0 20 337
Cell is on the edge of the image, skipping
7 pyroptosis_shuffled_validation_df H 08 9 177.0 1498 110
(2160, 2160, 3)
8 pyroptosis_shuffled_validation_df B 04 16 1856.0 2083 1001
(2160, 2160, 3)
9 pyroptosis_shuffled_validation_df B 11 6 1354.0 130 928
(2160, 2160, 3)
10 pyroptosis_shuffled_validation_df G 03 14 433.0 403 325
(2160, 2160, 3)


 12%|█▎        | 3/24 [00:13<01:32,  4.41s/it]

1 pyroptosis_shuffled_treatment_holdout_df N 05 1 1471.0 1646 911
(2160, 2160, 3)
2 pyroptosis_shuffled_treatment_holdout_df L 04 2 2831.0 1816 1564
(2160, 2160, 3)
3 pyroptosis_shuffled_treatment_holdout_df D 08 9 3046.0 104 1794
(2160, 2160, 3)
4 pyroptosis_shuffled_treatment_holdout_df L 04 2 3193.0 1695 1743
(2160, 2160, 3)
5 pyroptosis_shuffled_treatment_holdout_df L 03 3 1914.0 243 1750
(2160, 2160, 3)
6 pyroptosis_shuffled_treatment_holdout_df L 03 3 1966.0 297 1795
(2160, 2160, 3)
7 pyroptosis_shuffled_treatment_holdout_df L 09 4 16.0 308 20
Cell is on the edge of the image, skipping
8 pyroptosis_shuffled_treatment_holdout_df D 03 10 59.0 1866 52
(2160, 2160, 3)
9 pyroptosis_shuffled_treatment_holdout_df D 03 2 2055.0 1777 1253
(2160, 2160, 3)
10 pyroptosis_shuffled_treatment_holdout_df L 04 2 2947.0 1809 1614
(2160, 2160, 3)


 17%|█▋        | 4/24 [00:18<01:36,  4.82s/it]

1 pyroptosis_shuffled_holdout_df D 09 3 747.0 1220 443
(2160, 2160, 3)
2 pyroptosis_shuffled_holdout_df K 08 5 1221.0 658 935
(2160, 2160, 3)
3 pyroptosis_shuffled_holdout_df D 09 10 2890.0 808 1500
(2160, 2160, 3)
4 pyroptosis_shuffled_holdout_df E 08 5 1178.0 1952 866
(2160, 2160, 3)
5 pyroptosis_shuffled_holdout_df F 03 9 1608.0 573 987
(2160, 2160, 3)
6 pyroptosis_shuffled_holdout_df L 02 2 758.0 673 573
(2160, 2160, 3)
7 pyroptosis_shuffled_holdout_df B 10 14 830.0 1692 532
(2160, 2160, 3)
8 pyroptosis_shuffled_holdout_df D 09 2 396.0 103 235
(2160, 2160, 3)
9 pyroptosis_shuffled_holdout_df C 02 4 2199.0 597 1524
(2160, 2160, 3)
10 pyroptosis_shuffled_holdout_df C 04 9 2050.0 1685 1098
(2160, 2160, 3)


 21%|██        | 5/24 [00:24<01:40,  5.26s/it]

1 pyroptosis_unshuffled_train_df C 09 15 1812.0 1564 941
(2160, 2160, 3)
2 pyroptosis_unshuffled_train_df F 09 3 3718.0 151 2106
(2160, 2160, 3)
3 pyroptosis_unshuffled_train_df C 11 10 2590.0 972 1675
(2160, 2160, 3)
4 pyroptosis_unshuffled_train_df C 03 12 2846.0 333 1973
(2160, 2160, 3)
5 pyroptosis_unshuffled_train_df B 09 2 2644.0 256 1493
(2160, 2160, 3)
6 pyroptosis_unshuffled_train_df C 11 1 604.0 1618 645
(2160, 2160, 3)
7 pyroptosis_unshuffled_train_df K 09 9 2177.0 1814 1355
(2160, 2160, 3)
8 pyroptosis_unshuffled_train_df G 02 10 2957.0 804 1602
(2160, 2160, 3)
9 pyroptosis_unshuffled_train_df B 11 2 1448.0 613 854
(2160, 2160, 3)
10 pyroptosis_unshuffled_train_df C 08 10 737.0 422 399
(2160, 2160, 3)


 25%|██▌       | 6/24 [00:30<01:38,  5.46s/it]

1 pyroptosis_unshuffled_test_df D 10 5 1754.0 1779 1555
(2160, 2160, 3)
2 pyroptosis_unshuffled_test_df C 05 3 36.0 2135 30
Cell is on the edge of the image, skipping
3 pyroptosis_unshuffled_test_df C 05 3 38.0 658 30
Cell is on the edge of the image, skipping
4 pyroptosis_unshuffled_test_df C 05 3 47.0 583 34
Cell is on the edge of the image, skipping
5 pyroptosis_unshuffled_test_df D 11 10 1032.0 747 741
(2160, 2160, 3)
6 pyroptosis_unshuffled_test_df D 11 10 1037.0 1765 743
(2160, 2160, 3)
7 pyroptosis_unshuffled_test_df C 05 8 1844.0 945 1625
(2160, 2160, 3)
8 pyroptosis_unshuffled_test_df C 11 10 2214.0 1318 1460
(2160, 2160, 3)
9 pyroptosis_unshuffled_test_df C 05 8 1832.0 1250 1610
(2160, 2160, 3)
10 pyroptosis_unshuffled_test_df D 11 10 1039.0 1733 743
(2160, 2160, 3)


 29%|██▉       | 7/24 [00:34<01:25,  5.06s/it]

1 pyroptosis_unshuffled_validation_df B 11 3 296.0 1384 176
(2160, 2160, 3)
2 pyroptosis_unshuffled_validation_df B 11 3 2177.0 74 1404
(2160, 2160, 3)
3 pyroptosis_unshuffled_validation_df C 05 4 1413.0 275 983
(2160, 2160, 3)
4 pyroptosis_unshuffled_validation_df C 05 3 287.0 1590 169
(2160, 2160, 3)
5 pyroptosis_unshuffled_validation_df D 04 5 1342.0 1957 1126
(2160, 2160, 3)
6 pyroptosis_unshuffled_validation_df D 04 8 1987.0 903 1764
(2160, 2160, 3)
7 pyroptosis_unshuffled_validation_df C 05 3 2245.0 1364 1387
(2160, 2160, 3)
8 pyroptosis_unshuffled_validation_df D 11 1 2207.0 1089 1895
(2160, 2160, 3)
9 pyroptosis_unshuffled_validation_df D 11 5 2090.0 927 1554
(2160, 2160, 3)
10 pyroptosis_unshuffled_validation_df B 11 10 3086.0 1241 1797
(2160, 2160, 3)


 33%|███▎      | 8/24 [00:40<01:22,  5.18s/it]

1 pyroptosis_unshuffled_treatment_holdout_df L 09 5 1019.0 590 668
(2160, 2160, 3)
2 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2559.0 1082 1852
(2160, 2160, 3)
3 pyroptosis_unshuffled_treatment_holdout_df L 03 14 244.0 812 189
(2160, 2160, 3)
4 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2451.0 1183 1764
(2160, 2160, 3)
5 pyroptosis_unshuffled_treatment_holdout_df L 03 14 297.0 1126 222
(2160, 2160, 3)
6 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2483.0 1466 1787
(2160, 2160, 3)
7 pyroptosis_unshuffled_treatment_holdout_df L 09 4 2683.0 1694 1799
(2160, 2160, 3)
8 pyroptosis_unshuffled_treatment_holdout_df L 09 4 2662.0 772 1786
(2160, 2160, 3)
9 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2489.0 525 1791
(2160, 2160, 3)
10 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2506.0 1105 1811
(2160, 2160, 3)


 38%|███▊      | 9/24 [00:45<01:18,  5.21s/it]

1 pyroptosis_unshuffled_holdout_df K 08 16 1015.0 1664 1120
(2160, 2160, 3)
2 pyroptosis_unshuffled_holdout_df C 04 9 1896.0 1282 1015
(2160, 2160, 3)
3 pyroptosis_unshuffled_holdout_df D 05 15 2992.0 793 1981
(2160, 2160, 3)
4 pyroptosis_unshuffled_holdout_df C 04 8 1904.0 2116 1267
(2160, 2160, 3)
5 pyroptosis_unshuffled_holdout_df K 08 11 754.0 1428 628
(2160, 2160, 3)
6 pyroptosis_unshuffled_holdout_df C 04 8 1868.0 447 1239
(2160, 2160, 3)
7 pyroptosis_unshuffled_holdout_df D 05 4 2919.0 758 2110
(2160, 2160, 3)
8 pyroptosis_unshuffled_holdout_df C 04 8 2441.0 1522 1647
(2160, 2160, 3)
9 pyroptosis_unshuffled_holdout_df D 05 4 2956.0 1191 2142
(2160, 2160, 3)
10 pyroptosis_unshuffled_holdout_df D 05 5 166.0 1049 137
(2160, 2160, 3)


 42%|████▏     | 10/24 [00:51<01:14,  5.32s/it]

1 apoptosis_shuffled_train_df D 06 14 2951.0 1850 1525
(2160, 2160, 3)
2 apoptosis_shuffled_train_df E 06 11 182.0 583 281
(2160, 2160, 3)
3 apoptosis_shuffled_train_df D 07 8 826.0 473 418
(2160, 2160, 3)
4 apoptosis_shuffled_train_df E 06 3 984.0 1137 737
(2160, 2160, 3)
5 apoptosis_shuffled_train_df K 06 15 2735.0 1772 1323
(2160, 2160, 3)
6 apoptosis_shuffled_train_df L 07 16 362.0 1836 249
(2160, 2160, 3)
7 apoptosis_shuffled_train_df L 06 1 2897.0 525 1771
(2160, 2160, 3)
8 apoptosis_shuffled_train_df E 06 10 2123.0 82 1337
(2160, 2160, 3)
9 apoptosis_shuffled_train_df D 07 3 2069.0 1480 1258
(2160, 2160, 3)
10 apoptosis_shuffled_train_df D 07 16 228.0 1984 108
(2160, 2160, 3)


 46%|████▌     | 11/24 [00:57<01:11,  5.52s/it]

1 apoptosis_shuffled_holdout_df K 07 12 1651.0 945 925
(2160, 2160, 3)
2 apoptosis_shuffled_holdout_df K 07 7 2823.0 547 1840
(2160, 2160, 3)
3 apoptosis_shuffled_holdout_df E 07 5 861.0 633 672
(2160, 2160, 3)
4 apoptosis_shuffled_holdout_df E 07 13 1850.0 2125 1102
(2160, 2160, 3)
5 apoptosis_shuffled_holdout_df E 07 15 3124.0 1641 1871
(2160, 2160, 3)
6 apoptosis_shuffled_holdout_df E 07 12 1260.0 1699 907
(2160, 2160, 3)
7 apoptosis_shuffled_holdout_df K 07 13 4243.0 846 2022
(2160, 2160, 3)
8 apoptosis_shuffled_holdout_df E 07 11 2063.0 73 1288
(2160, 2160, 3)
9 apoptosis_shuffled_holdout_df K 07 16 2202.0 1413 1080
(2160, 2160, 3)
10 apoptosis_shuffled_holdout_df K 07 6 2187.0 912 1716
(2160, 2160, 3)


 50%|█████     | 12/24 [01:03<01:08,  5.73s/it]

1 apoptosis_unshuffled_train_df D 07 13 3708.0 2022 1845
(2160, 2160, 3)
2 apoptosis_unshuffled_train_df K 06 10 221.0 714 121
(2160, 2160, 3)
3 apoptosis_unshuffled_train_df L 06 2 2184.0 1200 1487
(2160, 2160, 3)
4 apoptosis_unshuffled_train_df D 07 6 242.0 1326 271
(2160, 2160, 3)
5 apoptosis_unshuffled_train_df D 06 3 3957.0 1083 2112
(2160, 2160, 3)
6 apoptosis_unshuffled_train_df D 07 3 3808.0 1020 2014
(2160, 2160, 3)
7 apoptosis_unshuffled_train_df D 06 6 2500.0 830 1787
(2160, 2160, 3)
8 apoptosis_unshuffled_train_df K 06 1 1505.0 1775 784
(2160, 2160, 3)
9 apoptosis_unshuffled_train_df D 07 8 519.0 2000 258
(2160, 2160, 3)
10 apoptosis_unshuffled_train_df D 07 2 1352.0 1442 703
(2160, 2160, 3)


 54%|█████▍    | 13/24 [01:09<01:03,  5.78s/it]

1 apoptosis_unshuffled_test_df E 06 2 511.0 1700 377
(2160, 2160, 3)
2 apoptosis_unshuffled_test_df L 06 5 347.0 1039 364
(2160, 2160, 3)
3 apoptosis_unshuffled_test_df L 06 5 686.0 531 762
(2160, 2160, 3)
4 apoptosis_unshuffled_test_df L 07 4 343.0 588 313
(2160, 2160, 3)
5 apoptosis_unshuffled_test_df L 07 8 726.0 803 424
(2160, 2160, 3)
6 apoptosis_unshuffled_test_df D 06 8 297.0 805 703
(2160, 2160, 3)
7 apoptosis_unshuffled_test_df L 07 4 694.0 630 707
(2160, 2160, 3)
8 apoptosis_unshuffled_test_df L 06 5 108.0 1089 128
(2160, 2160, 3)
9 apoptosis_unshuffled_test_df L 06 6 1227.0 1532 1256
(2160, 2160, 3)
10 apoptosis_unshuffled_test_df L 07 4 2049.0 855 1958
(2160, 2160, 3)


 58%|█████▊    | 14/24 [01:14<00:57,  5.77s/it]

1 apoptosis_unshuffled_validation_df L 06 5 242.0 258 262
(2160, 2160, 3)
2 apoptosis_unshuffled_validation_df E 06 9 382.0 1150 234
(2160, 2160, 3)
3 apoptosis_unshuffled_validation_df L 06 5 876.0 1206 975
(2160, 2160, 3)
4 apoptosis_unshuffled_validation_df L 07 4 412.0 225 395
(2160, 2160, 3)
5 apoptosis_unshuffled_validation_df L 07 10 395.0 1535 263
(2160, 2160, 3)
6 apoptosis_unshuffled_validation_df L 07 4 1606.0 1457 1587
(2160, 2160, 3)
7 apoptosis_unshuffled_validation_df L 07 14 3362.0 1020 2145
(2160, 2160, 3)
8 apoptosis_unshuffled_validation_df L 07 5 1893.0 1162 1925
(2160, 2160, 3)
9 apoptosis_unshuffled_validation_df L 07 4 1713.0 567 1667
(2160, 2160, 3)
10 apoptosis_unshuffled_validation_df L 07 10 1924.0 258 1306
(2160, 2160, 3)


 62%|██████▎   | 15/24 [01:20<00:51,  5.67s/it]

1 apoptosis_unshuffled_holdout_df E 07 10 939.0 136 628
(2160, 2160, 3)
2 apoptosis_unshuffled_holdout_df E 07 10 1301.0 626 884
(2160, 2160, 3)
3 apoptosis_unshuffled_holdout_df E 07 16 656.0 1458 449
(2160, 2160, 3)
4 apoptosis_unshuffled_holdout_df E 07 3 299.0 986 223
(2160, 2160, 3)
5 apoptosis_unshuffled_holdout_df E 07 3 423.0 114 322
(2160, 2160, 3)
6 apoptosis_unshuffled_holdout_df E 07 2 235.0 1283 151
(2160, 2160, 3)
7 apoptosis_unshuffled_holdout_df E 07 2 1721.0 1104 1067
(2160, 2160, 3)
8 apoptosis_unshuffled_holdout_df E 07 14 396.0 881 229
(2160, 2160, 3)
9 apoptosis_unshuffled_holdout_df E 07 8 2200.0 702 1507
(2160, 2160, 3)
10 apoptosis_unshuffled_holdout_df E 07 3 95.0 1083 64
(2160, 2160, 3)


 67%|██████▋   | 16/24 [01:25<00:44,  5.62s/it]

1 control_shuffled_train_df N 06 4 3399.0 301 1736
(2160, 2160, 3)
2 control_shuffled_train_df C 12 14 26.0 1475 30
Cell is on the edge of the image, skipping
3 control_shuffled_train_df I 04 1 2463.0 284 1307
(2160, 2160, 3)
4 control_shuffled_train_df H 05 10 3582.0 2077 1726
(2160, 2160, 3)
5 control_shuffled_train_df F 07 1 1142.0 90 903
(2160, 2160, 3)
6 control_shuffled_train_df O 07 9 2491.0 1056 1233
(2160, 2160, 3)
7 control_shuffled_train_df F 07 9 1732.0 2122 843
(2160, 2160, 3)
8 control_shuffled_train_df E 10 5 320.0 1049 175
(2160, 2160, 3)
9 control_shuffled_train_df O 07 1 1995.0 294 1068
(2160, 2160, 3)
10 control_shuffled_train_df I 04 8 3434.0 1368 1830
(2160, 2160, 3)


 71%|███████   | 17/24 [01:31<00:39,  5.65s/it]

1 control_shuffled_test_df I 09 1 901.0 1251 1171
(2160, 2160, 3)
2 control_shuffled_test_df N 06 14 3172.0 764 1511
(2160, 2160, 3)
3 control_shuffled_test_df O 05 12 1771.0 434 1473
(2160, 2160, 3)
4 control_shuffled_test_df O 11 13 2672.0 260 1777
(2160, 2160, 3)
5 control_shuffled_test_df O 06 7 134.0 1421 217
(2160, 2160, 3)
6 control_shuffled_test_df H 05 8 3165.0 1664 2069
(2160, 2160, 3)
7 control_shuffled_test_df O 10 2 71.0 2038 72
(2160, 2160, 3)
8 control_shuffled_test_df F 10 3 2443.0 761 1242
(2160, 2160, 3)
9 control_shuffled_test_df N 07 4 1689.0 177 1356
(2160, 2160, 3)
10 control_shuffled_test_df F 06 13 2455.0 1361 1186
(2160, 2160, 3)


 75%|███████▌  | 18/24 [01:37<00:34,  5.77s/it]

1 control_shuffled_validation_df O 09 12 1408.0 1804 1124
(2160, 2160, 3)
2 control_shuffled_validation_df G 06 6 2685.0 415 1421
(2160, 2160, 3)
3 control_shuffled_validation_df F 12 5 2759.0 363 1693
(2160, 2160, 3)
4 control_shuffled_validation_df O 12 7 1358.0 917 1386
(2160, 2160, 3)
5 control_shuffled_validation_df O 10 16 1365.0 1445 598
(2160, 2160, 3)
6 control_shuffled_validation_df I 12 4 243.0 395 187
(2160, 2160, 3)
7 control_shuffled_validation_df K 05 10 376.0 198 225
(2160, 2160, 3)
8 control_shuffled_validation_df O 08 16 858.0 669 571
(2160, 2160, 3)
9 control_shuffled_validation_df E 10 7 689.0 1098 1012
(2160, 2160, 3)
10 control_shuffled_validation_df I 08 5 1849.0 625 1239
(2160, 2160, 3)


 79%|███████▉  | 19/24 [01:43<00:29,  5.89s/it]

1 control_shuffled_holdout_df C 07 4 2811.0 297 1770
(2160, 2160, 3)
2 control_shuffled_holdout_df G 05 9 396.0 502 216
(2160, 2160, 3)
3 control_shuffled_holdout_df M 07 15 3401.0 1632 1610
(2160, 2160, 3)
4 control_shuffled_holdout_df C 07 15 2371.0 614 1157
(2160, 2160, 3)
5 control_shuffled_holdout_df F 11 13 954.0 1551 444
(2160, 2160, 3)
6 control_shuffled_holdout_df E 11 7 3361.0 472 2016
(2160, 2160, 3)
7 control_shuffled_holdout_df J 12 14 260.0 503 136
(2160, 2160, 3)
8 control_shuffled_holdout_df G 07 11 521.0 1618 1016
(2160, 2160, 3)
9 control_shuffled_holdout_df I 03 13 1549.0 2075 886
(2160, 2160, 3)
10 control_shuffled_holdout_df F 11 4 19.0 104 21


 83%|████████▎ | 20/24 [01:49<00:23,  5.81s/it]

Cell is on the edge of the image, skipping
1 control_unshuffled_train_df I 05 7 797.0 946 630
(2160, 2160, 3)
2 control_unshuffled_train_df F 05 8 2514.0 1722 1648
(2160, 2160, 3)
3 control_unshuffled_train_df J 10 2 874.0 94 489
(2160, 2160, 3)
4 control_unshuffled_train_df C 06 3 4050.0 447 2127
(2160, 2160, 3)
5 control_unshuffled_train_df F 06 9 3918.0 57 1803
(2160, 2160, 3)
6 control_unshuffled_train_df F 06 1 2512.0 1961 1522
(2160, 2160, 3)
7 control_unshuffled_train_df I 08 16 1421.0 724 813
(2160, 2160, 3)
8 control_unshuffled_train_df B 12 10 520.0 348 251
(2160, 2160, 3)
9 control_unshuffled_train_df C 12 14 2075.0 1161 1028
(2160, 2160, 3)
10 control_unshuffled_train_df I 07 7 2159.0 705 1487
(2160, 2160, 3)


 88%|████████▊ | 21/24 [01:55<00:17,  5.82s/it]

1 control_unshuffled_test_df I 02 6 1776.0 973 1824
(2160, 2160, 3)
2 control_unshuffled_test_df I 06 6 1020.0 1574 1077
(2160, 2160, 3)
3 control_unshuffled_test_df O 10 12 268.0 1000 287
(2160, 2160, 3)
4 control_unshuffled_test_df H 11 1 818.0 1746 1163
(2160, 2160, 3)
5 control_unshuffled_test_df I 06 6 127.0 891 146
(2160, 2160, 3)
6 control_unshuffled_test_df I 06 6 239.0 1203 269
(2160, 2160, 3)
7 control_unshuffled_test_df O 11 12 1768.0 721 1760
(2160, 2160, 3)
8 control_unshuffled_test_df G 10 9 1114.0 1744 518
(2160, 2160, 3)
9 control_unshuffled_test_df H 11 1 1280.0 1156 2141
(2160, 2160, 3)
10 control_unshuffled_test_df I 02 4 2779.0 768 2013
(2160, 2160, 3)


 92%|█████████▏| 22/24 [02:00<00:11,  5.79s/it]

1 control_unshuffled_validation_df H 11 11 421.0 238 771
(2160, 2160, 3)
2 control_unshuffled_validation_df I 05 6 2893.0 1906 1870
(2160, 2160, 3)
3 control_unshuffled_validation_df I 07 5 596.0 913 469
(2160, 2160, 3)
4 control_unshuffled_validation_df M 06 7 343.0 345 197
(2160, 2160, 3)
5 control_unshuffled_validation_df G 11 11 662.0 1554 1024
(2160, 2160, 3)
6 control_unshuffled_validation_df I 10 5 1561.0 1882 1265
(2160, 2160, 3)
7 control_unshuffled_validation_df O 10 11 421.0 1959 350
(2160, 2160, 3)
8 control_unshuffled_validation_df B 06 6 2024.0 383 1703
(2160, 2160, 3)
9 control_unshuffled_validation_df H 10 7 2436.0 1024 1651
(2160, 2160, 3)
10 control_unshuffled_validation_df H 12 2 2437.0 775 1762
(2160, 2160, 3)


 96%|█████████▌| 23/24 [02:07<00:05,  5.86s/it]

1 control_unshuffled_holdout_df G 05 7 1492.0 1826 1894
(2160, 2160, 3)
2 control_unshuffled_holdout_df G 05 11 118.0 2065 141
(2160, 2160, 3)
3 control_unshuffled_holdout_df G 07 6 2505.0 819 1847
(2160, 2160, 3)
4 control_unshuffled_holdout_df G 07 2 752.0 67 469
(2160, 2160, 3)
5 control_unshuffled_holdout_df H 06 2 1862.0 1630 1892
(2160, 2160, 3)
6 control_unshuffled_holdout_df C 07 7 1095.0 106 1139
(2160, 2160, 3)
7 control_unshuffled_holdout_df G 07 6 1923.0 1844 1396
(2160, 2160, 3)
8 control_unshuffled_holdout_df G 05 11 1655.0 1330 1961
(2160, 2160, 3)
9 control_unshuffled_holdout_df C 07 12 1154.0 1505 821
(2160, 2160, 3)
10 control_unshuffled_holdout_df I 11 5 1817.0 567 1784
(2160, 2160, 3)


100%|██████████| 24/24 [02:12<00:00,  5.53s/it]


In [17]:
# define main_df_path
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")

In [18]:
main_df.head()

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,Metadata_Site,labels,correct,image_path,image_crop_path
0,0.060589,0.489427,0.449984,1,1,train,True,healthy,2466219.0,PBMC,...,685.890947,1205.0,699.0,1174.0,674.0,14.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
1,0.037797,0.543169,0.419034,1,1,train,True,healthy,992552.0,PBMC,...,785.290076,1642.0,796.0,1617.0,770.0,4.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
2,0.009804,0.575268,0.414929,1,1,train,True,healthy,7219928.0,PBMC,...,595.397661,261.0,609.0,234.0,582.0,16.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
3,0.038737,0.550273,0.41099,1,1,train,True,healthy,4386044.0,PBMC,...,1133.617834,1114.0,1146.0,1089.0,1122.0,2.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
4,0.019823,0.570437,0.40974,1,1,train,True,healthy,4392343.0,PBMC,...,1496.259319,478.0,1509.0,444.0,1484.0,3.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
