This notebook finds random cells from each prediction category and displays them. The purpose is to get representative images examples of each category.

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tf  # write tiff files
from PIL import Image  # read tiff files
from tqdm import tqdm  # progress bar

In [33]:
# function that selects a random image from the dataframe


def top_n_cell_select(
    df: pd.DataFrame,
    n: int = 1,
) -> pd.DataFrame:
    """
    Selects a random cell from the dataframe

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    n : int, optional
        Number of random cells to select, by default 1

    Returns
    -------
    pd.DataFrame
        The return dataframe with the random cell selected
    """

    # select the top n cells
    df = df.head(n)

    return df

In [3]:
# parameters
CELL_TYPE = "PBMC"

In [4]:
# Get the current working directory
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/ML/Interstellar_Analysis')

In [5]:
image_out_dir_path = pathlib.Path(f"{root_dir}/8.cytopick_analysis/figures/PBMC/")

In [6]:
# define directories
# where the images are
image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)


# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

### Get single-cell probabilities

In [20]:
# define probability path
prob_df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/PBMC/probabilities.parquet"
).resolve(strict=True)

# read in the probability dataframe
df = pd.read_parquet(prob_df_path)

In [22]:
df.head()

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Nuclei_Number_Object_Number,Metadata_Treatment,Metadata_Dose,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,labels
0,0.116349,0.490718,0.392933,2,1,train,True,pyroptosis,2534673.0,PBMC,...,169.0,LPS,10.000_µg_per_ml,95.866071,128.33631,111.0,143.0,87.0,117.0,healthy
1,0.110338,0.490567,0.399095,1,1,train,True,healthy,1010528.0,PBMC,...,604.0,LPS_Nigericin,100.000_µg_per_ml_10.000_µM,244.816143,477.495516,258.0,489.0,233.0,466.0,pyroptosis
2,0.111726,0.493623,0.394651,1,1,train,True,healthy,2174870.0,PBMC,...,608.0,LPS,10.000_µg_per_ml,1728.680628,382.531414,1740.0,394.0,1717.0,365.0,pyroptosis
3,0.112709,0.500293,0.386999,1,1,train,True,healthy,5185514.0,PBMC,...,337.0,Topotecan,10.000_nM,1363.190736,282.046322,1376.0,292.0,1352.0,272.0,healthy
4,0.113707,0.49722,0.389073,2,1,train,True,pyroptosis,358846.0,PBMC,...,1554.0,DMSO,0.100_%,428.720379,1044.687204,444.0,1055.0,420.0,1031.0,healthy


In [23]:
# add column for if the prediction was correct
df["correct"] = df.apply(lambda x: x["label_true"] == x["label_pred"], axis=1)
# split the data into correct and incorrect
df_correct = df[df["correct"] == True]
df_incorrect = df[df["correct"] == False]
assert len(df_correct) + len(df_incorrect) == len(df)

In [36]:
# split the data into the different classes
pyroptosis_df = df_correct[df_correct["labels"] == "pyroptosis"]
apoptosis_df = df_correct[df_correct["labels"] == "apoptosis"]
control_df = df_correct[df_correct["labels"] == "healthy"]

# split the data classes by shuffled and unshuffled
pyroptosis_shuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == True]
pyroptosis_unshuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == False]
apoptosis_shuffled_df = apoptosis_df[apoptosis_df["shuffle"] == True]
apoptosis_unshuffled_df = apoptosis_df[apoptosis_df["shuffle"] == False]
control_shuffled_df = control_df[control_df["shuffle"] == True]
control_unshuffled_df = control_df[control_df["shuffle"] == False]

# split the shuffled/unshuffled data by the data splits
pyroptosis_shuffled_train_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "train"
]
pyroptosis_shuffled_test_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "testing"
]
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "validation"
]
pyroptosis_shuffled_treatment_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "holdout"
]

pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "train"
]
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "testing"
]
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "validation"
]
pyroptosis_unshuffled_treatment_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "holdout"
]

apoptosis_shuffled_train_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "train"
]
apoptosis_shuffled_test_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "testing"
]
apoptosis_shuffled_validation_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "validation"
]
apoptosis_shuffled_treatment_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_shuffled_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "holdout"
]

apoptosis_unshuffled_train_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "train"
]
apoptosis_unshuffled_test_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "testing"
]
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "validation"
]
apoptosis_unshuffled_treatment_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "holdout"
]

control_shuffled_train_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "train"
]
control_shuffled_test_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "testing"
]
control_shuffled_validation_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "validation"
]
control_shuffled_treatment_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "treatment_holdout"
]
control_shuffled_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "holdout"
]

control_unshuffled_train_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "train"
]
control_unshuffled_test_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "testing"
]
control_unshuffled_validation_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "validation"
]
control_unshuffled_treatment_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "treatment_holdout"
]
control_unshuffled_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "holdout"
]

# sort the dataframes by the probability of the correct class
pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_treatment_holdout_df = (
    pyroptosis_unshuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

pyroptosis_shuffled_train_df = pyroptosis_shuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_test_df = pyroptosis_shuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_treatment_holdout_df = (
    pyroptosis_shuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

apoptosis_unshuffled_train_df = apoptosis_unshuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_test_df = apoptosis_unshuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

apoptosis_shuffled_train_df = apoptosis_shuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_validation_df = apoptosis_shuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_test_df = apoptosis_shuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_holdout_df = apoptosis_shuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

control_unshuffled_train_df = control_unshuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_validation_df = control_unshuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_test_df = control_unshuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_holdout_df = control_unshuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)

control_shuffled_train_df = control_shuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_validation_df = control_shuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_test_df = control_shuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_holdout_df = control_shuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)


# add each df to a dictionary
dict_of_dfs = {}
dict_of_dfs["pyroptosis_shuffled_train_df"] = pyroptosis_shuffled_train_df
dict_of_dfs["pyroptosis_shuffled_test_df"] = pyroptosis_shuffled_test_df
dict_of_dfs["pyroptosis_shuffled_validation_df"] = pyroptosis_shuffled_validation_df
dict_of_dfs[
    "pyroptosis_shuffled_treatment_holdout_df"
] = pyroptosis_shuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_shuffled_holdout_df"] = pyroptosis_shuffled_holdout_df

dict_of_dfs["pyroptosis_unshuffled_train_df"] = pyroptosis_unshuffled_train_df
dict_of_dfs["pyroptosis_unshuffled_test_df"] = pyroptosis_unshuffled_test_df
dict_of_dfs["pyroptosis_unshuffled_validation_df"] = pyroptosis_unshuffled_validation_df
dict_of_dfs[
    "pyroptosis_unshuffled_treatment_holdout_df"
] = pyroptosis_unshuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_unshuffled_holdout_df"] = pyroptosis_unshuffled_holdout_df

dict_of_dfs["apoptosis_shuffled_train_df"] = apoptosis_shuffled_train_df
dict_of_dfs["apoptosis_shuffled_test_df"] = apoptosis_shuffled_test_df
dict_of_dfs["apoptosis_shuffled_validation_df"] = apoptosis_shuffled_validation_df
dict_of_dfs[
    "apoptosis_shuffled_treatment_holdout_df"
] = apoptosis_shuffled_treatment_holdout_df
dict_of_dfs["apoptosis_shuffled_holdout_df"] = apoptosis_shuffled_holdout_df

dict_of_dfs["apoptosis_unshuffled_train_df"] = apoptosis_unshuffled_train_df
dict_of_dfs["apoptosis_unshuffled_test_df"] = apoptosis_unshuffled_test_df
dict_of_dfs["apoptosis_unshuffled_validation_df"] = apoptosis_unshuffled_validation_df
dict_of_dfs[
    "apoptosis_unshuffled_treatment_holdout_df"
] = apoptosis_unshuffled_treatment_holdout_df
dict_of_dfs["apoptosis_unshuffled_holdout_df"] = apoptosis_unshuffled_holdout_df

dict_of_dfs["control_shuffled_train_df"] = control_shuffled_train_df
dict_of_dfs["control_shuffled_test_df"] = control_shuffled_test_df
dict_of_dfs["control_shuffled_validation_df"] = control_shuffled_validation_df
dict_of_dfs[
    "control_shuffled_treatment_holdout_df"
] = control_shuffled_treatment_holdout_df
dict_of_dfs["control_shuffled_holdout_df"] = control_shuffled_holdout_df

dict_of_dfs["control_unshuffled_train_df"] = control_unshuffled_train_df
dict_of_dfs["control_unshuffled_test_df"] = control_unshuffled_test_df
dict_of_dfs["control_unshuffled_validation_df"] = control_unshuffled_validation_df
dict_of_dfs[
    "control_unshuffled_treatment_holdout_df"
] = control_unshuffled_treatment_holdout_df
dict_of_dfs["control_unshuffled_holdout_df"] = control_unshuffled_holdout_df

# check the length of each df
for key, value in dict_of_dfs.items():
    if not len(dict_of_dfs[key]) == 0:
        pass
    else:
        print(key)

apoptosis_shuffled_treatment_holdout_df
apoptosis_unshuffled_treatment_holdout_df
control_shuffled_treatment_holdout_df
control_unshuffled_treatment_holdout_df


In [37]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [38]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [39]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 5

In [40]:
dict_of_subset_dfs = {}
for key in tqdm(dict_of_dfs):
    df = dict_of_dfs[key]
    if len(df) == 0:
        pass
    else:
        # select n random cells from the dataframe
        df = top_n_cell_select(df, n)
        # add the df to the dictionary
        dict_of_subset_dfs[key] = df

100%|██████████| 30/30 [00:00<00:00, 4103.35it/s]


In [41]:
# create a blank df to append the data to
main_df = dict_of_subset_dfs["pyroptosis_shuffled_train_df"]
# drop all rows from the df
main_df = main_df.drop(main_df.index)

In [59]:
for key in tqdm(dict_of_subset_dfs):
    if len(dict_of_subset_dfs[key]) >= 1:
        # loop through the dataframe
        for cell in range(len(dict_of_subset_dfs[key])):
            # get the first row of the dataframe
            df = dict_of_subset_dfs[key].iloc[cell]
            image_id = df["Metadata_ImageNumber"].astype(int).astype(str)
            fov_id = df["Metadata_Site"].astype(int).astype(str)
            cell_id = df["Metadata_Cells_Number_Object_Number"]
            well_id = df["Metadata_Well"]
            row_id = well_id[0]
            column_id = well_id[1:]
            center_x = df["Metadata_Nuclei_Location_Center_X"].astype(int)
            center_y = df["Metadata_Nuclei_Location_Center_Y"].astype(int)
            # create a custom and contstant bounding box for the images
            # this is made from the extracted center_x and center_y of the cell (nucleus)
            min_x_box = center_x - radius
            max_x_box = center_x + radius
            min_y_box = center_y - radius
            max_y_box = center_y + radius
            print(cell + 1, key, row_id, column_id, fov_id, cell_id, center_x, center_y)

            # create the image paths for each channel of the image
            image_name1 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_1}"
            )
            image_path1 = image_dir_path.joinpath(image_name1)

            image_name2 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_2}"
            )
            image_path2 = image_dir_path.joinpath(image_name2)

            image_name3 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_3}"
            )
            image_path3 = image_dir_path.joinpath(image_name3)

            image_name4 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_4}"
            )
            image_path4 = image_dir_path.joinpath(image_name4)

            image_name5 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_5}"
            )
            image_path5 = image_dir_path.joinpath(image_name5)

            # crop all 5 channels of the image
            im1 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_UNCHANGED)
            # im1_crop = im1[min_y_box:max_y_box, min_x_box:max_x_box]

            im2 = cv2.imread(image_path2.as_posix(), cv2.IMREAD_UNCHANGED)
            # im2_crop = im2[min_y_box:max_y_box, min_x_box:max_x_box]

            im3 = cv2.imread(image_path3.as_posix(), cv2.IMREAD_UNCHANGED)
            # im3_crop = im3[min_y_box:max_y_box, min_x_box:max_x_box]

            im4 = cv2.imread(image_path4.as_posix(), cv2.IMREAD_UNCHANGED)
            # im4_crop = im4[min_y_box:max_y_box, min_x_box:max_x_box]

            im5 = cv2.imread(image_path5.as_posix(), cv2.IMREAD_UNCHANGED)
            # im5_crop = im5[min_y_box:max_y_box, min_x_box:max_x_box]

            # check for non-edge cells

            ### channels ###
            # * Channel 1: DAPI
            # * Channel 2: ER
            # * Channel 3: GasderminD
            # * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
            # * Channel 5: Mitochondria

            blue_channel_stack = np.stack(im1, axis=-1)
            green_channel_stack = np.stack(im3, axis=-1)
            red_channel_stack = np.stack(im4, axis=-1)

            # blue_channel_stack_crop = np.stack(im1_crop, axis=-1)
            # green_channel_stack_crop = np.stack(im3_crop, axis=-1)
            # red_channel_stack_crop = np.stack(im4_crop, axis=-1)

            channel1 = "im1"
            channel2 = "im3"
            channel3 = "im4"

            # Scale the pixel values to fit within the 16-bit range (0-65535)
            blue_channel = (
                blue_channel_stack / np.max(blue_channel_stack) * 65535
            ).astype(np.uint16)
            green_channel = (
                green_channel_stack / np.max(green_channel_stack) * 65535
            ).astype(np.uint16)
            red_channel = (
                red_channel_stack / np.max(red_channel_stack) * 65535
            ).astype(np.uint16)

            # blue_channel_crop = (
            #     blue_channel_stack_crop / np.max(blue_channel_stack_crop) * 65535
            # ).astype(np.uint16)
            # green_channel_crop = (
            #     green_channel_stack_crop / np.max(green_channel_stack_crop) * 65535
            # ).astype(np.uint16)
            # red_channel_crop = (
            #     red_channel_stack_crop / np.max(red_channel_stack_crop) * 65535
            # ).astype(np.uint16)

            # merge the channels together

            composite_image = cv2.merge(
                (red_channel, green_channel, blue_channel)
            ).astype(np.uint16)
            # composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)

            composite_image_crop = composite_image[
                min_y_box:max_y_box, min_x_box:max_x_box
            ]

            # im_crop = composite_image[min_y_box:max_y_box, min_x_box:max_x_box]
            if composite_image_crop.shape[0] == 0 or composite_image_crop.shape[1] == 0:
                print("Cell is on the edge of the image, skipping")
                continue

            # composite_image_crop = cv2.merge(
            #     (blue_channel_crop, green_channel_crop, red_channel_crop)
            # ).astype(np.uint16)
            composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)
            composite_image_crop = cv2.cvtColor(composite_image_crop, cv2.COLOR_BGR2RGB)

            # The images end up being `wonky` so we need to do some post processing prior to saving
            # this will ensure that the images are oriented correctly with X and Y centers prior to cropping
            # transformations of the image to fix the orientation post pixel scaling
            # flip the image vertically
            composite_image = cv2.flip(composite_image, 0)
            composite_image_crop = cv2.flip(composite_image_crop, 0)
            # rotate the image 90 degrees clockwise
            composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)
            composite_image_crop = cv2.rotate(
                composite_image_crop, cv2.ROTATE_90_CLOCKWISE
            )

            print(composite_image.shape)

            # save the image as a png file
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png",
                composite_image,
            )
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png",
                composite_image_crop,
            )

            # image_out_dir_path updated to include the feature name
            # write images
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.tiff"
                ),
                composite_image,
                compression=None,
            )
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
                ),
                composite_image_crop,
                compression=None,
            )
            df = df.to_frame().T
            df[
                "image_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png"
            df[
                "image_crop_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png"
            main_df = pd.concat([main_df, df], ignore_index=True)

  0%|          | 0/26 [00:00<?, ?it/s]

1 pyroptosis_shuffled_train_df J 08 3 1074.0 333 701
(2160, 2160, 3)


  main_df = pd.concat([main_df, df], ignore_index=True)


2 pyroptosis_shuffled_train_df C 11 10 2740.0 613 1752
(2160, 2160, 3)
3 pyroptosis_shuffled_train_df D 10 1 22.0 356 41
Cell is on the edge of the image, skipping
4 pyroptosis_shuffled_train_df B 10 5 540.0 1477 413
(2160, 2160, 3)
5 pyroptosis_shuffled_train_df J 09 13 289.0 1981 177
(2160, 2160, 3)


  4%|▍         | 1/26 [00:03<01:20,  3.22s/it]

1 pyroptosis_shuffled_test_df J 09 8 2204.0 631 1406
(2160, 2160, 3)
2 pyroptosis_shuffled_test_df E 09 1 1264.0 1125 783
(2160, 2160, 3)
3 pyroptosis_shuffled_test_df C 03 6 1090.0 1420 1563
(2160, 2160, 3)
4 pyroptosis_shuffled_test_df B 09 2 2454.0 1416 1399
(2160, 2160, 3)
5 pyroptosis_shuffled_test_df E 09 1 997.0 451 628
(2160, 2160, 3)


  8%|▊         | 2/26 [00:08<01:46,  4.44s/it]

1 pyroptosis_shuffled_validation_df F 08 3 3403.0 333 1863
(2160, 2160, 3)
2 pyroptosis_shuffled_validation_df K 03 16 333.0 1449 194
(2160, 2160, 3)
3 pyroptosis_shuffled_validation_df C 11 3 256.0 2083 165
(2160, 2160, 3)
4 pyroptosis_shuffled_validation_df E 08 9 470.0 477 279
(2160, 2160, 3)
5 pyroptosis_shuffled_validation_df F 08 12 463.0 1846 361
(2160, 2160, 3)


 12%|█▏        | 3/26 [00:12<01:35,  4.13s/it]

1 pyroptosis_shuffled_treatment_holdout_df D 03 3 342.0 1772 192
(2160, 2160, 3)
2 pyroptosis_shuffled_treatment_holdout_df D 03 3 265.0 1808 148
(2160, 2160, 3)
3 pyroptosis_shuffled_treatment_holdout_df D 03 3 237.0 1736 135
(2160, 2160, 3)
4 pyroptosis_shuffled_treatment_holdout_df D 03 3 212.0 1763 124
(2160, 2160, 3)
5 pyroptosis_shuffled_treatment_holdout_df D 03 3 260.0 1769 148
(2160, 2160, 3)


 15%|█▌        | 4/26 [00:15<01:23,  3.81s/it]

1 pyroptosis_shuffled_holdout_df C 04 7 7.0 1277 19
Cell is on the edge of the image, skipping
2 pyroptosis_shuffled_holdout_df C 04 2 2707.0 1153 1573
(2160, 2160, 3)
3 pyroptosis_shuffled_holdout_df C 08 5 54.0 135 98
(2160, 2160, 3)
4 pyroptosis_shuffled_holdout_df D 02 14 3298.0 989 1871
(2160, 2160, 3)
5 pyroptosis_shuffled_holdout_df M 02 4 2377.0 663 1493
(2160, 2160, 3)


 19%|█▉        | 5/26 [00:19<01:20,  3.84s/it]

1 pyroptosis_unshuffled_train_df C 10 8 1463.0 1360 1295
(2160, 2160, 3)
2 pyroptosis_unshuffled_train_df M 09 11 1388.0 537 1118
(2160, 2160, 3)
3 pyroptosis_unshuffled_train_df C 03 13 375.0 961 205
(2160, 2160, 3)
4 pyroptosis_unshuffled_train_df N 02 12 795.0 278 696
(2160, 2160, 3)
5 pyroptosis_unshuffled_train_df H 08 10 543.0 1521 332
(2160, 2160, 3)


 23%|██▎       | 6/26 [00:26<01:36,  4.80s/it]

1 pyroptosis_unshuffled_test_df M 08 11 493.0 1457 492
(2160, 2160, 3)
2 pyroptosis_unshuffled_test_df B 11 13 1.0 237 9
Cell is on the edge of the image, skipping
3 pyroptosis_unshuffled_test_df D 04 1 1673.0 1925 1511
(2160, 2160, 3)
4 pyroptosis_unshuffled_test_df D 04 1 1608.0 1302 1455
(2160, 2160, 3)
5 pyroptosis_unshuffled_test_df D 10 3 1330.0 564 976
(2160, 2160, 3)


 27%|██▋       | 7/26 [00:29<01:21,  4.31s/it]

1 pyroptosis_unshuffled_validation_df D 04 4 2314.0 1346 1971
(2160, 2160, 3)
2 pyroptosis_unshuffled_validation_df D 10 3 2167.0 1147 1645
(2160, 2160, 3)
3 pyroptosis_unshuffled_validation_df D 04 7 1337.0 2004 1646
(2160, 2160, 3)
4 pyroptosis_unshuffled_validation_df D 05 8 920.0 320 604
(2160, 2160, 3)
5 pyroptosis_unshuffled_validation_df D 04 7 907.0 1319 1149
(2160, 2160, 3)


 31%|███       | 8/26 [00:37<01:40,  5.60s/it]

1 pyroptosis_unshuffled_treatment_holdout_df L 09 5 1900.0 920 1324
(2160, 2160, 3)
2 pyroptosis_unshuffled_treatment_holdout_df L 08 12 333.0 902 251
(2160, 2160, 3)
3 pyroptosis_unshuffled_treatment_holdout_df L 08 12 2238.0 1309 1597
(2160, 2160, 3)
4 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2419.0 948 1729
(2160, 2160, 3)
5 pyroptosis_unshuffled_treatment_holdout_df L 09 5 2467.0 1266 1777
(2160, 2160, 3)


 35%|███▍      | 9/26 [00:51<02:19,  8.23s/it]

1 pyroptosis_unshuffled_holdout_df D 11 5 818.0 369 608
(2160, 2160, 3)
2 pyroptosis_unshuffled_holdout_df D 11 15 1427.0 715 984
(2160, 2160, 3)
3 pyroptosis_unshuffled_holdout_df D 11 5 2102.0 946 1565
(2160, 2160, 3)
4 pyroptosis_unshuffled_holdout_df D 11 8 2325.0 899 1879
(2160, 2160, 3)
5 pyroptosis_unshuffled_holdout_df D 11 4 1531.0 1133 1124
(2160, 2160, 3)


 38%|███▊      | 10/26 [00:55<01:48,  6.81s/it]

1 apoptosis_shuffled_train_df K 07 8 2367.0 763 1223
(2160, 2160, 3)
2 apoptosis_shuffled_train_df L 07 2 55.0 1772 54
(2160, 2160, 3)
3 apoptosis_shuffled_train_df E 07 11 819.0 451 535
(2160, 2160, 3)
4 apoptosis_shuffled_train_df L 07 2 711.0 654 541
(2160, 2160, 3)
5 apoptosis_shuffled_train_df K 07 3 772.0 1727 409
(2160, 2160, 3)


 42%|████▏     | 11/26 [01:00<01:35,  6.38s/it]

1 apoptosis_shuffled_test_df D 07 6 2038.0 861 2146
(2160, 2160, 3)
2 apoptosis_shuffled_test_df L 06 9 288.0 1311 195
(2160, 2160, 3)
3 apoptosis_shuffled_test_df D 07 5 1940.0 149 1987
(2160, 2160, 3)
4 apoptosis_shuffled_test_df E 07 2 2557.0 1439 1592
(2160, 2160, 3)
5 apoptosis_shuffled_test_df D 07 2 286.0 1326 167
(2160, 2160, 3)


 46%|████▌     | 12/26 [01:09<01:37,  6.93s/it]

1 apoptosis_shuffled_validation_df D 07 5 1806.0 110 1874
(2160, 2160, 3)
2 apoptosis_shuffled_validation_df L 07 8 718.0 1417 424
(2160, 2160, 3)
3 apoptosis_shuffled_validation_df K 06 1 3700.0 1030 1882
(2160, 2160, 3)
4 apoptosis_shuffled_validation_df E 07 10 505.0 542 332
(2160, 2160, 3)
5 apoptosis_shuffled_validation_df K 06 13 442.0 1052 202
(2160, 2160, 3)


 50%|█████     | 13/26 [01:13<01:20,  6.22s/it]

1 apoptosis_shuffled_holdout_df E 06 6 486.0 1592 405
(2160, 2160, 3)
2 apoptosis_shuffled_holdout_df E 06 4 412.0 781 258
(2160, 2160, 3)
3 apoptosis_shuffled_holdout_df D 06 13 252.0 145 140
(2160, 2160, 3)
4 apoptosis_shuffled_holdout_df E 06 13 3039.0 1725 1801
(2160, 2160, 3)
5 apoptosis_shuffled_holdout_df E 06 13 3375.0 1698 2017
(2160, 2160, 3)


 54%|█████▍    | 14/26 [01:17<01:07,  5.63s/it]

1 apoptosis_unshuffled_train_df L 06 12 1688.0 1169 1141
(2160, 2160, 3)
2 apoptosis_unshuffled_train_df E 07 2 744.0 24 439
Cell is on the edge of the image, skipping
3 apoptosis_unshuffled_train_df L 06 12 1537.0 719 1032
(2160, 2160, 3)
4 apoptosis_unshuffled_train_df L 07 13 3382.0 1424 2143
(2160, 2160, 3)
5 apoptosis_unshuffled_train_df L 06 7 1200.0 190 962
(2160, 2160, 3)


 58%|█████▊    | 15/26 [01:22<00:57,  5.22s/it]

1 apoptosis_unshuffled_test_df L 06 5 494.0 1656 572
(2160, 2160, 3)
2 apoptosis_unshuffled_test_df L 07 4 1226.0 1510 1241
(2160, 2160, 3)
3 apoptosis_unshuffled_test_df L 07 10 2358.0 1976 1587
(2160, 2160, 3)
4 apoptosis_unshuffled_test_df L 06 5 1482.0 1369 1556
(2160, 2160, 3)
5 apoptosis_unshuffled_test_df L 06 5 424.0 409 499
(2160, 2160, 3)


 62%|██████▏   | 16/26 [01:25<00:47,  4.76s/it]

1 apoptosis_unshuffled_validation_df L 06 4 1121.0 1550 782
(2160, 2160, 3)
2 apoptosis_unshuffled_validation_df L 06 5 1936.0 1886 1933
(2160, 2160, 3)
3 apoptosis_unshuffled_validation_df L 07 16 2572.0 857 1783
(2160, 2160, 3)
4 apoptosis_unshuffled_validation_df L 06 5 347.0 1039 364
(2160, 2160, 3)
5 apoptosis_unshuffled_validation_df E 07 3 1575.0 420 1096
(2160, 2160, 3)


 65%|██████▌   | 17/26 [01:34<00:52,  5.78s/it]

1 apoptosis_unshuffled_holdout_df E 06 1 905.0 1462 1076
(2160, 2160, 3)
2 apoptosis_unshuffled_holdout_df E 06 11 825.0 259 1212
(2160, 2160, 3)
3 apoptosis_unshuffled_holdout_df E 06 6 1965.0 872 1705
(2160, 2160, 3)
4 apoptosis_unshuffled_holdout_df E 06 16 3145.0 1908 1813
(2160, 2160, 3)
5 apoptosis_unshuffled_holdout_df E 06 10 2537.0 1971 1586
(2160, 2160, 3)


 69%|██████▉   | 18/26 [01:44<00:57,  7.24s/it]

1 control_shuffled_train_df F 04 12 424.0 1930 259
(2160, 2160, 3)
2 control_shuffled_train_df O 09 14 2717.0 1639 1707
(2160, 2160, 3)
3 control_shuffled_train_df M 06 4 3426.0 126 1779
(2160, 2160, 3)
4 control_shuffled_train_df L 12 13 3954.0 1306 2121
(2160, 2160, 3)
5 control_shuffled_train_df B 06 13 2827.0 1147 1377
(2160, 2160, 3)


 73%|███████▎  | 19/26 [01:51<00:50,  7.15s/it]

1 control_shuffled_test_df O 09 7 1049.0 1800 1065
(2160, 2160, 3)
2 control_shuffled_test_df O 10 8 1880.0 2109 1112
(2160, 2160, 3)
3 control_shuffled_test_df C 07 8 859.0 1826 673
(2160, 2160, 3)
4 control_shuffled_test_df F 04 15 607.0 2070 400
(2160, 2160, 3)
5 control_shuffled_test_df M 07 7 1639.0 436 992
(2160, 2160, 3)


 77%|███████▋  | 20/26 [02:04<00:53,  9.00s/it]

1 control_shuffled_validation_df I 02 4 2160.0 1408 1492
(2160, 2160, 3)
2 control_shuffled_validation_df C 07 7 668.0 1638 690
(2160, 2160, 3)
3 control_shuffled_validation_df N 07 7 856.0 857 719
(2160, 2160, 3)
4 control_shuffled_validation_df E 12 7 806.0 1415 932
(2160, 2160, 3)
5 control_shuffled_validation_df M 06 4 955.0 616 548
(2160, 2160, 3)


 81%|████████  | 21/26 [02:17<00:50, 10.17s/it]

1 control_shuffled_holdout_df J 04 8 375.0 335 283
(2160, 2160, 3)
2 control_shuffled_holdout_df N 05 12 1749.0 281 1385
(2160, 2160, 3)
3 control_shuffled_holdout_df I 10 8 1312.0 1066 656
(2160, 2160, 3)
4 control_shuffled_holdout_df F 11 12 2675.0 1896 1497
(2160, 2160, 3)
5 control_shuffled_holdout_df M 04 9 2981.0 722 1591
(2160, 2160, 3)


 85%|████████▍ | 22/26 [02:30<00:43, 10.80s/it]

1 control_unshuffled_train_df G 04 8 4192.0 585 2091
(2160, 2160, 3)
2 control_unshuffled_train_df G 05 8 1591.0 1945 1541
(2160, 2160, 3)
3 control_unshuffled_train_df H 04 11 494.0 152 337
(2160, 2160, 3)
4 control_unshuffled_train_df H 05 3 3042.0 287 1610
(2160, 2160, 3)
5 control_unshuffled_train_df F 12 3 2213.0 1379 1460
(2160, 2160, 3)


 88%|████████▊ | 23/26 [02:37<00:29,  9.73s/it]

1 control_unshuffled_test_df I 02 2 1408.0 1773 1155
(2160, 2160, 3)
2 control_unshuffled_test_df I 02 13 1911.0 937 1162
(2160, 2160, 3)
3 control_unshuffled_test_df I 02 2 2523.0 1412 1919
(2160, 2160, 3)
4 control_unshuffled_test_df I 02 2 2517.0 511 1916
(2160, 2160, 3)
5 control_unshuffled_test_df I 02 6 675.0 1476 782
(2160, 2160, 3)


 92%|█████████▏| 24/26 [02:50<00:21, 10.87s/it]

1 control_unshuffled_validation_df I 02 4 2715.0 2117 1957
(2160, 2160, 3)
2 control_unshuffled_validation_df H 12 2 826.0 306 694
(2160, 2160, 3)
3 control_unshuffled_validation_df I 02 12 1340.0 1884 1089
(2160, 2160, 3)
4 control_unshuffled_validation_df I 02 11 444.0 1188 354
(2160, 2160, 3)
5 control_unshuffled_validation_df I 02 8 982.0 1394 635
(2160, 2160, 3)


 96%|█████████▌| 25/26 [03:10<00:13, 13.46s/it]

1 control_unshuffled_holdout_df O 11 12 1433.0 2122 1442
(2160, 2160, 3)
2 control_unshuffled_holdout_df I 10 4 1380.0 1399 781
(2160, 2160, 3)
3 control_unshuffled_holdout_df F 11 3 1592.0 834 1002
(2160, 2160, 3)
4 control_unshuffled_holdout_df K 11 9 1533.0 1384 890
(2160, 2160, 3)
5 control_unshuffled_holdout_df O 12 11 329.0 1125 313
(2160, 2160, 3)


100%|██████████| 26/26 [03:24<00:00,  7.87s/it]


In [60]:
# define main_df_path
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")

In [61]:
main_df

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,labels,correct,image_path,image_crop_path
0,0.053009,0.227769,0.719221,2,2,train,True,pyroptosis,3599724.0,PBMC,...,333.556164,701.619178,345.0,713.0,323.0,690.0,pyroptosis,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
1,0.069751,0.241888,0.688361,2,2,train,True,pyroptosis,724498.0,PBMC,...,613.582996,1752.874494,623.0,1762.0,605.0,1745.0,pyroptosis,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
2,0.081726,0.323375,0.594899,2,2,train,True,pyroptosis,579466.0,PBMC,...,1477.613687,413.838852,1491.0,427.0,1463.0,403.0,pyroptosis,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
3,0.092082,0.327625,0.580293,2,2,train,True,pyroptosis,3701103.0,PBMC,...,1981.429885,177.23908,1994.0,191.0,1971.0,166.0,pyroptosis,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
4,0.075811,0.23121,0.692979,2,2,testing,True,pyroptosis,3689653.0,PBMC,...,631.633333,1406.147619,660.0,1434.0,616.0,1392.0,pyroptosis,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,0.0,1.0,0.0,1,1,holdout,False,healthy,5518911.0,PBMC,...,2122.401914,1442.593301,2147.0,1465.0,2106.0,1423.0,healthy,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
122,0.0,1.0,0.0,1,1,holdout,False,healthy,2998317.0,PBMC,...,1399.795597,781.056604,1411.0,791.0,1390.0,772.0,healthy,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
123,0.0,1.0,0.0,1,1,holdout,False,healthy,2263570.0,PBMC,...,834.220676,1002.117296,848.0,1088.0,675.0,944.0,healthy,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
124,0.0,1.0,0.0,1,1,holdout,False,healthy,3876642.0,PBMC,...,1384.516588,890.862559,1397.0,903.0,1373.0,880.0,healthy,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
