This notebook finds random cells from each prediction category and displays them. The purpose is to get representative images examples of each category.

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tf  # write tiff files
from PIL import Image  # read tiff files
from tqdm import tqdm  # progress bar

In [2]:
# function that selects a random n images from the dataframe


def top_n_cell_select(
    df: pd.DataFrame,
    n: int = 1,
) -> pd.DataFrame:
    """
    Selects a random n cells from the dataframe

    This function assumes that the dataframe is sorted.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    n : int, optional
        Number of random cells to select, by default 1

    Returns
    -------
    pd.DataFrame
        The return dataframe with the random cell selected
    """

    # select the top n cells
    df = df.head(n)

    return df

In [3]:
# parameters
CELL_TYPE = "PBMC"

In [4]:
# Get the current working directory of the repository
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/4TB/data/Interstellar_Analysis')

In [5]:
image_out_dir_path = pathlib.Path(f"{root_dir}/8.cytopick_analysis/figures/PBMC/")


# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [6]:
# define directories
# where the images are on a local machine
# this is a hard coded path to the 1TB image directory

#####
# THIS PATH NEEDS TO BE CHANGED TO THE LOCAL IMAGE DIRECTORY ON YOUR MACHINE
# TODO: CHANGE THIS PATH
#####

image_dir_path = pathlib.Path(
    "/home/lippincm/Desktop/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)

### Get single-cell probabilities

In [7]:
# define probability path
prob_df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/PBMC/probabilities.parquet"
).resolve(strict=True)

# read in the probability dataframe
df = pd.read_parquet(prob_df_path)

In [8]:
df.head()

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Treatment,Metadata_Dose,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,Metadata_Site,labels
0,0.05788595,0.74094,0.201174,1,1,train,False,healthy,6135062.0,PBMC,...,LPS_Nigericin,1.000_µg_per_ml_10.000_µM,1493.544715,2062.132791,1507.0,2074.0,1484.0,2050.0,16.0,pyroptosis
1,0.003964022,0.494929,0.501107,2,2,train,False,pyroptosis,5358238.0,PBMC,...,Thapsigargin,1.000_µM,10.072917,510.194444,20.0,521.0,2.0,501.0,10.0,apoptosis
2,0.1483557,0.811236,0.040408,0,1,train,False,apoptosis,3875296.0,PBMC,...,H2O2,100.000_µM,511.839344,31.760656,522.0,42.0,501.0,23.0,5.0,healthy
3,0.2445394,0.015162,0.740298,0,2,train,False,apoptosis,7361184.0,PBMC,...,LPS_Nigericin,1.000_µg_per_ml_10.000_µM,1525.590476,310.028571,1531.0,317.0,1521.0,304.0,16.0,healthy
4,6.662987e-07,0.99874,0.001259,1,1,train,False,healthy,4631111.0,PBMC,...,H2O2,100.000_nM,989.911932,405.008523,1003.0,416.0,979.0,395.0,12.0,healthy


In [9]:
# add column for if the prediction was correct
df["correct"] = df.apply(lambda x: x["label_true"] == x["label_pred"], axis=1)
# split the data into correct and incorrect
df_correct = df[df["correct"] == True]
df_incorrect = df[df["correct"] == False]
assert len(df_correct) + len(df_incorrect) == len(df)

In [10]:
# split the data into the different classes
pyroptosis_df = df_correct[df_correct["labels"] == "pyroptosis"]
apoptosis_df = df_correct[df_correct["labels"] == "apoptosis"]
control_df = df_correct[df_correct["labels"] == "healthy"]

# split the data classes by shuffled and unshuffled
pyroptosis_shuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == True]
pyroptosis_unshuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == False]
apoptosis_shuffled_df = apoptosis_df[apoptosis_df["shuffle"] == True]
apoptosis_unshuffled_df = apoptosis_df[apoptosis_df["shuffle"] == False]
control_shuffled_df = control_df[control_df["shuffle"] == True]
control_unshuffled_df = control_df[control_df["shuffle"] == False]

# split the shuffled/unshuffled data by the data splits
pyroptosis_shuffled_train_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "train"
]
pyroptosis_shuffled_test_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "testing"
]
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "validation"
]
pyroptosis_shuffled_treatment_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "holdout"
]

pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "train"
]
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "testing"
]
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "validation"
]
pyroptosis_unshuffled_treatment_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "holdout"
]

apoptosis_shuffled_train_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "train"
]
apoptosis_shuffled_test_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "testing"
]
apoptosis_shuffled_validation_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "validation"
]
apoptosis_shuffled_treatment_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_shuffled_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "holdout"
]

apoptosis_unshuffled_train_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "train"
]
apoptosis_unshuffled_test_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "testing"
]
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "validation"
]
apoptosis_unshuffled_treatment_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "holdout"
]

control_shuffled_train_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "train"
]
control_shuffled_test_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "testing"
]
control_shuffled_validation_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "validation"
]
control_shuffled_treatment_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "treatment_holdout"
]
control_shuffled_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "holdout"
]

control_unshuffled_train_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "train"
]
control_unshuffled_test_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "testing"
]
control_unshuffled_validation_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "validation"
]
control_unshuffled_treatment_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "treatment_holdout"
]
control_unshuffled_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "holdout"
]

# sort the dataframes by the probability of the correct class
pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_unshuffled_treatment_holdout_df = (
    pyroptosis_unshuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

pyroptosis_shuffled_train_df = pyroptosis_shuffled_train_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_validation_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_test_df = pyroptosis_shuffled_test_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)
pyroptosis_shuffled_treatment_holdout_df = (
    pyroptosis_shuffled_treatment_holdout_df.sort_values(
        by=["pyroptosis_prob"], ascending=False
    )
)
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_holdout_df.sort_values(
    by=["pyroptosis_prob"], ascending=False
)

apoptosis_unshuffled_train_df = apoptosis_unshuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_test_df = apoptosis_unshuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

apoptosis_shuffled_train_df = apoptosis_shuffled_train_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_validation_df = apoptosis_shuffled_validation_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_test_df = apoptosis_shuffled_test_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)
apoptosis_shuffled_holdout_df = apoptosis_shuffled_holdout_df.sort_values(
    by=["apoptosis_prob"], ascending=False
)

control_unshuffled_train_df = control_unshuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_validation_df = control_unshuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_test_df = control_unshuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_unshuffled_holdout_df = control_unshuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)

control_shuffled_train_df = control_shuffled_train_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_validation_df = control_shuffled_validation_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_test_df = control_shuffled_test_df.sort_values(
    by=["control_prob"], ascending=False
)
control_shuffled_holdout_df = control_shuffled_holdout_df.sort_values(
    by=["control_prob"], ascending=False
)


# add each df to a dictionary
dict_of_dfs = {}
dict_of_dfs["pyroptosis_shuffled_train_df"] = pyroptosis_shuffled_train_df
dict_of_dfs["pyroptosis_shuffled_test_df"] = pyroptosis_shuffled_test_df
dict_of_dfs["pyroptosis_shuffled_validation_df"] = pyroptosis_shuffled_validation_df
dict_of_dfs[
    "pyroptosis_shuffled_treatment_holdout_df"
] = pyroptosis_shuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_shuffled_holdout_df"] = pyroptosis_shuffled_holdout_df

dict_of_dfs["pyroptosis_unshuffled_train_df"] = pyroptosis_unshuffled_train_df
dict_of_dfs["pyroptosis_unshuffled_test_df"] = pyroptosis_unshuffled_test_df
dict_of_dfs["pyroptosis_unshuffled_validation_df"] = pyroptosis_unshuffled_validation_df
dict_of_dfs[
    "pyroptosis_unshuffled_treatment_holdout_df"
] = pyroptosis_unshuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_unshuffled_holdout_df"] = pyroptosis_unshuffled_holdout_df

dict_of_dfs["apoptosis_shuffled_train_df"] = apoptosis_shuffled_train_df
dict_of_dfs["apoptosis_shuffled_test_df"] = apoptosis_shuffled_test_df
dict_of_dfs["apoptosis_shuffled_validation_df"] = apoptosis_shuffled_validation_df
dict_of_dfs[
    "apoptosis_shuffled_treatment_holdout_df"
] = apoptosis_shuffled_treatment_holdout_df
dict_of_dfs["apoptosis_shuffled_holdout_df"] = apoptosis_shuffled_holdout_df

dict_of_dfs["apoptosis_unshuffled_train_df"] = apoptosis_unshuffled_train_df
dict_of_dfs["apoptosis_unshuffled_test_df"] = apoptosis_unshuffled_test_df
dict_of_dfs["apoptosis_unshuffled_validation_df"] = apoptosis_unshuffled_validation_df
dict_of_dfs[
    "apoptosis_unshuffled_treatment_holdout_df"
] = apoptosis_unshuffled_treatment_holdout_df
dict_of_dfs["apoptosis_unshuffled_holdout_df"] = apoptosis_unshuffled_holdout_df

dict_of_dfs["control_shuffled_train_df"] = control_shuffled_train_df
dict_of_dfs["control_shuffled_test_df"] = control_shuffled_test_df
dict_of_dfs["control_shuffled_validation_df"] = control_shuffled_validation_df
dict_of_dfs[
    "control_shuffled_treatment_holdout_df"
] = control_shuffled_treatment_holdout_df
dict_of_dfs["control_shuffled_holdout_df"] = control_shuffled_holdout_df

dict_of_dfs["control_unshuffled_train_df"] = control_unshuffled_train_df
dict_of_dfs["control_unshuffled_test_df"] = control_unshuffled_test_df
dict_of_dfs["control_unshuffled_validation_df"] = control_unshuffled_validation_df
dict_of_dfs[
    "control_unshuffled_treatment_holdout_df"
] = control_unshuffled_treatment_holdout_df
dict_of_dfs["control_unshuffled_holdout_df"] = control_unshuffled_holdout_df

# check the length of each df
for key, value in dict_of_dfs.items():
    if not len(dict_of_dfs[key]) == 0:
        pass
    else:
        print(key)

apoptosis_shuffled_test_df
apoptosis_shuffled_validation_df
apoptosis_shuffled_treatment_holdout_df
apoptosis_unshuffled_treatment_holdout_df
control_shuffled_treatment_holdout_df
control_unshuffled_treatment_holdout_df


In [11]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [12]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [13]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 5

In [14]:
dict_of_subset_dfs = {}
for key in tqdm(dict_of_dfs):
    df = dict_of_dfs[key]
    if len(df) == 0:
        pass
    else:
        # select n random cells from the dataframe
        df = top_n_cell_select(df, n)
        # add the df to the dictionary
        dict_of_subset_dfs[key] = df

100%|██████████| 30/30 [00:00<00:00, 27194.54it/s]


In [15]:
# create a blank df to append the data to
main_df = dict_of_subset_dfs["pyroptosis_shuffled_train_df"]
# drop all rows from the df
main_df = main_df.drop(main_df.index)

In [16]:
# loop through the dictionary of dataframes
for key in tqdm(dict_of_subset_dfs):
    # check if the dataframe is empty
    if len(dict_of_subset_dfs[key]) >= 1:
        # loop through the dataframe
        for cell in range(len(dict_of_subset_dfs[key])):
            # get the first row of the dataframe
            df = dict_of_subset_dfs[key].iloc[cell]
            image_id = df["Metadata_ImageNumber"].astype(int).astype(str)
            fov_id = df["Metadata_Site"].astype(int).astype(str)
            cell_id = df["Metadata_Cells_Number_Object_Number"]
            well_id = df["Metadata_Well"]
            row_id = well_id[0]
            column_id = well_id[1:]
            center_x = df["Metadata_Nuclei_Location_Center_X"].astype(int)
            center_y = df["Metadata_Nuclei_Location_Center_Y"].astype(int)
            # create a custom and contstant bounding box for the images
            # this is made from the extracted center_x and center_y of the cell (nucleus)
            min_x_box = center_x - radius
            max_x_box = center_x + radius
            min_y_box = center_y - radius
            max_y_box = center_y + radius
            print(cell + 1, key, row_id, column_id, fov_id, cell_id, center_x, center_y)

            # create the image paths for each channel of the image
            image_name1 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_1}"
            )
            image_path1 = image_dir_path.joinpath(image_name1)

            image_name2 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_2}"
            )
            image_path2 = image_dir_path.joinpath(image_name2)

            image_name3 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_3}"
            )
            image_path3 = image_dir_path.joinpath(image_name3)

            image_name4 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_4}"
            )
            image_path4 = image_dir_path.joinpath(image_name4)

            image_name5 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_5}"
            )
            image_path5 = image_dir_path.joinpath(image_name5)

            # crop all 5 channels of the image
            im1 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_UNCHANGED)

            im2 = cv2.imread(image_path2.as_posix(), cv2.IMREAD_UNCHANGED)

            im3 = cv2.imread(image_path3.as_posix(), cv2.IMREAD_UNCHANGED)

            im4 = cv2.imread(image_path4.as_posix(), cv2.IMREAD_UNCHANGED)

            im5 = cv2.imread(image_path5.as_posix(), cv2.IMREAD_UNCHANGED)

            # check for non-edge cells

            ### channels ###
            # * Channel 1: DAPI
            # * Channel 2: ER
            # * Channel 3: GasderminD
            # * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
            # * Channel 5: Mitochondria

            # prior to merging adjust the brightness of the image to make it easier to see
            # adjust the brightness of the image to make it easier to see
            alpha = 0.05  # Contrast control (1.0-3.0)
            beta = 0  # Brightness control (0-100)
            im3 = cv2.convertScaleAbs(im3, alpha=alpha, beta=beta)
            im4 = cv2.convertScaleAbs(im4, alpha=alpha, beta=beta)
            # blue channel does not need to be adjusted as it is the DAPI channel and is already bright

            blue_channel_stack = np.stack(im1, axis=-1)
            green_channel_stack = np.stack(im3, axis=-1)
            red_channel_stack = np.stack(im4, axis=-1)

            channel1 = "im1"
            channel2 = "im3"
            channel3 = "im4"

            # Scale the pixel values to fit within the 16-bit range (0-65535)
            blue_channel = (
                blue_channel_stack / np.max(blue_channel_stack) * 65535
            ).astype(np.uint16)
            green_channel = (
                green_channel_stack / np.max(green_channel_stack) * 65535
            ).astype(np.uint16)
            red_channel = (
                red_channel_stack / np.max(red_channel_stack) * 65535
            ).astype(np.uint16)

            # merge the channels together

            composite_image = cv2.merge(
                (red_channel, green_channel, blue_channel)
            ).astype(np.uint16)

            # The images end up being `wonky` so we need to do some post processing prior to saving
            # where wonky means that the image is not oriented correctly
            # the image is rotated 90 degrees clockwise and flipped vertically

            # this will ensure that the images are oriented correctly with X and Y centers prior to cropping
            # transformations of the image to fix the orientation post pixel scaling
            # flip the image vertically
            composite_image = cv2.flip(composite_image, 0)
            # rotate the image 90 degrees clockwise
            composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)

            composite_image_crop = composite_image[
                min_y_box:max_y_box, min_x_box:max_x_box
            ]

            if composite_image_crop.shape[0] == 0 or composite_image_crop.shape[1] == 0:
                print("Cell is on the edge of the image, skipping")
                continue

                # image_out_dir_path updated to include the feature name
            # write images
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.tiff"
                ),
                composite_image,
                compression=None,
            )
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
                ),
                composite_image_crop,
                compression=None,
            )

            composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)
            composite_image_crop = cv2.cvtColor(composite_image_crop, cv2.COLOR_BGR2RGB)

            print(composite_image.shape)

            # save the image as a png file
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png",
                composite_image,
            )
            cv2.imwrite(
                f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png",
                composite_image_crop,
            )

            df = df.to_frame().T
            df[
                "image_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.png"
            df[
                "image_crop_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.png"
            main_df = pd.concat([main_df, df], ignore_index=True)

  0%|          | 0/24 [00:00<?, ?it/s]

1 pyroptosis_shuffled_train_df E 09 10 2876.0 1662 1548
(2160, 2160, 3)
2 pyroptosis_shuffled_train_df K 02 15 1246.0 1079 596
(2160, 2160, 3)
3 pyroptosis_shuffled_train_df G 02 15 292.0 50 160
(2160, 2160, 3)
4 pyroptosis_shuffled_train_df C 08 10 3695.0 2026 1939
(2160, 2160, 3)
5 pyroptosis_shuffled_train_df N 02 3 2180.0 1036 1301
(2160, 2160, 3)


  4%|▍         | 1/24 [00:06<02:27,  6.41s/it]

1 pyroptosis_shuffled_test_df M 08 7 2317.0 327 2050
(2160, 2160, 3)
2 pyroptosis_shuffled_test_df D 11 16 2901.0 1959 1554
(2160, 2160, 3)
3 pyroptosis_shuffled_test_df M 08 7 2243.0 283 1979
(2160, 2160, 3)
4 pyroptosis_shuffled_test_df B 11 3 2651.0 1125 1693
(2160, 2160, 3)
5 pyroptosis_shuffled_test_df D 10 16 1848.0 1152 1071
(2160, 2160, 3)


  8%|▊         | 2/24 [00:11<02:06,  5.75s/it]

1 pyroptosis_shuffled_validation_df C 11 16 1892.0 378 1097
(2160, 2160, 3)
2 pyroptosis_shuffled_validation_df D 11 11 1464.0 1833 1551
(2160, 2160, 3)
3 pyroptosis_shuffled_validation_df D 04 15 842.0 969 648
(2160, 2160, 3)
4 pyroptosis_shuffled_validation_df D 04 12 953.0 2094 994
(2160, 2160, 3)
5 pyroptosis_shuffled_validation_df B 11 12 3092.0 1670 1904
(2160, 2160, 3)


 12%|█▎        | 3/24 [00:17<02:00,  5.73s/it]

1 pyroptosis_shuffled_treatment_holdout_df L 03 14 228.0 524 178
(2160, 2160, 3)
2 pyroptosis_shuffled_treatment_holdout_df L 05 5 274.0 764 460
(2160, 2160, 3)
3 pyroptosis_shuffled_treatment_holdout_df D 08 1 847.0 1915 616
(2160, 2160, 3)
4 pyroptosis_shuffled_treatment_holdout_df M 11 5 1003.0 220 1785
(2160, 2160, 3)
5 pyroptosis_shuffled_treatment_holdout_df L 08 13 2146.0 908 1246
(2160, 2160, 3)


 17%|█▋        | 4/24 [00:23<01:55,  5.78s/it]

1 pyroptosis_shuffled_holdout_df N 08 1 286.0 1017 163
(2160, 2160, 3)
2 pyroptosis_shuffled_holdout_df K 08 5 782.0 400 576
(2160, 2160, 3)
3 pyroptosis_shuffled_holdout_df L 02 13 1836.0 412 1113
(2160, 2160, 3)
4 pyroptosis_shuffled_holdout_df L 02 15 1319.0 738 819
(2160, 2160, 3)
5 pyroptosis_shuffled_holdout_df B 10 1 498.0 1329 444
(2160, 2160, 3)


 21%|██        | 5/24 [00:29<01:53,  6.00s/it]

1 pyroptosis_unshuffled_train_df G 09 4 391.0 578 251
(2160, 2160, 3)
2 pyroptosis_unshuffled_train_df C 08 9 469.0 373 307
(2160, 2160, 3)
3 pyroptosis_unshuffled_train_df G 03 14 3275.0 1291 1973
(2160, 2160, 3)
4 pyroptosis_unshuffled_train_df N 09 12 915.0 1067 837
(2160, 2160, 3)
5 pyroptosis_unshuffled_train_df D 10 12 536.0 431 488
(2160, 2160, 3)


 25%|██▌       | 6/24 [00:35<01:45,  5.87s/it]

1 pyroptosis_unshuffled_test_df D 10 3 556.0 391 396
(2160, 2160, 3)
2 pyroptosis_unshuffled_test_df C 05 12 548.0 984 476
(2160, 2160, 3)
3 pyroptosis_unshuffled_test_df D 04 10 94.0 858 69
(2160, 2160, 3)
4 pyroptosis_unshuffled_test_df C 05 7 1293.0 1815 1156
(2160, 2160, 3)
5 pyroptosis_unshuffled_test_df C 05 7 1281.0 880 1146
(2160, 2160, 3)


 29%|██▉       | 7/24 [00:40<01:38,  5.78s/it]

1 pyroptosis_unshuffled_validation_df C 05 3 1071.0 51 651
(2160, 2160, 3)
2 pyroptosis_unshuffled_validation_df C 05 3 1127.0 1749 688
(2160, 2160, 3)
3 pyroptosis_unshuffled_validation_df C 10 1 2640.0 1303 1991
(2160, 2160, 3)
4 pyroptosis_unshuffled_validation_df D 04 11 114.0 1083 147
(2160, 2160, 3)
5 pyroptosis_unshuffled_validation_df D 04 8 2293.0 928 2062
(2160, 2160, 3)


 33%|███▎      | 8/24 [00:46<01:32,  5.79s/it]

1 pyroptosis_unshuffled_treatment_holdout_df L 09 10 770.0 1922 445
(2160, 2160, 3)
2 pyroptosis_unshuffled_treatment_holdout_df L 09 15 1528.0 1955 970
(2160, 2160, 3)
3 pyroptosis_unshuffled_treatment_holdout_df L 09 5 1911.0 1103 1332
(2160, 2160, 3)
4 pyroptosis_unshuffled_treatment_holdout_df L 09 4 1447.0 1302 1006
(2160, 2160, 3)
5 pyroptosis_unshuffled_treatment_holdout_df L 09 5 1093.0 1119 713
(2160, 2160, 3)


 38%|███▊      | 9/24 [00:52<01:27,  5.81s/it]

1 pyroptosis_unshuffled_holdout_df C 04 4 1276.0 317 838
(2160, 2160, 3)
2 pyroptosis_unshuffled_holdout_df C 04 4 2627.0 2140 1844
(2160, 2160, 3)
3 pyroptosis_unshuffled_holdout_df D 05 3 1738.0 582 1160
(2160, 2160, 3)
4 pyroptosis_unshuffled_holdout_df C 04 5 498.0 468 382
(2160, 2160, 3)
5 pyroptosis_unshuffled_holdout_df D 05 16 1551.0 1480 904
(2160, 2160, 3)


 42%|████▏     | 10/24 [00:58<01:21,  5.80s/it]

1 apoptosis_shuffled_train_df E 06 14 3248.0 596 1959
(2160, 2160, 3)
2 apoptosis_shuffled_train_df E 06 2 1257.0 112 912
(2160, 2160, 3)
3 apoptosis_shuffled_train_df E 06 15 103.0 536 65
(2160, 2160, 3)
4 apoptosis_shuffled_train_df L 07 2 166.0 1261 131
(2160, 2160, 3)
5 apoptosis_shuffled_train_df L 06 7 2222.0 1789 1638
(2160, 2160, 3)


 46%|████▌     | 11/24 [01:04<01:16,  5.92s/it]

1 apoptosis_shuffled_holdout_df K 07 13 1871.0 901 877
(2160, 2160, 3)
2 apoptosis_shuffled_holdout_df E 07 10 1208.0 958 821
(2160, 2160, 3)
3 apoptosis_shuffled_holdout_df E 07 16 2171.0 699 1427
(2160, 2160, 3)
4 apoptosis_shuffled_holdout_df E 07 1 2371.0 997 1550
(2160, 2160, 3)
5 apoptosis_shuffled_holdout_df K 07 14 3086.0 68 1675
(2160, 2160, 3)


 50%|█████     | 12/24 [01:10<01:10,  5.84s/it]

1 apoptosis_unshuffled_train_df D 06 9 3181.0 1393 1500
(2160, 2160, 3)
2 apoptosis_unshuffled_train_df E 06 12 504.0 1398 441
(2160, 2160, 3)
3 apoptosis_unshuffled_train_df K 06 1 770.0 1361 401
(2160, 2160, 3)
4 apoptosis_unshuffled_train_df E 06 10 3433.0 1124 2115
(2160, 2160, 3)
5 apoptosis_unshuffled_train_df D 07 14 2512.0 655 1230
(2160, 2160, 3)


 54%|█████▍    | 13/24 [01:15<01:03,  5.81s/it]

1 apoptosis_unshuffled_test_df L 06 5 419.0 1435 488
(2160, 2160, 3)
2 apoptosis_unshuffled_test_df L 06 5 494.0 1656 572
(2160, 2160, 3)
3 apoptosis_unshuffled_test_df L 06 1 1128.0 1790 730
(2160, 2160, 3)
4 apoptosis_unshuffled_test_df L 06 5 300.0 980 326
(2160, 2160, 3)
5 apoptosis_unshuffled_test_df L 06 5 1961.0 1490 1950
(2160, 2160, 3)


 58%|█████▊    | 14/24 [01:20<00:54,  5.49s/it]

1 apoptosis_unshuffled_validation_df L 06 5 506.0 1784 586
(2160, 2160, 3)
2 apoptosis_unshuffled_validation_df L 06 6 134.0 622 130
(2160, 2160, 3)
3 apoptosis_unshuffled_validation_df L 07 6 240.0 862 305
(2160, 2160, 3)
4 apoptosis_unshuffled_validation_df E 06 2 1401.0 1948 1016
(2160, 2160, 3)
5 apoptosis_unshuffled_validation_df L 07 4 1833.0 694 1760
(2160, 2160, 3)


 62%|██████▎   | 15/24 [01:25<00:48,  5.34s/it]

1 apoptosis_unshuffled_holdout_df E 07 7 967.0 133 811
(2160, 2160, 3)
2 apoptosis_unshuffled_holdout_df E 07 8 925.0 354 640
(2160, 2160, 3)
3 apoptosis_unshuffled_holdout_df E 07 8 729.0 1493 500
(2160, 2160, 3)
4 apoptosis_unshuffled_holdout_df E 07 3 1555.0 1529 1075
(2160, 2160, 3)
5 apoptosis_unshuffled_holdout_df E 07 3 1575.0 420 1096
(2160, 2160, 3)


 67%|██████▋   | 16/24 [01:30<00:40,  5.06s/it]

1 control_shuffled_train_df O 03 12 1601.0 1037 1051
(2160, 2160, 3)
2 control_shuffled_train_df H 12 1 2044.0 369 1166
(2160, 2160, 3)
3 control_shuffled_train_df I 06 13 1079.0 1947 591
(2160, 2160, 3)
4 control_shuffled_train_df E 12 13 1815.0 16 1020
Cell is on the edge of the image, skipping
5 control_shuffled_train_df E 04 2 726.0 2028 426
(2160, 2160, 3)


 71%|███████   | 17/24 [01:34<00:34,  4.89s/it]

1 control_shuffled_test_df H 05 5 132.0 1913 78
(2160, 2160, 3)
2 control_shuffled_test_df J 05 14 3700.0 2102 1751
(2160, 2160, 3)
3 control_shuffled_test_df N 06 16 2397.0 17 1139
Cell is on the edge of the image, skipping
4 control_shuffled_test_df I 08 2 3356.0 1516 2029
(2160, 2160, 3)
5 control_shuffled_test_df N 06 1 3198.0 2106 1697
(2160, 2160, 3)


 75%|███████▌  | 18/24 [01:38<00:28,  4.69s/it]

1 control_shuffled_validation_df I 08 11 651.0 1898 1004
(2160, 2160, 3)
2 control_shuffled_validation_df O 10 14 3589.0 2017 1951
(2160, 2160, 3)
3 control_shuffled_validation_df O 07 14 2561.0 943 1783
(2160, 2160, 3)
4 control_shuffled_validation_df G 10 10 402.0 460 195
(2160, 2160, 3)
5 control_shuffled_validation_df K 05 9 384.0 1710 294
(2160, 2160, 3)


 79%|███████▉  | 19/24 [01:43<00:24,  4.84s/it]

1 control_shuffled_holdout_df H 06 5 808.0 1138 443
(2160, 2160, 3)
2 control_shuffled_holdout_df F 11 15 4352.0 2106 2088
(2160, 2160, 3)
3 control_shuffled_holdout_df H 04 13 35.0 1653 24
Cell is on the edge of the image, skipping
4 control_shuffled_holdout_df I 11 14 490.0 1472 248
(2160, 2160, 3)
5 control_shuffled_holdout_df J 12 16 1909.0 1658 848
(2160, 2160, 3)


 83%|████████▎ | 20/24 [01:48<00:18,  4.70s/it]

1 control_unshuffled_train_df F 05 13 2448.0 1155 1164
(2160, 2160, 3)
2 control_unshuffled_train_df I 07 3 2907.0 2074 1425
(2160, 2160, 3)
3 control_unshuffled_train_df I 12 14 3256.0 1749 1628
(2160, 2160, 3)
4 control_unshuffled_train_df I 04 10 1680.0 1986 830
(2160, 2160, 3)
5 control_unshuffled_train_df O 12 10 866.0 1756 364
(2160, 2160, 3)


 88%|████████▊ | 21/24 [01:53<00:14,  4.77s/it]

1 control_unshuffled_test_df I 02 11 1994.0 1478 1433
(2160, 2160, 3)
2 control_unshuffled_test_df M 12 8 1894.0 2024 1163
(2160, 2160, 3)
3 control_unshuffled_test_df I 02 15 2155.0 1966 1282
(2160, 2160, 3)
4 control_unshuffled_test_df I 02 13 244.0 1198 217
(2160, 2160, 3)
5 control_unshuffled_test_df H 12 2 1617.0 1207 1254
(2160, 2160, 3)


 92%|█████████▏| 22/24 [01:58<00:09,  4.95s/it]

1 control_unshuffled_validation_df H 12 2 877.0 337 738
(2160, 2160, 3)
2 control_unshuffled_validation_df I 02 12 2159.0 610 1805
(2160, 2160, 3)
3 control_unshuffled_validation_df H 12 2 2664.0 691 1895
(2160, 2160, 3)
4 control_unshuffled_validation_df I 02 2 2592.0 497 1972
(2160, 2160, 3)
5 control_unshuffled_validation_df I 02 1 3078.0 1852 1812
(2160, 2160, 3)


 96%|█████████▌| 23/24 [02:03<00:04,  4.87s/it]

1 control_unshuffled_holdout_df I 03 11 1991.0 1918 1327
(2160, 2160, 3)
2 control_unshuffled_holdout_df I 03 16 1051.0 395 621
(2160, 2160, 3)
3 control_unshuffled_holdout_df G 05 11 296.0 1353 357
(2160, 2160, 3)
4 control_unshuffled_holdout_df G 05 11 70.0 444 91
(2160, 2160, 3)
5 control_unshuffled_holdout_df C 07 2 4459.0 2098 2093
(2160, 2160, 3)


100%|██████████| 24/24 [02:07<00:00,  5.33s/it]


In [17]:
# define main_df_path
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")

In [18]:
main_df.head()

Unnamed: 0,apoptosis_prob,control_prob,pyroptosis_prob,label_true,label_pred,data_split,shuffle,class_name,index,Metadata_cell_type,...,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,Metadata_Site,labels,correct,image_path,image_crop_path
0,0.127945,0.345422,0.526633,2,2,train,True,pyroptosis,2021413.0,PBMC,...,1548.467066,1674.0,1563.0,1654.0,1535.0,10.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
1,0.155791,0.333143,0.511066,2,2,train,True,pyroptosis,4926511.0,PBMC,...,596.154122,1090.0,606.0,1071.0,586.0,15.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
2,0.12637,0.376186,0.497444,2,2,train,True,pyroptosis,2467561.0,PBMC,...,160.055276,68.0,172.0,42.0,137.0,15.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
3,0.118203,0.393621,0.488176,2,2,train,True,pyroptosis,746227.0,PBMC,...,1939.197568,2037.0,1951.0,2016.0,1929.0,10.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
4,0.12019,0.398426,0.481384,2,2,train,True,pyroptosis,7184062.0,PBMC,...,1301.399329,1047.0,1311.0,1027.0,1292.0,3.0,pyroptosis,True,/home/lippincm/Documents/4TB/data/Interstellar...,/home/lippincm/Documents/4TB/data/Interstellar...
