This notebook finds random cells from each prediction category and displays them. The purpose is to get representative images examples of each category.

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tf  # write tiff files
from PIL import Image  # read tiff files
from tqdm import tqdm  # progress bar

In [2]:
# function that selects a random image from the dataframe


def random_cell_select(
    df: pd.DataFrame,
    n: int = 1,
) -> pd.DataFrame:
    """
    Selects a random cell from the dataframe

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    n : int, optional
        Number of random cells to select, by default 1

    Returns
    -------
    pd.DataFrame
        The return dataframe with the random cell selected
    """

    # select a random cell
    random_cell = df.sample(n=n, random_state=0)
    return random_cell

In [3]:
# parameters
CELL_TYPE = "PBMC"

In [4]:
# Get the current working directory
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/ML/Interstellar_Analysis')

In [5]:
image_out_dir_path = pathlib.Path(f"{root_dir}/8.cytopick_analysis/figures/PBMC/")

In [6]:
# define directories
# where the images are
image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)


# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [7]:
df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/{CELL_TYPE}/single_cell_predictions.parquet"
)
# read in the data
df = pd.read_parquet(df_path)
df.head()

Unnamed: 0,true_label,predicted_label,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_Site,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,...,Metadata_Dose,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,labels,data_split,shuffle
0,1,1,PBMC,I03,46663,5,6,Z-VAD-FMK,100.0,µM,...,10.000_µg_per_ml,95.866071,128.33631,111.0,143.0,87.0,117.0,healthy,train,True
1,1,1,PBMC,D05,46119,5,6,DMSO,0.025,%,...,100.000_µg_per_ml_10.000_µM,244.816143,477.495516,258.0,489.0,233.0,466.0,pyroptosis,train,True
2,1,1,PBMC,G09,43977,9,6,Disulfiram,1.0,µM,...,10.000_µg_per_ml,1728.680628,382.531414,1740.0,394.0,1717.0,365.0,pyroptosis,train,True
3,1,1,PBMC,N07,21434,7,6,DMSO,0.025,%,...,10.000_nM,1363.190736,282.046322,1376.0,292.0,1352.0,272.0,healthy,train,True
4,1,1,PBMC,B06,25314,4,6,DMSO,0.025,%,...,0.100_%,778.51676,1777.108939,801.0,1790.0,754.0,1757.0,healthy,train,True


In [8]:
# add column for if the prediction was correct
df["correct"] = df.apply(lambda x: x["true_label"] == x["predicted_label"], axis=1)
# split the data into correct and incorrect
df_correct = df[df["correct"] == True]
df_incorrect = df[df["correct"] == False]
assert len(df_correct) + len(df_incorrect) == len(df)

In [9]:
# split the data into the different classes
pyroptosis_df = df_correct[df_correct["labels"] == "pyroptosis"]
apoptosis_df = df_correct[df_correct["labels"] == "apoptosis"]
control_df = df_correct[df_correct["labels"] == "healthy"]

# split the data classes by shuffled and unshuffled
pyroptosis_shuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == True]
pyroptosis_unshuffled_df = pyroptosis_df[pyroptosis_df["shuffle"] == False]
apoptosis_shuffled_df = apoptosis_df[apoptosis_df["shuffle"] == True]
apoptosis_unshuffled_df = apoptosis_df[apoptosis_df["shuffle"] == False]
control_shuffled_df = control_df[control_df["shuffle"] == True]
control_unshuffled_df = control_df[control_df["shuffle"] == False]

# split the shuffled/unshuffled data by the data splits
pyroptosis_shuffled_train_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "train"
]
pyroptosis_shuffled_test_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "test"
]
pyroptosis_shuffled_validation_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "validation"
]
pyroptosis_shuffled_treatment_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_shuffled_holdout_df = pyroptosis_shuffled_df[
    pyroptosis_shuffled_df["data_split"] == "holdout"
]

pyroptosis_unshuffled_train_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "train"
]
pyroptosis_unshuffled_test_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "test"
]
pyroptosis_unshuffled_validation_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "validation"
]
pyroptosis_unshuffled_treatment_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
pyroptosis_unshuffled_holdout_df = pyroptosis_unshuffled_df[
    pyroptosis_unshuffled_df["data_split"] == "holdout"
]

apoptosis_shuffled_train_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "train"
]
apoptosis_shuffled_test_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "test"
]
apoptosis_shuffled_validation_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "validation"
]
apoptosis_shuffled_treatment_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_shuffled_holdout_df = apoptosis_shuffled_df[
    apoptosis_shuffled_df["data_split"] == "holdout"
]

apoptosis_unshuffled_train_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "train"
]
apoptosis_unshuffled_test_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "test"
]
apoptosis_unshuffled_validation_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "validation"
]
apoptosis_unshuffled_treatment_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "treatment_holdout"
]
apoptosis_unshuffled_holdout_df = apoptosis_unshuffled_df[
    apoptosis_unshuffled_df["data_split"] == "holdout"
]

control_shuffled_train_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "train"
]
control_shuffled_test_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "test"
]
control_shuffled_validation_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "validation"
]
control_shuffled_treatment_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "treatment_holdout"
]
control_shuffled_holdout_df = control_shuffled_df[
    control_shuffled_df["data_split"] == "holdout"
]

control_unshuffled_train_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "train"
]
control_unshuffled_test_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "test"
]
control_unshuffled_validation_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "validation"
]
control_unshuffled_treatment_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "treatment_holdout"
]
control_unshuffled_holdout_df = control_unshuffled_df[
    control_unshuffled_df["data_split"] == "holdout"
]

# add each df to a dictionary
dict_of_dfs = {}
dict_of_dfs["pyroptosis_shuffled_train_df"] = pyroptosis_shuffled_train_df
dict_of_dfs["pyroptosis_shuffled_test_df"] = pyroptosis_shuffled_test_df
dict_of_dfs["pyroptosis_shuffled_validation_df"] = pyroptosis_shuffled_validation_df
dict_of_dfs[
    "pyroptosis_shuffled_treatment_holdout_df"
] = pyroptosis_shuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_shuffled_holdout_df"] = pyroptosis_shuffled_holdout_df

dict_of_dfs["pyroptosis_unshuffled_train_df"] = pyroptosis_unshuffled_train_df
dict_of_dfs["pyroptosis_unshuffled_test_df"] = pyroptosis_unshuffled_test_df
dict_of_dfs["pyroptosis_unshuffled_validation_df"] = pyroptosis_unshuffled_validation_df
dict_of_dfs[
    "pyroptosis_unshuffled_treatment_holdout_df"
] = pyroptosis_unshuffled_treatment_holdout_df
dict_of_dfs["pyroptosis_unshuffled_holdout_df"] = pyroptosis_unshuffled_holdout_df

dict_of_dfs["apoptosis_shuffled_train_df"] = apoptosis_shuffled_train_df
dict_of_dfs["apoptosis_shuffled_test_df"] = apoptosis_shuffled_test_df
dict_of_dfs["apoptosis_shuffled_validation_df"] = apoptosis_shuffled_validation_df
dict_of_dfs[
    "apoptosis_shuffled_treatment_holdout_df"
] = apoptosis_shuffled_treatment_holdout_df
dict_of_dfs["apoptosis_shuffled_holdout_df"] = apoptosis_shuffled_holdout_df

dict_of_dfs["apoptosis_unshuffled_train_df"] = apoptosis_unshuffled_train_df
dict_of_dfs["apoptosis_unshuffled_test_df"] = apoptosis_unshuffled_test_df
dict_of_dfs["apoptosis_unshuffled_validation_df"] = apoptosis_unshuffled_validation_df
dict_of_dfs[
    "apoptosis_unshuffled_treatment_holdout_df"
] = apoptosis_unshuffled_treatment_holdout_df
dict_of_dfs["apoptosis_unshuffled_holdout_df"] = apoptosis_unshuffled_holdout_df

dict_of_dfs["control_shuffled_train_df"] = control_shuffled_train_df
dict_of_dfs["control_shuffled_test_df"] = control_shuffled_test_df
dict_of_dfs["control_shuffled_validation_df"] = control_shuffled_validation_df
dict_of_dfs[
    "control_shuffled_treatment_holdout_df"
] = control_shuffled_treatment_holdout_df
dict_of_dfs["control_shuffled_holdout_df"] = control_shuffled_holdout_df

dict_of_dfs["control_unshuffled_train_df"] = control_unshuffled_train_df
dict_of_dfs["control_unshuffled_test_df"] = control_unshuffled_test_df
dict_of_dfs["control_unshuffled_validation_df"] = control_unshuffled_validation_df
dict_of_dfs[
    "control_unshuffled_treatment_holdout_df"
] = control_unshuffled_treatment_holdout_df
dict_of_dfs["control_unshuffled_holdout_df"] = control_unshuffled_holdout_df

# check the length of each df
for key, value in dict_of_dfs.items():
    if not len(dict_of_dfs[key]) == 0:
        pass
    else:
        print(key)

apoptosis_shuffled_test_df
apoptosis_shuffled_validation_df
apoptosis_shuffled_treatment_holdout_df
apoptosis_unshuffled_treatment_holdout_df
control_shuffled_treatment_holdout_df
control_unshuffled_treatment_holdout_df


In [10]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [11]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [12]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 5

In [13]:
dict_of_subset_dfs = {}
for key in tqdm(dict_of_dfs):
    df = dict_of_dfs[key]
    if len(df) == 0:
        pass
    else:
        # select n random cells from the dataframe
        df = random_cell_select(df, n)
        # add the df to the dictionary
        dict_of_subset_dfs[key] = df

100%|██████████| 30/30 [00:00<00:00, 415.90it/s]


In [14]:
# create a blank df to append the data to
main_df = dict_of_subset_dfs["pyroptosis_shuffled_train_df"]
# drop all rows from the df
main_df = main_df.drop(main_df.index)

In [15]:
for key in tqdm(dict_of_subset_dfs):
    if len(dict_of_subset_dfs[key]) >= 1:
        # loop through the dataframe
        for cell in range(len(dict_of_subset_dfs[key])):
            # get the first row of the dataframe
            df = dict_of_subset_dfs[key].iloc[cell]
            image_id = df["Metadata_ImageNumber"]
            fov_id = df["Metadata_Site"].astype(str)
            cell_id = df["Metadata_Cells_Number_Object_Number"]
            well_id = df["Metadata_Well"]
            row_id = well_id[0]
            column_id = well_id[1:]
            center_x = df["Metadata_Nuclei_Location_Center_X"].astype(int)
            center_y = df["Metadata_Nuclei_Location_Center_Y"].astype(int)
            # create a custom and contstant bounding box for the images
            # this is made from the extracted center_x and center_y of the cell (nucleus)
            min_x_box = center_x - radius
            max_x_box = center_x + radius
            min_y_box = center_y - radius
            max_y_box = center_y + radius
            print(cell + 1, key, row_id, column_id, fov_id, cell_id, center_x, center_y)

            # create the image paths for each channel of the image
            image_name1 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_1}"
            )
            image_path1 = image_dir_path.joinpath(image_name1)

            image_name2 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_2}"
            )
            image_path2 = image_dir_path.joinpath(image_name2)

            image_name3 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_3}"
            )
            image_path3 = image_dir_path.joinpath(image_name3)

            image_name4 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_4}"
            )
            image_path4 = image_dir_path.joinpath(image_name4)

            image_name5 = (
                f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_5}"
            )
            image_path5 = image_dir_path.joinpath(image_name5)

            # crop all 5 channels of the image
            im1 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_GRAYSCALE)
            im1_crop = im1[min_y_box:max_y_box, min_x_box:max_x_box]

            im2 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_GRAYSCALE)
            im2_crop = im2[min_y_box:max_y_box, min_x_box:max_x_box]

            im3 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_GRAYSCALE)
            im3_crop = im3[min_y_box:max_y_box, min_x_box:max_x_box]

            im4 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_GRAYSCALE)
            im4_crop = im4[min_y_box:max_y_box, min_x_box:max_x_box]

            im5 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_GRAYSCALE)
            im5_crop = im5[min_y_box:max_y_box, min_x_box:max_x_box]

            # check for non-edge cells
            if im1_crop.shape[0] == 0 or im1_crop.shape[1] == 0:
                print("Cell is on the edge of the image, skipping")
                continue

            ### channels ###
            # * Channel 1: DAPI
            # * Channel 2: ER
            # * Channel 3: GasderminD
            # * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
            # * Channel 5: Mitochondria

            blue_channel_stack = np.stack(im1, axis=-1)
            green_channel_stack = np.stack(im3, axis=-1)
            red_channel_stack = np.stack(im4, axis=-1)

            blue_channel_stack_crop = np.stack(im1_crop, axis=-1)
            green_channel_stack_crop = np.stack(im3_crop, axis=-1)
            red_channel_stack_crop = np.stack(im4_crop, axis=-1)

            channel1 = "im1"
            channel2 = "im3"
            channel3 = "im4"

            # Scale the pixel values to fit within the 16-bit range (0-65535)
            blue_channel = (
                blue_channel_stack / np.max(blue_channel_stack) * 65535
            ).astype(np.uint16)
            green_channel = (
                green_channel_stack / np.max(green_channel_stack) * 65535
            ).astype(np.uint16)
            red_channel = (
                red_channel_stack / np.max(red_channel_stack) * 65535
            ).astype(np.uint16)

            blue_channel_crop = (
                blue_channel_stack_crop / np.max(blue_channel_stack_crop) * 65535
            ).astype(np.uint16)
            green_channel_crop = (
                green_channel_stack_crop / np.max(green_channel_stack_crop) * 65535
            ).astype(np.uint16)
            red_channel_crop = (
                red_channel_stack_crop / np.max(red_channel_stack_crop) * 65535
            ).astype(np.uint16)

            # merge the channels together

            composite_image = cv2.merge(
                (blue_channel, green_channel, red_channel)
            ).astype(np.uint16)
            composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)

            composite_image_crop = cv2.merge(
                (blue_channel_crop, green_channel_crop, red_channel_crop)
            ).astype(np.uint16)
            composite_image_crop = cv2.cvtColor(composite_image_crop, cv2.COLOR_BGR2RGB)

            # transformations of the image to fix the orientation post pixel scaling
            # flip the image vertically
            composite_image = cv2.flip(composite_image, 0)
            composite_image_crop = cv2.flip(composite_image_crop, 0)
            # rotate the image 90 degrees clockwise
            composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)
            composite_image_crop = cv2.rotate(
                composite_image_crop, cv2.ROTATE_90_CLOCKWISE
            )

            # im_crop = composite_image[min_y_box:max_y_box, min_x_box:max_x_box]

            # image_out_dir_path updated to include the feature name
            # write images
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.tiff"
                ),
                composite_image,
                compression=None,
            )
            tf.imwrite(
                pathlib.Path(
                    f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
                ),
                composite_image_crop,
                compression=None,
            )
            df = df.to_frame().T
            df[
                "image_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
            df[
                "image_crop_path"
            ] = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"
            main_df = pd.concat([main_df, df], ignore_index=True)

  0%|          | 0/24 [00:00<?, ?it/s]

1 pyroptosis_shuffled_train_df J 08 13 368 50 277


  main_df = pd.concat([main_df, df], ignore_index=True)


2 pyroptosis_shuffled_train_df B 04 6 253 1877 285
3 pyroptosis_shuffled_train_df D 10 15 1912 1762 1183
4 pyroptosis_shuffled_train_df F 02 12 1793 65 1350
5 pyroptosis_shuffled_train_df H 03 2 3000 718 1987


  4%|▍         | 1/24 [00:01<00:24,  1.04s/it]

1 pyroptosis_shuffled_test_df B 11 8 517 1294 374
2 pyroptosis_shuffled_test_df C 05 4 1904 2083 1389
3 pyroptosis_shuffled_test_df D 04 12 1730 738 1686
4 pyroptosis_shuffled_test_df J 09 10 328 1217 204
5 pyroptosis_shuffled_test_df D 05 12 1000 1384 1032


  8%|▊         | 2/24 [00:02<00:23,  1.06s/it]

1 pyroptosis_shuffled_validation_df B 02 12 1294 230 1137
2 pyroptosis_shuffled_validation_df K 08 15 114 641 137
3 pyroptosis_shuffled_validation_df B 11 3 1212 390 776
4 pyroptosis_shuffled_validation_df G 09 5 1693 807 1669
5 pyroptosis_shuffled_validation_df B 03 4 1450 486 869


 12%|█▎        | 3/24 [00:03<00:23,  1.10s/it]

1 pyroptosis_shuffled_treatment_holdout_df N 11 12 2538 1001 2032
2 pyroptosis_shuffled_treatment_holdout_df D 03 10 59 1866 52
3 pyroptosis_shuffled_treatment_holdout_df L 08 7 1002 816 1112
4 pyroptosis_shuffled_treatment_holdout_df L 08 6 1283 25 971
Cell is on the edge of the image, skipping
5 pyroptosis_shuffled_treatment_holdout_df L 09 15 257 1765 176


 17%|█▋        | 4/24 [00:04<00:20,  1.05s/it]

1 pyroptosis_shuffled_holdout_df D 11 5 117 556 91
2 pyroptosis_shuffled_holdout_df K 09 9 1406 1683 889
3 pyroptosis_shuffled_holdout_df E 03 1 3369 1919 2001
4 pyroptosis_shuffled_holdout_df B 05 1 344 2082 288
5 pyroptosis_shuffled_holdout_df L 03 13 3561 649 2080


 21%|██        | 5/24 [00:05<00:22,  1.17s/it]

1 pyroptosis_unshuffled_train_df K 03 8 3105 826 2013
2 pyroptosis_unshuffled_train_df N 08 10 2346 995 1366
3 pyroptosis_unshuffled_train_df D 10 7 340 295 340
4 pyroptosis_unshuffled_train_df G 09 13 2033 1470 1147
5 pyroptosis_unshuffled_train_df M 03 7 731 132 678


 25%|██▌       | 6/24 [00:06<00:22,  1.23s/it]

1 pyroptosis_unshuffled_test_df K 03 12 739 1885 637
2 pyroptosis_unshuffled_test_df E 08 11 267 1558 160
3 pyroptosis_unshuffled_test_df E 08 4 2883 1928 1816
4 pyroptosis_unshuffled_test_df M 09 8 1199 1811 791
5 pyroptosis_unshuffled_test_df D 10 7 1282 489 1405


 29%|██▉       | 7/24 [00:10<00:32,  1.89s/it]

1 pyroptosis_unshuffled_validation_df B 10 6 1586 1941 1335
2 pyroptosis_unshuffled_validation_df N 08 2 367 579 261
3 pyroptosis_unshuffled_validation_df B 11 16 1856 324 1040
4 pyroptosis_unshuffled_validation_df M 08 15 1694 797 914
5 pyroptosis_unshuffled_validation_df C 10 15 861 2069 431


 33%|███▎      | 8/24 [00:11<00:25,  1.62s/it]

1 pyroptosis_unshuffled_treatment_holdout_df D 03 4 917 1502 850
2 pyroptosis_unshuffled_treatment_holdout_df D 08 2 2901 1532 1869
3 pyroptosis_unshuffled_treatment_holdout_df L 02 4 1573 1050 955
4 pyroptosis_unshuffled_treatment_holdout_df N 11 15 2035 1289 1177
5 pyroptosis_unshuffled_treatment_holdout_df M 10 15 2206 625 1130


 38%|███▊      | 9/24 [00:12<00:24,  1.65s/it]

1 pyroptosis_unshuffled_holdout_df F 03 4 1065 957 665
2 pyroptosis_unshuffled_holdout_df C 04 13 1738 179 1050
3 pyroptosis_unshuffled_holdout_df K 09 6 654 296 865
4 pyroptosis_unshuffled_holdout_df C 04 1 2479 70 1554
5 pyroptosis_unshuffled_holdout_df K 09 15 1641 1553 896


 42%|████▏     | 10/24 [00:14<00:20,  1.47s/it]

1 apoptosis_shuffled_train_df K 07 8 3991 65 2035
2 apoptosis_shuffled_train_df K 06 11 2685 1345 1676
3 apoptosis_shuffled_train_df D 07 2 3311 663 1595
4 apoptosis_shuffled_train_df K 06 10 1616 1872 838
5 apoptosis_shuffled_train_df D 07 6 754 1912 876


 46%|████▌     | 11/24 [00:15<00:18,  1.39s/it]

1 apoptosis_shuffled_holdout_df D 06 3 1199 1920 647
2 apoptosis_shuffled_holdout_df D 06 6 1968 237 1404
3 apoptosis_shuffled_holdout_df E 06 16 3279 857 1882
4 apoptosis_shuffled_holdout_df D 06 5 923 899 485
5 apoptosis_shuffled_holdout_df E 06 16 1863 684 1070


 50%|█████     | 12/24 [00:17<00:18,  1.54s/it]

1 apoptosis_unshuffled_train_df K 07 2 2898 888 1506
2 apoptosis_unshuffled_train_df L 07 16 1425 285 996
3 apoptosis_unshuffled_train_df E 07 11 942 721 621
4 apoptosis_unshuffled_train_df L 06 9 793 2064 529
5 apoptosis_unshuffled_train_df K 06 8 589 835 318


 54%|█████▍    | 13/24 [00:19<00:18,  1.67s/it]

1 apoptosis_unshuffled_test_df E 07 15 2095 761 1229
2 apoptosis_unshuffled_test_df L 07 4 887 568 908
3 apoptosis_unshuffled_test_df E 07 1 2453 624 1608
4 apoptosis_unshuffled_test_df L 06 3 1251 1170 790
5 apoptosis_unshuffled_test_df L 06 10 250 624 144


 58%|█████▊    | 14/24 [00:22<00:21,  2.16s/it]

1 apoptosis_unshuffled_validation_df K 06 9 562 737 301
2 apoptosis_unshuffled_validation_df L 06 10 167 413 99
3 apoptosis_unshuffled_validation_df E 07 14 1010 1692 641
4 apoptosis_unshuffled_validation_df K 06 4 643 770 424
5 apoptosis_unshuffled_validation_df L 06 7 2886 1725 2064


 62%|██████▎   | 15/24 [00:25<00:21,  2.42s/it]

1 apoptosis_unshuffled_holdout_df E 06 16 439 297 246
2 apoptosis_unshuffled_holdout_df E 06 10 2095 1487 1317
3 apoptosis_unshuffled_holdout_df E 06 13 3243 893 1933
4 apoptosis_unshuffled_holdout_df E 06 5 3201 2057 2001
5 apoptosis_unshuffled_holdout_df E 06 3 148 2110 117


 67%|██████▋   | 16/24 [00:32<00:29,  3.74s/it]

1 control_shuffled_train_df N 06 7 137 631 110
2 control_shuffled_train_df G 04 6 3097 1246 1592
3 control_shuffled_train_df O 06 6 506 905 338
4 control_shuffled_train_df C 07 11 1890 2128 1923
5 control_shuffled_train_df E 05 3 1643 1779 947


 71%|███████   | 17/24 [00:33<00:20,  2.96s/it]

1 control_shuffled_test_df B 06 5 1062 1445 792
2 control_shuffled_test_df B 07 2 1432 943 739
3 control_shuffled_test_df I 04 7 1610 1878 1019
4 control_shuffled_test_df J 07 6 2180 1323 1572
5 control_shuffled_test_df O 04 4 3022 652 1920


 75%|███████▌  | 18/24 [00:35<00:15,  2.59s/it]

1 control_shuffled_validation_df C 12 7 2338 915 1811
2 control_shuffled_validation_df F 05 16 1796 66 848
3 control_shuffled_validation_df F 05 4 3039 1104 1759
4 control_shuffled_validation_df I 02 16 3224 1887 1816
5 control_shuffled_validation_df M 12 13 2124 1967 1169


 79%|███████▉  | 19/24 [00:36<00:10,  2.15s/it]

1 control_shuffled_holdout_df N 12 11 3329 847 1926
2 control_shuffled_holdout_df M 05 13 328 1433 186
3 control_shuffled_holdout_df O 12 11 1226 1197 995
4 control_shuffled_holdout_df G 10 11 1718 1658 1401
5 control_shuffled_holdout_df G 10 2 997 1476 633


 83%|████████▎ | 20/24 [00:37<00:07,  1.85s/it]

1 control_unshuffled_train_df J 05 7 1645 635 991
2 control_unshuffled_train_df B 07 6 3675 1514 2143
3 control_unshuffled_train_df E 04 3 2848 448 1771
4 control_unshuffled_train_df B 12 10 657 1067 322
5 control_unshuffled_train_df C 06 12 1750 517 1229


 88%|████████▊ | 21/24 [00:42<00:08,  2.95s/it]

1 control_unshuffled_test_df M 07 3 2234 34 1112
Cell is on the edge of the image, skipping
2 control_unshuffled_test_df J 07 6 2143 1632 1548
3 control_unshuffled_test_df K 12 5 572 1674 574
4 control_unshuffled_test_df H 12 8 257 1247 188
5 control_unshuffled_test_df K 05 12 686 1187 536


 92%|█████████▏| 22/24 [00:45<00:05,  2.79s/it]

1 control_unshuffled_validation_df G 11 2 593 1904 381
2 control_unshuffled_validation_df E 04 3 2235 1506 1400
3 control_unshuffled_validation_df O 04 12 1327 1661 1035
4 control_unshuffled_validation_df K 05 3 2016 2073 1132
5 control_unshuffled_validation_df F 04 8 326 147 417


 96%|█████████▌| 23/24 [00:51<00:03,  3.79s/it]

1 control_unshuffled_holdout_df J 04 12 2692 500 1930
2 control_unshuffled_holdout_df O 11 12 1851 1306 1859
3 control_unshuffled_holdout_df I 10 4 2521 1925 1408
4 control_unshuffled_holdout_df K 11 9 2968 1799 1738
5 control_unshuffled_holdout_df N 05 8 2330 1628 1421


100%|██████████| 24/24 [00:54<00:00,  2.29s/it]


In [16]:
# define main_df_path
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")

In [17]:
main_df

Unnamed: 0,true_label,predicted_label,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_Site,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,...,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,labels,data_split,shuffle,correct,image_path,image_crop_path
0,1,1,PBMC,J08,37243,13,6,DMSO,0.025,%,...,61.0,288.0,41.0,267.0,pyroptosis,train,True,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
1,1,1,PBMC,B04,41185,6,6,DMSO,0.025,%,...,1892.0,299.0,1866.0,273.0,pyroptosis,train,True,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
2,1,1,PBMC,D10,45392,15,6,DMSO,0.025,%,...,1778.0,1198.0,1754.0,1167.0,pyroptosis,train,True,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
3,1,1,PBMC,F02,42347,12,6,Disulfiram,0.100,µM,...,76.0,1364.0,55.0,1340.0,pyroptosis,train,True,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
4,1,1,PBMC,H03,35358,2,6,Disulfiram,2.500,µM,...,728.0,2000.0,709.0,1977.0,pyroptosis,train,True,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,1,1,PBMC,J04,34947,12,6,Disulfiram,1.000,µM,...,513.0,1942.0,487.0,1919.0,healthy,holdout,False,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
114,1,1,PBMC,O11,26527,12,6,Media,,,...,1349.0,1889.0,1286.0,1844.0,healthy,holdout,False,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
115,1,1,PBMC,I10,28584,4,6,DMSO,0.025,%,...,1945.0,1436.0,1909.0,1399.0,healthy,holdout,False,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
116,1,1,PBMC,K11,43054,9,6,Z-VAD-FMK,100.000,µM,...,1813.0,1751.0,1788.0,1727.0,healthy,holdout,False,True,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
