This notebook selects random cells from each prediction category and displays them. The purpose is to obtain representative example images for each category.

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import pillow and open cv
import PIL
import seaborn as sns
import tifffile as tf
from cytocherrypick import cherrypick
from PIL import Image, ImageEnhance
from tqdm import tqdm

In [2]:
# function that selects a random image from the dataframe
def random_cell_select(
    df: pd.DataFrame,
    n: int = 1,
    random_state: int = 0,
) -> pd.DataFrame:
    """
    Select up to ``n`` random cells (rows) from the dataframe

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    n : int, optional
        Number of random cells to select, by default 1.
        If the dataframe has fewer than ``n`` rows, all rows are returned
        (in random order) instead of raising an error.
    random_state : int, optional
        Seed passed to ``DataFrame.sample`` so the selection is reproducible,
        by default 0

    Returns
    -------
    pd.DataFrame
        Dataframe holding the randomly selected cells; empty (same columns)
        when the input dataframe is empty
    """

    # nothing to sample from: return an empty frame that keeps the columns
    if len(df) == 0:
        return df.iloc[0:0]

    # sampling without replacement cannot return more rows than exist,
    # so cap the request at the number of available cells
    n_to_select = min(n, len(df))
    return df.sample(n=n_to_select, random_state=random_state)

In [3]:
# parameters
# cell type whose predictions are loaded; it is also used to build the
# per-cell-type figure and results directories further down
CELL_TYPE = "PBMC"
# NOTE(review): `feature` is not referenced by any other cell shown in this
# notebook — confirm it is still needed
feature = "Nuclei_Texture_SumVariance_CorrGasdermin_3_01_256"

In [4]:
# define directories
# where the images are
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere
image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
)
# the image directory is an input: fail early with a clear message instead of
# silently creating an empty directory when the path is wrong or not mounted
if not image_dir_path.is_dir():
    raise FileNotFoundError(f"Image directory not found: {image_dir_path}")

# where the figures are written
image_out_dir_path = pathlib.Path("../figures/")
# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [5]:
# path to the single-cell predictions of the multi-class MLP model
# for the selected cell type
df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/{CELL_TYPE}/single_cell_predictions.parquet"
)
# read in the data
df = pd.read_parquet(df_path)

# NOTE(review): the disabled code below interpolates `cell_type`, while the
# parameter defined in this notebook is `CELL_TYPE`; fix the name before
# re-enabling it
# df_no_fs_path = pathlib.Path(f"../../data/{cell_type}_sc.parquet")
# # read in the data
# df_no_fs = pd.read_parquet(df_no_fs_path)

# preview the first rows of the predictions table
df.head()

Unnamed: 0,true_label,predicted_label,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_Site,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,...,Metadata_Dose,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X,Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y,labels,data_split,shuffle
0,1,1,PBMC,I03,46663,5,6,Z-VAD-FMK,100.0,µM,...,10.000_µg_per_ml,95.866071,128.33631,111.0,143.0,87.0,117.0,healthy,train,True
1,1,1,PBMC,D05,46119,5,6,DMSO,0.025,%,...,100.000_µg_per_ml_10.000_µM,244.816143,477.495516,258.0,489.0,233.0,466.0,pyroptosis,train,True
2,1,1,PBMC,G09,43977,9,6,Disulfiram,1.0,µM,...,10.000_µg_per_ml,1728.680628,382.531414,1740.0,394.0,1717.0,365.0,pyroptosis,train,True
3,1,1,PBMC,N07,21434,7,6,DMSO,0.025,%,...,10.000_nM,1363.190736,282.046322,1376.0,292.0,1352.0,272.0,healthy,train,True
4,2,1,PBMC,B06,25314,4,6,DMSO,0.025,%,...,0.100_%,778.51676,1777.108939,801.0,1790.0,754.0,1757.0,healthy,train,True


In [6]:
# add a boolean column flagging whether the prediction was correct
# (vectorized comparison instead of a row-by-row apply)
df["correct"] = df["true_label"] == df["predicted_label"]
# split the data into correct and incorrect predictions
df_correct = df[df["correct"]]
df_incorrect = df[~df["correct"]]
# every cell must land in exactly one of the two groups
assert len(df_correct) + len(df_incorrect) == len(df)

In [7]:
# Split the correctly predicted cells into one dataframe per
# (class, shuffle status, data split) combination and collect them in a
# dictionary keyed "<class>_<shuffled|unshuffled>_<split>_df".
# The iteration order below fixes the dictionary's insertion order.

# (name used in the dictionary key, value in the "labels" column)
class_labels = [
    ("pyroptosis", "pyroptosis"),
    ("apoptosis", "apoptosis"),
    ("control", "healthy"),
]
# (name used in the dictionary key, value in the "shuffle" column)
shuffle_states = [
    ("shuffled", True),
    ("unshuffled", False),
]
# values in the "data_split" column (used verbatim in the dictionary key)
data_splits = ["train", "test", "validation", "treatment_holdout", "holdout"]

dict_of_dfs = {}
for class_name, label_value in class_labels:
    class_mask = df_correct["labels"] == label_value
    for shuffle_name, shuffle_value in shuffle_states:
        shuffle_mask = df_correct["shuffle"] == shuffle_value
        for split_name in data_splits:
            split_mask = df_correct["data_split"] == split_name
            dict_of_dfs[f"{class_name}_{shuffle_name}_{split_name}_df"] = df_correct[
                class_mask & shuffle_mask & split_mask
            ]

# report every combination that contains no cells
for key, value in dict_of_dfs.items():
    if len(value) == 0:
        print(key)

apoptosis_shuffled_test_df
apoptosis_shuffled_validation_df
apoptosis_shuffled_treatment_holdout_df
apoptosis_unshuffled_treatment_holdout_df
control_shuffled_treatment_holdout_df
control_unshuffled_treatment_holdout_df


In [8]:
# lookup tables that translate plate coordinates into the zero-padded,
# two-digit codes used in the image file names

# plate row letter (A-P) -> two-digit row number ("01"-"16")
well_dict = {
    row_letter: f"{row_number:02d}"
    for row_number, row_letter in enumerate("ABCDEFGHIJKLMNOP", start=1)
}
# plate column number as a string ("1"-"24") -> two-digit code ("01"-"24")
column_dict = {
    str(column_number): f"{column_number:02d}" for column_number in range(1, 25)
}
# field of view number as a string ("1"-"16") -> two-digit code ("01"-"16")
fov_dict = {str(fov_number): f"{fov_number:02d}" for fov_number in range(1, 17)}

In [9]:
# file name suffix shared by every image of a channel; only the channel
# number (ch1-ch5) differs between the five names
(
    image_basename_1,
    image_basename_2,
    image_basename_3,
    image_basename_4,
    image_basename_5,
) = (f"p04-ch{channel_number}sk1fk1fl1_IC.tiff" for channel_number in range(1, 6))

In [10]:
# rebind the figure output directory to a sub-folder named after the cell type
image_out_dir_path = pathlib.Path(f"../figures/{CELL_TYPE}/")
# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [11]:
# set constants for the loop
# half side length of the square crop taken around each nucleus center
# (the crop spans center - radius to center + radius along x and y)
radius = 50
# define the number of cells to select from each category
n = 5

In [12]:
# randomly pick n cells from every non-empty category
dict_of_subset_dfs = {}
# a dedicated loop variable is used so the notebook-wide predictions
# dataframe `df` is not overwritten by a small subset
for key, category_df in tqdm(dict_of_dfs.items()):
    # categories without any cells cannot be sampled; leave them out
    if len(category_df) == 0:
        continue
    # select n random cells from the dataframe and store them under the same key
    dict_of_subset_dfs[key] = random_cell_select(category_df, n)

100%|██████████| 30/30 [00:00<00:00, 421.24it/s]


In [13]:
# create a blank df (same columns, no rows) to append the selected cells to
# any sampled subset can serve as the template, so take the first one rather
# than a hard-coded category that might be empty for another dataset
if not dict_of_subset_dfs:
    raise ValueError("No non-empty prediction category is available to sample from")
template_df = next(iter(dict_of_subset_dfs.values()))
# keep the column layout but drop all rows
main_df = template_df.iloc[0:0].copy()

In [14]:
def _load_scaled_channel(image_path: pathlib.Path) -> np.ndarray:
    """Read one channel image as grayscale and stretch it to the 16-bit range (0-65535)."""
    # NOTE(review): IMREAD_GRAYSCALE without IMREAD_ANYDEPTH presumably returns
    # 8-bit data even for 16-bit TIFFs — confirm this is intended
    image = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # np.stack over the rows of a 2-D array with axis=-1 yields its transpose;
    # the flip + rotation applied to the composite later restore the orientation
    stacked = np.stack(image, axis=-1)
    peak = np.max(stacked)
    if peak == 0:
        # completely dark channel: avoid dividing by zero
        return np.zeros(stacked.shape, dtype=np.uint16)
    return (stacked / peak * 65535).astype(np.uint16)


def _crop_centered(
    image: np.ndarray, center_x: int, center_y: int, half_size: int
) -> np.ndarray:
    """Return a (2*half_size x 2*half_size) crop centered on (center_x, center_y), zero-padded at the image border."""
    side = 2 * half_size
    crop = np.zeros((side, side) + image.shape[2:], dtype=image.dtype)
    top = center_y - half_size
    left = center_x - half_size
    # part of the requested window that actually lies inside the image;
    # clamping also prevents negative indices from wrapping around
    src_top, src_bottom = max(top, 0), min(top + side, image.shape[0])
    src_left, src_right = max(left, 0), min(left + side, image.shape[1])
    if src_top < src_bottom and src_left < src_right:
        crop[
            src_top - top : src_bottom - top, src_left - left : src_right - left
        ] = image[src_top:src_bottom, src_left:src_right]
    return crop


### channels ###
# * Channel 1: DAPI
# * Channel 2: ER
# * Channel 3: GasderminD
# * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
# * Channel 5: Mitochondria

# channels merged into the composite (blue, green, red);
# these labels are embedded in the output file names
channel1 = "im1"
channel2 = "im3"
channel3 = "im4"

# one single-row dataframe per processed cell; concatenated once after the loop
cell_rows = []
for key in tqdm(dict_of_subset_dfs):
    subset_df = dict_of_subset_dfs[key]
    # loop through the selected cells of this category
    for cell in range(len(subset_df)):
        # current cell as a Series (one row of the dataframe)
        cell_row = subset_df.iloc[cell]
        fov_id = str(int(cell_row["Metadata_Site"]))
        cell_id = cell_row["Metadata_Cells_Number_Object_Number"]
        well_id = cell_row["Metadata_Well"]
        row_id = well_id[0]
        column_id = well_id[1:]
        center_x = int(cell_row["Metadata_Nuclei_Location_Center_X"])
        center_y = int(cell_row["Metadata_Nuclei_Location_Center_Y"])
        print(cell + 1, key, row_id, column_id, fov_id, cell_id, center_x, center_y)

        # common file name prefix (row, column, field of view) of all channels
        image_prefix = f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}"

        # load only the three channels that go into the composite,
        # each scaled to fit within the 16-bit range (0-65535)
        blue_channel = _load_scaled_channel(
            image_dir_path.joinpath(f"{image_prefix}{image_basename_1}")
        )
        green_channel = _load_scaled_channel(
            image_dir_path.joinpath(f"{image_prefix}{image_basename_3}")
        )
        red_channel = _load_scaled_channel(
            image_dir_path.joinpath(f"{image_prefix}{image_basename_4}")
        )

        composite_image = cv2.merge((blue_channel, green_channel, red_channel)).astype(
            np.uint16
        )
        composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)

        # transformations of the image to fix the orientation post pixel scaling
        # flip the image vertically
        composite_image = cv2.flip(composite_image, 0)
        # rotate the image 90 degrees clockwise
        composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)

        # constant-size bounding box around the cell (nucleus) center;
        # windows reaching past the image border are zero-padded
        im_crop = _crop_centered(composite_image, center_x, center_y, radius)

        # output locations of the full composite and of the crop
        composite_image_path = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_cell_{cell}.tiff"
        crop_image_path = f"{image_out_dir_path}/{key}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{cell}.tiff"

        # write images
        tf.imwrite(
            pathlib.Path(composite_image_path),
            composite_image,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(crop_image_path),
            im_crop,
            compression=None,
        )

        # record where the images of this cell were written;
        # image_path points at the full composite, image_crop_path at the crop
        row_frame = cell_row.to_frame().T
        row_frame["image_path"] = composite_image_path
        row_frame["image_crop_path"] = crop_image_path
        cell_rows.append(row_frame)

# append all processed cells at once instead of growing the frame row by row
if cell_rows:
    main_df = pd.concat([main_df, *cell_rows], ignore_index=True)

  0%|          | 0/24 [00:00<?, ?it/s]

1 pyroptosis_shuffled_train_df K 03 4 338 2045 264


  main_df = pd.concat([main_df, df], ignore_index=True)


2 pyroptosis_shuffled_train_df F 09 2 1283 1738 754
3 pyroptosis_shuffled_train_df K 03 14 2799 1768 1616
4 pyroptosis_shuffled_train_df C 05 11 997 1653 782
5 pyroptosis_shuffled_train_df C 02 8 1590 1716 1506


  4%|▍         | 1/24 [00:01<00:23,  1.02s/it]

1 pyroptosis_shuffled_test_df E 09 3 2733 1058 1658
2 pyroptosis_shuffled_test_df C 11 9 1514 1685 959
3 pyroptosis_shuffled_test_df D 10 5 1049 1090 933
4 pyroptosis_shuffled_test_df J 09 15 2459 1657 1335


  8%|▊         | 2/24 [00:02<00:22,  1.03s/it]

5 pyroptosis_shuffled_test_df B 11 7 675 1041 701
1 pyroptosis_shuffled_validation_df K 03 8 993 76 677
2 pyroptosis_shuffled_validation_df G 09 9 2610 1135 1525
3 pyroptosis_shuffled_validation_df D 10 1 179 465 192
4 pyroptosis_shuffled_validation_df K 02 13 373 52 382
5 pyroptosis_shuffled_validation_df D 05 16 3157 1239 1764


 12%|█▎        | 3/24 [00:03<00:23,  1.11s/it]

1 pyroptosis_shuffled_treatment_holdout_df L 09 15 269 1790 185
2 pyroptosis_shuffled_treatment_holdout_df L 09 5 2735 756 2061
3 pyroptosis_shuffled_treatment_holdout_df L 09 3 2275 1717 1302
4 pyroptosis_shuffled_treatment_holdout_df L 09 13 1411 965 1095
5 pyroptosis_shuffled_treatment_holdout_df L 10 2 1268 1947 733


 17%|█▋        | 4/24 [00:04<00:23,  1.17s/it]

1 pyroptosis_shuffled_holdout_df B 05 4 1012 1292 591
2 pyroptosis_shuffled_holdout_df B 05 1 961 276 894
3 pyroptosis_shuffled_holdout_df H 02 16 743 1546 416
4 pyroptosis_shuffled_holdout_df L 03 12 2063 2142 1547
5 pyroptosis_shuffled_holdout_df L 03 12 1624 739 1245


 21%|██        | 5/24 [00:05<00:22,  1.16s/it]

1 pyroptosis_unshuffled_train_df C 10 15 200 586 106
2 pyroptosis_unshuffled_train_df H 08 10 715 2068 450
3 pyroptosis_unshuffled_train_df D 04 6 23 1354 38




4 pyroptosis_unshuffled_train_df F 08 2 2995 1598 1743
5 pyroptosis_unshuffled_train_df C 10 13 1634 408 1007


 25%|██▌       | 6/24 [00:06<00:21,  1.18s/it]

1 pyroptosis_unshuffled_test_df G 02 14 2645 1948 1567
2 pyroptosis_unshuffled_test_df K 03 9 2472 1751 1486
3 pyroptosis_unshuffled_test_df B 11 6 2025 447 1385
4 pyroptosis_unshuffled_test_df J 09 10 2666 657 1705
5 pyroptosis_unshuffled_test_df D 05 7 733 2084 930


 29%|██▉       | 7/24 [00:07<00:19,  1.14s/it]

1 pyroptosis_unshuffled_validation_df C 02 13 3370 1269 1875
2 pyroptosis_unshuffled_validation_df E 09 3 2137 449 1321
3 pyroptosis_unshuffled_validation_df K 03 13 215 809 137
4 pyroptosis_unshuffled_validation_df G 09 8 3140 2047 1813
5 pyroptosis_unshuffled_validation_df B 10 11 908 1358 1123


 33%|███▎      | 8/24 [00:09<00:20,  1.29s/it]

1 pyroptosis_unshuffled_treatment_holdout_df N 04 4 2601 284 1613
2 pyroptosis_unshuffled_treatment_holdout_df L 11 7 2038 233 1985
3 pyroptosis_unshuffled_treatment_holdout_df D 09 4 1883 1341 1413
4 pyroptosis_unshuffled_treatment_holdout_df M 10 1 326 976 273
5 pyroptosis_unshuffled_treatment_holdout_df L 09 11 363 1670 599


 38%|███▊      | 9/24 [00:11<00:20,  1.36s/it]

1 pyroptosis_unshuffled_holdout_df B 05 8 2329 1443 1585
2 pyroptosis_unshuffled_holdout_df C 04 13 468 420 296
3 pyroptosis_unshuffled_holdout_df B 05 13 2547 1263 1389
4 pyroptosis_unshuffled_holdout_df F 03 5 1123 216 765
5 pyroptosis_unshuffled_holdout_df E 03 13 1189 2002 697


 42%|████▏     | 10/24 [00:13<00:22,  1.63s/it]

1 apoptosis_shuffled_train_df L 06 11 1962 1430 1117
2 apoptosis_shuffled_train_df L 07 4 1979 1091 1898
3 apoptosis_shuffled_train_df L 07 3 3541 62 2107
4 apoptosis_shuffled_train_df D 07 6 335 637 379
5 apoptosis_shuffled_train_df D 07 7 807 1561 925


 46%|████▌     | 11/24 [00:15<00:22,  1.75s/it]

1 apoptosis_shuffled_holdout_df E 06 7 672 189 1185
2 apoptosis_shuffled_holdout_df D 06 14 1383 1504 760
3 apoptosis_shuffled_holdout_df D 06 3 435 946 240
4 apoptosis_shuffled_holdout_df D 06 15 2090 1498 1011
5 apoptosis_shuffled_holdout_df E 06 13 641 1328 378


 50%|█████     | 12/24 [00:16<00:20,  1.70s/it]

1 apoptosis_unshuffled_train_df L 07 4 362 385 328
2 apoptosis_unshuffled_train_df K 07 16 1321 1008 652
3 apoptosis_unshuffled_train_df K 06 7 770 223 446
4 apoptosis_unshuffled_train_df L 06 10 563 84 353
5 apoptosis_unshuffled_train_df K 07 6 2632 711 2062


 54%|█████▍    | 13/24 [00:18<00:18,  1.71s/it]

1 apoptosis_unshuffled_test_df E 07 11 1340 554 879
2 apoptosis_unshuffled_test_df L 06 5 2138 1437 2086
3 apoptosis_unshuffled_test_df L 06 11 2822 1735 1610
4 apoptosis_unshuffled_test_df K 06 14 508 228 249
5 apoptosis_unshuffled_test_df K 07 9 1141 905 607


 58%|█████▊    | 14/24 [00:20<00:17,  1.80s/it]

1 apoptosis_unshuffled_validation_df L 06 12 2551 1356 1654
2 apoptosis_unshuffled_validation_df L 07 12 2459 1171 1694
3 apoptosis_unshuffled_validation_df K 07 16 982 1911 484
4 apoptosis_unshuffled_validation_df L 07 11 999 1934 630
5 apoptosis_unshuffled_validation_df E 07 2 830 1238 491


 62%|██████▎   | 15/24 [00:23<00:18,  2.01s/it]

1 apoptosis_unshuffled_holdout_df E 06 16 2681 1791 1550
2 apoptosis_unshuffled_holdout_df E 06 15 2621 884 1705
3 apoptosis_unshuffled_holdout_df E 06 6 1747 1369 1527
4 apoptosis_unshuffled_holdout_df E 06 13 1412 2128 820
5 apoptosis_unshuffled_holdout_df E 06 14 2431 624 1494


 67%|██████▋   | 16/24 [00:27<00:20,  2.56s/it]

1 control_shuffled_train_df K 04 2 344 1837 484
2 control_shuffled_train_df M 12 12 1302 45 917




3 control_shuffled_train_df O 03 14 3115 1520 1889
4 control_shuffled_train_df K 10 11 1315 1212 1006
5 control_shuffled_train_df K 10 12 1366 1290 1085


 71%|███████   | 17/24 [00:28<00:15,  2.21s/it]

1 control_shuffled_test_df E 04 4 1201 1517 1010
2 control_shuffled_test_df C 12 9 3160 1717 1501
3 control_shuffled_test_df O 03 7 2623 1942 1841
4 control_shuffled_test_df C 06 1 45 1663 39




5 control_shuffled_test_df O 02 7 2915 887 2088


 75%|███████▌  | 18/24 [00:32<00:15,  2.67s/it]

1 control_shuffled_validation_df G 12 10 1689 446 816
2 control_shuffled_validation_df H 10 4 787 1877 547
3 control_shuffled_validation_df J 12 8 41 1492 28




4 control_shuffled_validation_df L 12 9 1793 1902 970
5 control_shuffled_validation_df K 12 13 3486 1828 1932


 79%|███████▉  | 19/24 [00:33<00:11,  2.35s/it]

1 control_shuffled_holdout_df O 08 10 1680 86 970
2 control_shuffled_holdout_df M 05 15 3937 844 2064
3 control_shuffled_holdout_df M 05 11 1663 1071 1209
4 control_shuffled_holdout_df N 12 1 188 1935 106
5 control_shuffled_holdout_df O 05 3 1529 1719 766


 83%|████████▎ | 20/24 [00:36<00:09,  2.44s/it]

1 control_unshuffled_train_df I 12 2 3930 67 2133
2 control_unshuffled_train_df O 09 9 2137 742 1312
3 control_unshuffled_train_df G 06 7 156 1315 100
4 control_unshuffled_train_df I 04 15 2791 61 1328
5 control_unshuffled_train_df J 12 9 356 1617 188


 88%|████████▊ | 21/24 [00:39<00:07,  2.63s/it]

1 control_unshuffled_test_df H 10 8 11 858 19




2 control_unshuffled_test_df K 10 13 61 1022 39




3 control_unshuffled_test_df F 04 5 2057 124 976
4 control_unshuffled_test_df F 12 12 1389 1527 880
5 control_unshuffled_test_df H 10 11 8 962 11


 92%|█████████▏| 22/24 [00:46<00:07,  3.95s/it]

1 control_unshuffled_validation_df I 02 7 1704 44 1420




2 control_unshuffled_validation_df B 06 12 2976 852 2027
3 control_unshuffled_validation_df O 02 16 1942 1379 1120
4 control_unshuffled_validation_df I 12 11 620 836 1097
5 control_unshuffled_validation_df E 11 15 3297 733 1452


 96%|█████████▌| 23/24 [00:52<00:04,  4.69s/it]

1 control_unshuffled_holdout_df G 07 10 1097 1002 579
2 control_unshuffled_holdout_df N 12 7 2282 419 1243
3 control_unshuffled_holdout_df L 04 14 468 595 328
4 control_unshuffled_holdout_df I 06 9 3270 1832 1611
5 control_unshuffled_holdout_df I 10 3 2006 1787 1066


100%|██████████| 24/24 [01:03<00:00,  2.63s/it]


In [15]:
# define main_df_path: the directory (per cell type) that receives the results
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe of selected cells, including the image path columns
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")