This notebook selects random cells from each prediction category and displays them. The purpose is to obtain representative example images of each category.

In [None]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import pillow and open cv
import PIL
import seaborn as sns
import tifffile as tf
from cytocherrypick import cherrypick
from PIL import Image, ImageEnhance
from tqdm import tqdm

In [None]:
# function that selects a random image from the dataframe


def random_cell_select(df: pd.DataFrame, random_state: int = 0) -> pd.DataFrame:
    """
    Selects a single random cell (row) from the dataframe

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the cell features
    random_state : int, optional
        Seed passed to ``DataFrame.sample``; a fixed seed makes the
        selection reproducible between runs, by default 0

    Returns
    -------
    pd.DataFrame
        One-row dataframe holding the selected cell

    Raises
    ------
    ValueError
        If ``df`` contains no rows to select from
    """
    # fail with a clear message instead of the generic sampling error
    if len(df) == 0:
        raise ValueError("Cannot select a random cell from an empty dataframe")

    # select a random cell
    return df.sample(n=1, random_state=random_state)

In [None]:
# parameters
# cell type; inserted into the path of the predictions file that is loaded below
CELL_TYPE: str = "PBMC"
# feature name; used as the name of the sub-directory the figures are written to
feature: str = "Nuclei_Texture_SumVariance_CorrGasdermin_3_01_256"

In [None]:
# define directories
# where the images are
image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
)
# the image directory is an input: creating it when it is missing would only
# hide a wrong path or an unmounted drive behind an empty folder, so fail early
if not image_dir_path.is_dir():
    raise FileNotFoundError(f"Image directory not found: {image_dir_path}")

# where the figures are written
image_out_dir_path = pathlib.Path("../figures/")
# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [None]:
# parquet file with the single-cell predictions for the selected cell type
df_path = pathlib.Path(
    f"../../4.sc_Morphology_Neural_Network_MLP_Model/results/Multi_Class/MultiClass_MLP/{CELL_TYPE}/single_cell_predictions.parquet"
)
# load the predictions into a dataframe
df = pd.read_parquet(path=df_path)

# preview the first rows
df.head()

In [None]:
# add column for if the prediction was correct
# (vectorized comparison instead of a Python-level function call per row)
df["correct"] = df["true_label"] == df["predicted_label"]
# split the data into correct and incorrect using the boolean column as a mask
df_correct = df[df["correct"]]
df_incorrect = df[~df["correct"]]
# sanity check: the two subsets together cover every row
assert len(df_correct) + len(df_incorrect) == len(df)

In [None]:
# split the correctly predicted cells into the different classes
pyroptosis_df = df_correct.loc[df_correct["labels"].eq("pyroptosis")]
apoptosis_df = df_correct.loc[df_correct["labels"].eq("apoptosis")]
control_df = df_correct.loc[df_correct["labels"].eq("healthy")]

# split each class by the value of its "shuffle" column
pyroptosis_shuffled_df = pyroptosis_df.loc[pyroptosis_df["shuffle"].eq(True)]
pyroptosis_unshuffled_df = pyroptosis_df.loc[pyroptosis_df["shuffle"].eq(False)]
apoptosis_shuffled_df = apoptosis_df.loc[apoptosis_df["shuffle"].eq(True)]
apoptosis_unshuffled_df = apoptosis_df.loc[apoptosis_df["shuffle"].eq(False)]
control_shuffled_df = control_df.loc[control_df["shuffle"].eq(True)]
control_unshuffled_df = control_df.loc[control_df["shuffle"].eq(False)]

# split the shuffled/unshuffled data by the data splits
def _by_split(frame: pd.DataFrame, split_name: str) -> pd.DataFrame:
    """Return the rows of ``frame`` whose "data_split" column equals ``split_name``."""
    return frame[frame["data_split"] == split_name]


pyroptosis_shuffled_train_df = _by_split(pyroptosis_shuffled_df, "train")
pyroptosis_shuffled_test_df = _by_split(pyroptosis_shuffled_df, "test")
pyroptosis_shuffled_validation_df = _by_split(pyroptosis_shuffled_df, "validation")
pyroptosis_shuffled_treatment_holdout_df = _by_split(
    pyroptosis_shuffled_df, "treatment_holdout"
)
pyroptosis_shuffled_holdout_df = _by_split(pyroptosis_shuffled_df, "holdout")

pyroptosis_unshuffled_train_df = _by_split(pyroptosis_unshuffled_df, "train")
pyroptosis_unshuffled_test_df = _by_split(pyroptosis_unshuffled_df, "test")
pyroptosis_unshuffled_validation_df = _by_split(
    pyroptosis_unshuffled_df, "validation"
)
pyroptosis_unshuffled_treatment_holdout_df = _by_split(
    pyroptosis_unshuffled_df, "treatment_holdout"
)
pyroptosis_unshuffled_holdout_df = _by_split(pyroptosis_unshuffled_df, "holdout")

apoptosis_shuffled_train_df = _by_split(apoptosis_shuffled_df, "train")
apoptosis_shuffled_test_df = _by_split(apoptosis_shuffled_df, "test")
apoptosis_shuffled_validation_df = _by_split(apoptosis_shuffled_df, "validation")
apoptosis_shuffled_treatment_holdout_df = _by_split(
    apoptosis_shuffled_df, "treatment_holdout"
)
apoptosis_shuffled_holdout_df = _by_split(apoptosis_shuffled_df, "holdout")

apoptosis_unshuffled_train_df = _by_split(apoptosis_unshuffled_df, "train")
apoptosis_unshuffled_test_df = _by_split(apoptosis_unshuffled_df, "test")
apoptosis_unshuffled_validation_df = _by_split(apoptosis_unshuffled_df, "validation")
apoptosis_unshuffled_treatment_holdout_df = _by_split(
    apoptosis_unshuffled_df, "treatment_holdout"
)
apoptosis_unshuffled_holdout_df = _by_split(apoptosis_unshuffled_df, "holdout")

control_shuffled_train_df = _by_split(control_shuffled_df, "train")
control_shuffled_test_df = _by_split(control_shuffled_df, "test")
control_shuffled_validation_df = _by_split(control_shuffled_df, "validation")
control_shuffled_treatment_holdout_df = _by_split(
    control_shuffled_df, "treatment_holdout"
)
control_shuffled_holdout_df = _by_split(control_shuffled_df, "holdout")

control_unshuffled_train_df = _by_split(control_unshuffled_df, "train")
control_unshuffled_test_df = _by_split(control_unshuffled_df, "test")
control_unshuffled_validation_df = _by_split(control_unshuffled_df, "validation")
control_unshuffled_treatment_holdout_df = _by_split(
    control_unshuffled_df, "treatment_holdout"
)
control_unshuffled_holdout_df = _by_split(control_unshuffled_df, "holdout")

# collect every class / shuffle / data-split subset in one dictionary,
# keyed by the name of the variable that holds it
dict_of_dfs = {
    "pyroptosis_shuffled_train_df": pyroptosis_shuffled_train_df,
    "pyroptosis_shuffled_test_df": pyroptosis_shuffled_test_df,
    "pyroptosis_shuffled_validation_df": pyroptosis_shuffled_validation_df,
    "pyroptosis_shuffled_treatment_holdout_df": pyroptosis_shuffled_treatment_holdout_df,
    "pyroptosis_shuffled_holdout_df": pyroptosis_shuffled_holdout_df,
    "pyroptosis_unshuffled_train_df": pyroptosis_unshuffled_train_df,
    "pyroptosis_unshuffled_test_df": pyroptosis_unshuffled_test_df,
    "pyroptosis_unshuffled_validation_df": pyroptosis_unshuffled_validation_df,
    "pyroptosis_unshuffled_treatment_holdout_df": pyroptosis_unshuffled_treatment_holdout_df,
    "pyroptosis_unshuffled_holdout_df": pyroptosis_unshuffled_holdout_df,
    "apoptosis_shuffled_train_df": apoptosis_shuffled_train_df,
    "apoptosis_shuffled_test_df": apoptosis_shuffled_test_df,
    "apoptosis_shuffled_validation_df": apoptosis_shuffled_validation_df,
    "apoptosis_shuffled_treatment_holdout_df": apoptosis_shuffled_treatment_holdout_df,
    "apoptosis_shuffled_holdout_df": apoptosis_shuffled_holdout_df,
    "apoptosis_unshuffled_train_df": apoptosis_unshuffled_train_df,
    "apoptosis_unshuffled_test_df": apoptosis_unshuffled_test_df,
    "apoptosis_unshuffled_validation_df": apoptosis_unshuffled_validation_df,
    "apoptosis_unshuffled_treatment_holdout_df": apoptosis_unshuffled_treatment_holdout_df,
    "apoptosis_unshuffled_holdout_df": apoptosis_unshuffled_holdout_df,
    "control_shuffled_train_df": control_shuffled_train_df,
    "control_shuffled_test_df": control_shuffled_test_df,
    "control_shuffled_validation_df": control_shuffled_validation_df,
    "control_shuffled_treatment_holdout_df": control_shuffled_treatment_holdout_df,
    "control_shuffled_holdout_df": control_shuffled_holdout_df,
    "control_unshuffled_train_df": control_unshuffled_train_df,
    "control_unshuffled_test_df": control_unshuffled_test_df,
    "control_unshuffled_validation_df": control_unshuffled_validation_df,
    "control_unshuffled_treatment_holdout_df": control_unshuffled_treatment_holdout_df,
    "control_unshuffled_holdout_df": control_unshuffled_holdout_df,
}

# check the length of each df and report the names of the empty ones
for key, value in dict_of_dfs.items():
    if len(value) == 0:
        print(key)

In [None]:
# plate row letter (A-P) -> zero-padded two-digit row number ("01"-"16")
well_dict = {
    row_letter: f"{row_number:02d}"
    for row_number, row_letter in enumerate("ABCDEFGHIJKLMNOP", start=1)
}
# plate column number as a string ("1"-"24") -> zero-padded two-digit form
column_dict = {
    str(column_number): f"{column_number:02d}" for column_number in range(1, 25)
}
# field-of-view number as a string ("1"-"16") -> zero-padded two-digit form
fov_dict = {str(fov_number): f"{fov_number:02d}" for fov_number in range(1, 17)}

In [None]:
# display the correctly predicted, shuffled apoptosis cells of the test split
apoptosis_shuffled_test_df

In [None]:
# try the random selection on a single subset and display the selected cell
random_cell_select(apoptosis_shuffled_train_df)

In [None]:
for key, subset_df in tqdm(dict_of_dfs.items()):
    print(key)
    # a cell cannot be sampled from a subset without rows, so skip empty subsets
    # instead of letting the selection raise and abort the loop
    if len(subset_df) == 0:
        continue
    # NOTE: `df` is deliberately rebound here because the next cell reads it;
    # after the loop it holds the cell selected from the last non-empty subset
    df = random_cell_select(df=subset_df)

In [None]:
# `df` is the one-row selection left over from the loop in the previous cell;
# read the image number of that cell
image_id = df["Metadata_ImageNumber"].values[0]
# NOTE: the remaining metadata (site, well, nuclei center and cytoplasm bounding
# box) is extracted for `control_unshuffled_holdout_cell` in a later cell; the
# commented-out copy of that code operating on `df` was removed from this cell

In [None]:
# select one random cell from the correctly predicted, unshuffled control cells
# of the holdout split
control_unshuffled_holdout_cell = random_cell_select(
    dict_of_dfs["control_unshuffled_holdout_df"]
)
# display the selected cell
control_unshuffled_holdout_cell
# define parts of the image from the df

In [None]:
def _cell_value(column_name: str):
    """Return the first value of ``column_name`` in the selected cell dataframe."""
    return control_unshuffled_holdout_cell[column_name].values[0]


# identifiers of the image, the field of view and the cell
image_id = _cell_value("Metadata_ImageNumber")
fov_id = control_unshuffled_holdout_cell["Metadata_Site"].astype(str).values[0]
cell_id = _cell_value("Metadata_Cells_Number_Object_Number")

# well name: the first character is taken as the row, the rest as the column
well_id = _cell_value("Metadata_Well")
row_id, column_id = well_id[0], well_id[1:]

# nuclei center coordinates
center_x = _cell_value("Metadata_Nuclei_Location_Center_X")
center_y = _cell_value("Metadata_Nuclei_Location_Center_Y")

# cytoplasm bounding box, cast to int so it can be used for slicing
max_x_box = _cell_value("Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_X").astype(
    int
)
max_y_box = _cell_value("Metadata_Cytoplasm_AreaShape_BoundingBoxMaximum_Y").astype(
    int
)
min_x_box = _cell_value("Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_X").astype(
    int
)
min_y_box = _cell_value("Metadata_Cytoplasm_AreaShape_BoundingBoxMinimum_Y").astype(
    int
)

# print one "label: value" line per extracted field
report_lines = [
    f"Image ID: {image_id}",
    f"Cell ID: {cell_id}",
    f"Well ID: {well_id}",
    f"row_id: {row_id}",
    f"fov_id: {fov_id}",
    f"column id: {column_id}",
    f"Center X: {center_x}",
    f"Center Y: {center_y}",
    f"Max X: {max_x_box}",
    f"Max Y: {max_y_box}",
    f"Min X: {min_x_box}",
    f"Min Y: {min_y_box}",
]
print("\n".join(report_lines))

In [None]:
# file-name suffix of each of the five channels (ch1 ... ch5)
(
    image_basename_1,
    image_basename_2,
    image_basename_3,
    image_basename_4,
    image_basename_5,
) = (f"p04-ch{channel_number}sk1fk1fl1_IC.tiff" for channel_number in range(1, 6))

In [None]:
# all channels of one field of view share the same row / column / field prefix
image_name_prefix = (
    f"r{well_dict[row_id]}c{column_dict[column_id]}f{fov_dict[fov_id]}"
)

# channel 1
image_name1 = f"{image_name_prefix}{image_basename_1}"
image_path1 = image_dir_path / image_name1
print(image_name1, "\n", image_path1)

# channel 2
image_name2 = f"{image_name_prefix}{image_basename_2}"
image_path2 = image_dir_path / image_name2
print(image_name2, "\n", image_path2)

# channel 3
image_name3 = f"{image_name_prefix}{image_basename_3}"
image_path3 = image_dir_path / image_name3
print(image_name3, "\n", image_path3)

# channel 4
image_name4 = f"{image_name_prefix}{image_basename_4}"
image_path4 = image_dir_path / image_name4
print(image_name4, "\n", image_path4)

# channel 5
image_name5 = f"{image_name_prefix}{image_basename_5}"
image_path5 = image_dir_path / image_name5
print(image_name5, "\n", image_path5)

In [None]:
# crop all 5 channels of the image
def _read_channel(image_path: pathlib.Path) -> np.ndarray:
    """
    Read one image from disk as a single-channel array at its stored bit depth

    Parameters
    ----------
    image_path : pathlib.Path
        Path of the image file to read

    Returns
    -------
    np.ndarray
        The image as returned by ``cv2.imread``

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returned None, i.e. the file could not be read
    """
    # IMREAD_GRAYSCALE on its own reduces the image to 8-bit; IMREAD_ANYDEPTH
    # keeps 16-bit data intact, which the later rescaling to 0-65535 relies on
    image = cv2.imread(
        image_path.as_posix(), cv2.IMREAD_GRAYSCALE | cv2.IMREAD_ANYDEPTH
    )
    # cv2.imread does not raise on a missing/unreadable file, it returns None
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return image


# rows are indexed by y and columns by x, hence the [y, x] slicing order
im1 = _read_channel(image_path1)
im_crop1 = im1[min_y_box:max_y_box, min_x_box:max_x_box]

im2 = _read_channel(image_path2)
im_crop2 = im2[min_y_box:max_y_box, min_x_box:max_x_box]

im3 = _read_channel(image_path3)
im_crop3 = im3[min_y_box:max_y_box, min_x_box:max_x_box]

im4 = _read_channel(image_path4)
im_crop4 = im4[min_y_box:max_y_box, min_x_box:max_x_box]

im5 = _read_channel(image_path5)
im_crop5 = im5[min_y_box:max_y_box, min_x_box:max_x_box]

### Channels
* Channel 1: DAPI
* Channel 2: ER
* Channel 3: GasderminD
* Channel 4: AGP (Actin, Golgi, and Plasma membrane)
* Channel 5: Mitochondria


In [None]:
# pick three channels to stack
# nuclei = blue
# Gasdermin = green
# Actin = red


def _scale_to_uint16(channel: np.ndarray) -> np.ndarray:
    """
    Stretch a channel so that its maximum maps to 65535 and cast it to uint16

    Parameters
    ----------
    channel : np.ndarray
        Pixel values of one channel

    Returns
    -------
    np.ndarray
        The rescaled channel as uint16; an all-zero channel is returned as
        zeros instead of being divided by its (zero) maximum
    """
    channel_max = np.max(channel)
    if channel_max == 0:
        return np.zeros_like(channel, dtype=np.uint16)
    return (channel / channel_max * 65535).astype(np.uint16)


# NOTE: np.stack iterates over the rows of a 2-D image and joins them along a
# new last axis, i.e. it returns the transposed image. This is kept as-is: the
# flip + 90 degree rotation applied to the composite later restores the
# original orientation.
blue_channel_stack = np.stack(im1, axis=-1)
green_channel_stack = np.stack(im3, axis=-1)
red_channel_stack = np.stack(im4, axis=-1)

# names of the stacked channels, used in the output file names
channel1 = "im1"
channel2 = "im3"
channel3 = "im4"

# Scale the pixel values to fit within the 16-bit range (0-65535)
blue_channel = _scale_to_uint16(blue_channel_stack)
green_channel = _scale_to_uint16(green_channel_stack)
red_channel = _scale_to_uint16(red_channel_stack)

In [None]:
# merge the three planes in blue, green, red order ...
bgr_planes = (blue_channel, green_channel, red_channel)
composite_image = cv2.merge(bgr_planes).astype(np.uint16)
# ... then convert the channel order from BGR to RGB
composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)

In [None]:
# transformations of the image to fix the orientation post pixel scaling
# (the channels went through np.stack(..., axis=-1), which transposes a 2-D
# image; a vertical flip followed by a 90 degree clockwise rotation is a
# transpose as well, so the two steps below undo it - their order matters)
# flip the image vertically
composite_image = cv2.flip(composite_image, 0)
# rotate the image 90 degrees clockwise
composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)

In [None]:
# cv2.imshow("Composite", composite_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [None]:
# crop the composite image to the cytoplasm bounding box of the selected cell
# (first index = rows / y, second index = columns / x)
im_crop = composite_image[min_y_box:max_y_box, min_x_box:max_x_box]
# uncomment to preview the crop in an OpenCV window
# cv2.imshow("Composite", im_crop)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [None]:
# image_out_dir_path updated to include the feature name
# (guarded so that re-running this cell does not nest one more feature folder
# inside the previous one)
if image_out_dir_path.name != feature:
    image_out_dir_path = image_out_dir_path / feature
image_out_dir_path.mkdir(parents=True, exist_ok=True)

# write images: the full composite and the crop around the selected cell
composite_stem = f"{channel1}_{channel2}_{channel3}_composite_image"
tf.imwrite(
    image_out_dir_path / f"{composite_stem}.tiff",
    composite_image,
    compression=None,
)
tf.imwrite(
    image_out_dir_path / f"{composite_stem}_crop.tiff",
    im_crop,
    compression=None,
)