## Imports

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tifffile as tf  # write tiff files
from cytocherrypick.calculations import find_median
from PIL import Image  # read tiff files
from toml import load
from tqdm import tqdm  # progress bar

In [2]:
CELL_TYPE = "PBMC"

In [3]:
sc_cell_path = pathlib.Path(f"../../../data/{CELL_TYPE}_preprocessed_sc_norm.parquet")
sc_cell_df = pd.read_parquet(
    sc_cell_path, columns=["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
)

columns_to_load = [
    "Nuclei_Location_Center_Y",
    "Nuclei_Location_Center_X",
]
# get the unfeature selected data
unselected_df_path = pathlib.Path(
    f"../../../data/{CELL_TYPE}_sc.parquet",
)
unselected_df = pd.read_parquet(unselected_df_path, columns=columns_to_load)
# reanme the columns to start with "Metadata_"
unselected_df.columns = [f"Metadata_{x}" for x in unselected_df.columns]
unselected_df.head()

Unnamed: 0,Metadata_Nuclei_Location_Center_Y,Metadata_Nuclei_Location_Center_X
0,10.691589,1379.632399
1,12.182927,178.853659
2,11.690402,571.773994
3,12.425595,980.089286
4,15.194313,1449.611374


In [4]:
# add the cell df to the unselected df
sc_cell_df = pd.concat([sc_cell_df, unselected_df], axis="columns")
sc_cell_df

Unnamed: 0,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,Metadata_Nuclei_Location_Center_Y,Metadata_Nuclei_Location_Center_X
0,LPS_0.010_ug_per_ml_DMSO_0.025_%,10.691589,1379.632399
1,LPS_0.010_ug_per_ml_DMSO_0.025_%,12.182927,178.853659
2,LPS_0.010_ug_per_ml_DMSO_0.025_%,11.690402,571.773994
3,LPS_0.010_ug_per_ml_DMSO_0.025_%,12.425595,980.089286
4,LPS_0.010_ug_per_ml_DMSO_0.025_%,15.194313,1449.611374
...,...,...,...
5598677,DMSO_0.100_%_DMSO_1.000_%,2069.264228,588.813008
5598678,DMSO_0.100_%_DMSO_1.000_%,2079.912698,1325.105820
5598679,DMSO_0.100_%_DMSO_1.000_%,2080.468992,1202.267442
5598680,DMSO_0.100_%_DMSO_1.000_%,2085.898894,525.848341


In [5]:
# Get the current working directory of the repository
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/ML/Interstellar_Analysis')

In [6]:
image_out_dir_path = pathlib.Path(
    f"{root_dir}/figures/3.figure3/figures/images/{CELL_TYPE}/"
)
composite_image_out_dir_path = pathlib.Path(
    f"{root_dir}/figures/3.figure3/figures/composite_images/{CELL_TYPE}/"
)

image_out_dir_path.mkdir(parents=True, exist_ok=True)
composite_image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [7]:
# define directories
# where the images are on a local machine
# this is a hard coded path to the 1TB image directory

#####
# THIS PATH NEEDS TO BE CHANGED TO THE LOCAL IMAGE DIRECTORY ON YOUR MACHINE
#####

image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)

In [8]:
# path
anova_path = pathlib.Path(
    f"../../../1.Exploratory_Data_Analysis/results/{CELL_TYPE}_combined.parquet"
)
# read in the anova results
anova_results = pd.read_parquet(anova_path)

## define the groups

In [9]:
# read in the ground truth data
data_path_ground_truth = (
    "../../../4.sc_Morphology_Neural_Network_MLP_Model/MLP_utils/ground_truth.toml"
)
ground_truth = load(data_path_ground_truth)

# make a a list of the treatments that are in the ground truth data
apoptosis_ground_truth_list = ground_truth["Apoptosis"]["apoptosis_groups_list"]
pyroptosis_ground_truth_list = ground_truth["Pyroptosis"]["pyroptosis_groups_list"]
control_ground_truth_list = ground_truth["Healthy"]["healthy_groups_list"]


# replace Flagellin_1.000_0_DMSO_0.0_% with Flagellin_1.000_ug_per_ml_DMSO_0.025_%
sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace(
    "Flagellin_0.100_ug_per_ml_DMSO_0.000_%", "Flagellin_0.100_ug_per_ml_DMSO_0.025_%"
)
# replace Flagellin_1.000_0_DMSO_0.0_% with Flagellin_1.000_ug_per_ml_DMSO_0.025_%
sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace(
    "Flagellin_1.000_ug_per_ml_DMSO_0.000_%", "Flagellin_1.000_ug_per_ml_DMSO_0.025_%"
)
sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace("Flagellin_1.000_0_DMSO_0.025_%", "Flagellin_1.000_ug_per_ml_DMSO_0.025_%")
# convert media_ctr_0.0_ug_per_ml_Media_ctr_0_0 to media_ctr_0.0_ug_per_ml_Media_ctr_0_025
sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace("media_ctr_0.0_ug_per_ml_Media_ctr_0_0", "media_ctr_0.0_0_Media_ctr_0.0_0")


sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace("media_ctr_0.0_0_Media_0_0", "media_ctr_0.0_0_Media_ctr_0.0_0")

sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = sc_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace(
    "Flagellin_1.000_0_Disulfiram_1.000_uM",
    "Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM",
)

# make a new column that is the treatment group based on the ground truth data
sc_cell_df["group"] = "NA"
sc_cell_df.loc[
    sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        apoptosis_ground_truth_list
    ),
    "group",
] = "Apoptosis"
sc_cell_df.loc[
    sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        pyroptosis_ground_truth_list
    ),
    "group",
] = "Pyroptosis"
sc_cell_df.loc[
    sc_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        control_ground_truth_list
    ),
    "group",
] = "Control"

# make the group column a category
sc_cell_df["group"] = pd.Categorical(
    sc_cell_df["group"],
    categories=["Control", "Apoptosis", "Pyroptosis"],
    ordered=True,
)

print(sc_cell_df["group"].unique())

['Pyroptosis', 'Control', 'Apoptosis']
Categories (3, object): ['Control' < 'Apoptosis' < 'Pyroptosis']


In [10]:
# create a column that adds group1 and group2 together
anova_results["group"] = anova_results["group1"] + "_" + anova_results["group2"]
print(anova_results.shape)

# filter out rows that have p-adj_abs > 0.05
anova_results = anova_results[anova_results["p-adj_abs"] < 0.05]
print(anova_results.shape)

# change the group names to replace healthy with control
anova_results["group"] = anova_results["group"].str.replace("healthy", "control")
# make a -log10(p-adj) column
anova_results["neg-log10(p-adj_abs)"] = -np.log10(anova_results["p-adj_abs"])
# sort by neg-log10(p-adj_abs)
anova_results = anova_results.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
# split the dfs into comparisons
c_p_df = anova_results[anova_results["group"] == "control_pyroptosis"]
a_c_df = anova_results[anova_results["group"] == "apoptosis_control"]
a_p_df = anova_results[anova_results["group"] == "apoptosis_pyroptosis"]
# sort by neg-log10(p-adj_abs)
c_p_df = c_p_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
a_c_df = a_c_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
a_p_df = a_p_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)

(3735, 11)
(2415, 11)


  result = getattr(ufunc, method)(*inputs, **kwargs)


In [11]:
# get the top 1 features for each comparison
c_p_top1 = c_p_df.iloc[:1, :]
a_c_top1 = a_c_df.iloc[:1, :]
a_p_top1 = a_p_df.iloc[:1, :]

c_p_top1["features"].to_list()
a_c_top1["features"].to_list()
a_p_top1["features"].to_list()
dict_of_top_all = {}
dict_of_top_all["control_pyroptosis"] = c_p_top1["features"].to_list()
dict_of_top_all["apoptosis_control"] = a_c_top1["features"].to_list()
dict_of_top_all["apoptosis_pyroptosis"] = a_p_top1["features"].to_list()

# get list of all the top features
top_features = []
for key in dict_of_top_all:
    top_features.extend(dict_of_top_all[key])
print(len(top_features))
# remove duplicates from the list
top_features = list(set(top_features))
print(len(top_features))
top_features

3
3


['Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1',
 'Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1',
 'Cytoplasm_AreaShape_Zernike_9_5']

In [12]:
# add columns
top_features = top_features + [
    "Metadata_Well",
    "Metadata_Site",
    "Metadata_ImageNumber",
    "Metadata_Cells_Number_Object_Number",
]

In [13]:
# get features from df
top_features_df = pd.read_parquet(
    sc_cell_path,
    columns=top_features,
)
top_features_df
# merge the top features df with the sc_cell_df
sc_cell_df = pd.concat([sc_cell_df, top_features_df], axis="columns")

In [14]:
# seperate the data into the different groups
control_df = sc_cell_df[sc_cell_df["group"] == "Control"]
apoptosis_df = sc_cell_df[sc_cell_df["group"] == "Apoptosis"]
pyroptosis_df = sc_cell_df[sc_cell_df["group"] == "Pyroptosis"]

In [15]:
# define empty dictionary
final_dict = {}

In [16]:
control_df.head()
# sort the control df by Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
control_df = control_df.sort_values(
    by="Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1", ascending=False
)
apoptosis_df = apoptosis_df.sort_values(
    by="Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1", ascending=False
)
pyroptosis_df = pyroptosis_df.sort_values(
    by="Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1", ascending=False
)

control_df.reset_index(drop=True, inplace=True)
apoptosis_df.reset_index(drop=True, inplace=True)
pyroptosis_df.reset_index(drop=True, inplace=True)

print(
    control_df["Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1"][
        control_df.last_valid_index()
    ],
    apoptosis_df["Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1"][
        apoptosis_df.last_valid_index()
    ],
    pyroptosis_df["Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1"][0],
)
# get the last item in the control df

dict_of_dfs = {}
dict_of_dfs["control"] = control_df
dict_of_dfs["apoptosis"] = apoptosis_df
dict_of_dfs["pyroptosis"] = pyroptosis_df

-1.7572623209140847 -1.757217885807288 1.6939199217700731


In [17]:
for group in tqdm(dict_of_top_all):
    print(group)
    for dataset in dict_of_dfs:
        feature = dict_of_top_all[group][0]
        key = f"{dataset}__{group}__{feature}"
        df = dict_of_dfs[dataset]
        df = df.sort_values(by=feature, ascending=False, inplace=False)
        df.reset_index(inplace=True, drop=True)
        # get the first and last 3 items in the df
        first_3 = df.head(3)
        last_3 = df.tail(3)
        # add the first and last 3 items to the final dict
        df = pd.concat([first_3, last_3], axis=0)
        print(len(df))
        final_dict[key] = df

  0%|          | 0/3 [00:00<?, ?it/s]

control_pyroptosis
6
6


 33%|███▎      | 1/3 [00:00<00:01,  1.03it/s]

6
apoptosis_control
6
6


 67%|██████▋   | 2/3 [00:01<00:00,  1.03it/s]

6
apoptosis_pyroptosis
6
6


100%|██████████| 3/3 [00:02<00:00,  1.39it/s]

6





## Get the images

In [18]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [19]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [20]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 5

In [21]:
# define an empty df
main_df = apoptosis_df.drop(apoptosis_df.index)

In [22]:
for i in tqdm(final_dict):
    for j in range(len(final_dict[i])):
        tmp_df = pd.DataFrame(final_dict[i].iloc[j]).T
        image_id = tmp_df["Metadata_ImageNumber"].values[0]
        fov_id = tmp_df["Metadata_Site"].values[0]
        cell_id = tmp_df["Metadata_Cells_Number_Object_Number"].values[0]
        well_id = tmp_df["Metadata_Well"].values[0]
        row_id = well_id[0]
        column_id = well_id[1:]
        center_x = tmp_df["Metadata_Nuclei_Location_Center_X"].values[0]
        center_y = tmp_df["Metadata_Nuclei_Location_Center_Y"].values[0]
        # make each of the ids a string
        fov_id = str(fov_id)
        cell_id = str(cell_id)
        well_id = str(well_id)
        row_id = str(row_id)
        column_id = str(column_id)
        center_x = int(center_x)
        center_y = int(center_y)
        treatment = i.split("__")[0]
        comparison = i.split("__")[1]
        feature = i.split("__")[2]
        print(well_id, treatment, comparison, feature)
        # create a custom and contstant bounding box for the images
        # this is made from the extracted center_x and center_y of the cell (nucleus)
        min_x_box = center_x - radius
        max_x_box = center_x + radius
        min_y_box = center_y - radius
        max_y_box = center_y + radius
        print(group, fov_id, cell_id, row_id, column_id, center_x, center_y)
        # create the image paths for each channel of the image
        image_name1 = (
            f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_1}"
        )
        image_path1 = image_dir_path.joinpath(image_name1)

        image_name2 = (
            f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_2}"
        )
        image_path2 = image_dir_path.joinpath(image_name2)

        image_name3 = (
            f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_3}"
        )
        image_path3 = image_dir_path.joinpath(image_name3)

        image_name4 = (
            f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_4}"
        )
        image_path4 = image_dir_path.joinpath(image_name4)

        image_name5 = (
            f"r{well_dict[row_id]}c{column_id}f{fov_dict[fov_id]}{image_basename_5}"
        )
        image_path5 = image_dir_path.joinpath(image_name5)

        # crop all 5 channels of the image
        im1 = cv2.imread(image_path1.as_posix(), cv2.IMREAD_UNCHANGED)

        im2 = cv2.imread(image_path2.as_posix(), cv2.IMREAD_UNCHANGED)

        im3 = cv2.imread(image_path3.as_posix(), cv2.IMREAD_UNCHANGED)

        im4 = cv2.imread(image_path4.as_posix(), cv2.IMREAD_UNCHANGED)

        im5 = cv2.imread(image_path5.as_posix(), cv2.IMREAD_UNCHANGED)

        # check for non-edge cells

        ### channels ###
        # * Channel 1: DAPI
        # * Channel 2: ER
        # * Channel 3: GasderminD
        # * Channel 4: AGP (Actin, Golgi, and Plasma membrane)
        # * Channel 5: Mitochondria

        # prior to merging adjust the brightness of the image to make it easier to see
        # adjust the brightness of the image to make it easier to see
        alpha = 0.05  # Contrast control (1.0-3.0)
        beta = 0  # Brightness control (0-100)
        im3 = cv2.convertScaleAbs(im3, alpha=alpha, beta=beta)
        im4 = cv2.convertScaleAbs(im4, alpha=alpha, beta=beta)
        # blue channel does not need to be adjusted as it is the DAPI channel and is already bright

        blue_channel_stack = np.stack(im1, axis=-1)
        yellow_channel_stack = np.stack(im2, axis=-1)
        green_channel_stack = np.stack(im3, axis=-1)
        red_channel_stack = np.stack(im4, axis=-1)
        magenta_channel_stack = np.stack(im5, axis=-1)

        channel1 = "im1"
        channel2 = "im3"
        channel3 = "im4"
        channel4 = "im5"
        channel5 = "im2"

        # Scale the pixel values to fit within the 16-bit range (0-65535)
        blue_channel = (blue_channel_stack / np.max(blue_channel_stack) * 65535).astype(
            np.uint16
        )
        yellow_channel = (
            yellow_channel_stack / np.max(yellow_channel_stack) * 65535
        ).astype(np.uint16)
        green_channel = (
            green_channel_stack / np.max(green_channel_stack) * 65535
        ).astype(np.uint16)
        red_channel = (red_channel_stack / np.max(red_channel_stack) * 65535).astype(
            np.uint16
        )
        magenta_channel = (
            magenta_channel_stack / np.max(magenta_channel_stack) * 65535
        ).astype(np.uint16)

        # merge the channels together

        composite_image = cv2.merge((red_channel, green_channel, blue_channel)).astype(
            np.uint16
        )

        # The images end up being `wonky` so we need to do some post processing prior to saving
        # where wonky means that the image is not oriented correctly
        # the image is rotated 90 degrees clockwise and flipped vertically

        # this will ensure that the images are oriented correctly with X and Y centers prior to cropping
        # transformations of the image to fix the orientation post pixel scaling
        # flip the image vertically
        composite_image = cv2.flip(composite_image, 0)
        # rotate the image 90 degrees clockwise
        composite_image = cv2.rotate(composite_image, cv2.ROTATE_90_CLOCKWISE)

        # flip the channels vertically
        blue_channel = cv2.flip(blue_channel, 0)
        yellow_channel = cv2.flip(yellow_channel, 0)
        green_channel = cv2.flip(green_channel, 0)
        red_channel = cv2.flip(red_channel, 0)
        magenta_channel = cv2.flip(magenta_channel, 0)
        # rotate the channels 90 degrees clockwise
        blue_channel = cv2.rotate(blue_channel, cv2.ROTATE_90_CLOCKWISE)
        yellow_channel = cv2.rotate(yellow_channel, cv2.ROTATE_90_CLOCKWISE)
        green_channel = cv2.rotate(green_channel, cv2.ROTATE_90_CLOCKWISE)
        red_channel = cv2.rotate(red_channel, cv2.ROTATE_90_CLOCKWISE)
        magenta_channel = cv2.rotate(magenta_channel, cv2.ROTATE_90_CLOCKWISE)

        composite_image_crop = composite_image[min_y_box:max_y_box, min_x_box:max_x_box]
        # crop the individual channels
        blue_channel_crop = blue_channel[min_y_box:max_y_box, min_x_box:max_x_box]
        yellow_channel_crop = yellow_channel[min_y_box:max_y_box, min_x_box:max_x_box]
        green_channel_crop = green_channel[min_y_box:max_y_box, min_x_box:max_x_box]
        red_channel_crop = red_channel[min_y_box:max_y_box, min_x_box:max_x_box]
        magenta_channel_crop = magenta_channel[min_y_box:max_y_box, min_x_box:max_x_box]

        if composite_image_crop.shape[0] == 0 or composite_image_crop.shape[1] == 0:
            print("Cell is on the edge of the image, skipping")
            continue

            # image_out_dir_path updated to include the feature name
        # write images
        tf.imwrite(
            pathlib.Path(
                f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_cell_{j}.tiff"
            ),
            composite_image,
            compression=None,
        )
        # write each channel as a tiff file
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_blue_channel_cell_{j}.tiff"),
            blue_channel,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_yellow_channel_cell_{j}.tiff"),
            yellow_channel,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_green_channel_cell_{j}.tiff"),
            green_channel,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_red_channel_cell_{j}.tiff"),
            red_channel,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_magenta_channel_cell_{j}.tiff"),
            magenta_channel,
            compression=None,
        )

        # write crops
        tf.imwrite(
            pathlib.Path(
                f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{j}.tiff"
            ),
            composite_image_crop,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_blue_channel_crop_cell_{j}.tiff"),
            blue_channel_crop,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_yellow_channel_crop_cell_{j}.tiff"),
            yellow_channel_crop,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_green_channel_crop_cell_{j}.tiff"),
            green_channel_crop,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(f"{image_out_dir_path}/{i}_red_channel_crop_cell_{j}.tiff"),
            red_channel_crop,
            compression=None,
        )
        tf.imwrite(
            pathlib.Path(
                f"{image_out_dir_path}/{i}_magenta_channel_crop_cell_{j}.tiff"
            ),
            magenta_channel_crop,
            compression=None,
        )

        composite_image = cv2.cvtColor(composite_image, cv2.COLOR_BGR2RGB)
        composite_image_crop = cv2.cvtColor(composite_image_crop, cv2.COLOR_BGR2RGB)

        # save the image as a png file
        cv2.imwrite(
            f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_cell_{j}.png",
            composite_image,
        )
        cv2.imwrite(
            f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{j}.png",
            composite_image_crop,
        )

        tmp_df["comparison"] = comparison
        tmp_df["treatment"] = treatment
        tmp_df["feature"] = feature

        # tmp_df = tmp_df.to_frame().T
        tmp_df[
            "image_compsite_path"
        ] = f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_cell_{j}.png"
        tmp_df[
            "image_composite_crop_path"
        ] = f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{j}.png"

        tmp_df[
            "image_DAPI_path"
        ] = f"{image_out_dir_path}/{i}_blue_channel_cell_{j}.png"
        tmp_df[
            "image_ER_path"
        ] = f"{image_out_dir_path}/{i}_yellow_channel_cell_{j}.png"
        tmp_df[
            "image_GasderminD_path"
        ] = f"{image_out_dir_path}/{i}_green_channel_cell_{j}.png"
        tmp_df["image_AGP_path"] = f"{image_out_dir_path}/{i}_red_channel_cell_{j}.png"
        tmp_df[
            "image_Mitochondria_path"
        ] = f"{image_out_dir_path}/{i}_magenta_channel_cell_{j}.png"

        # crops path
        tmp_df[
            "image_compsite_crop_path"
        ] = f"{composite_image_out_dir_path}/{i}_{channel1}_{channel2}_{channel3}_composite_image_crop_cell_{j}.png"
        tmp_df[
            "image_DAPI_crop_path"
        ] = f"{image_out_dir_path}/{i}_blue_channel_crop_cell_{j}.png"
        tmp_df[
            "image_ER_crop_path"
        ] = f"{image_out_dir_path}/{i}_yellow_channel_crop_cell_{j}.png"
        tmp_df[
            "image_GasderminD_crop_path"
        ] = f"{image_out_dir_path}/{i}_green_channel_crop_cell_{j}.png"
        tmp_df[
            "image_AGP_crop_path"
        ] = f"{image_out_dir_path}/{i}_red_channel_crop_cell_{j}.png"
        tmp_df[
            "image_Mitochondria_crop_path"
        ] = f"{image_out_dir_path}/{i}_magenta_channel_crop_cell_{j}.png"

        main_df = pd.concat([main_df, tmp_df], ignore_index=True)

  0%|          | 0/9 [00:00<?, ?it/s]

O04 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 7 189 O 04 550 149
L10 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 3 43 L 10 1566 49
Cell is on the edge of the image, skipping
O12 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 1 1066 O 12 1612 643
I07 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 11 1621 I 07 1769 791
I04 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 6 1379 I 04 180 789
O04 control control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 6 807 O 04 2082 667


 11%|█         | 1/9 [00:03<00:24,  3.05s/it]

D06 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 6 816 D 06 1048 558
K07 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 15 3105 K 07 974 1500
K07 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 11 524 K 07 1278 275
E07 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 13 2854 E 07 1802 1747
E06 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 6 898 E 06 1564 800
D06 apoptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 4 2132 D 06 349 1283


 22%|██▏       | 2/9 [00:09<00:36,  5.24s/it]

B04 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 12 949 B 04 1155 694
C11 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 5 1711 C 11 2124 1346
M05 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 6 3125 M 05 138 2129
J08 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 4 1918 J 08 1649 1357
N03 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 12 1483 N 03 1596 1852
L03 pyroptosis control_pyroptosis Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1
apoptosis_pyroptosis 11 2376 L 03 867 1817


 33%|███▎      | 3/9 [00:18<00:39,  6.62s/it]

G12 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 8 2983 G 12 1752 2130
M12 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 16 1577 M 12 1399 852
O03 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 9 1196 O 03 1210 800
H11 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 2 1296 H 11 347 795
G11 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 6 779 G 11 1971 498
F04 control apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 14 1336 F 04 971 1232


 44%|████▍     | 4/9 [00:21<00:26,  5.28s/it]

E07 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 1 1582 E 07 1574 1043
E07 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 6 128 E 07 899 109
K07 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 5 2109 K 07 1190 1914
E06 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 15 3395 E 06 1977 2138
L06 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 16 1092 L 06 2132 705
L07 apoptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 11 1448 L 07 898 922


 56%|█████▌    | 5/9 [00:28<00:23,  5.99s/it]

N10 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 3 2069 N 10 1410 1746
B10 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 3 1796 B 10 1454 1068
K08 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 7 769 K 08 1121 810
H09 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 10 685 H 09 842 392
B11 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 13 1893 B 11 1140 1046
L08 pyroptosis apoptosis_control Cytoplasm_AreaShape_Zernike_9_5
apoptosis_pyroptosis 5 1698 L 08 904 1241


 67%|██████▋   | 6/9 [00:35<00:19,  6.44s/it]

E12 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 14 324 E 12 1463 172
F05 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 8 2464 F 05 1953 1619
J04 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 7 600 J 04 2014 877
G05 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 9 2392 G 05 1641 1210
H12 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 12 657 H 12 607 424
F10 control apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 7 2393 F 10 1652 1734


 78%|███████▊  | 7/9 [00:52<00:19,  9.66s/it]

D07 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 11 266 D 07 678 174
D06 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 7 1249 D 06 120 1729
L07 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 10 1698 L 07 216 1164
L06 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 5 1250 L 06 1242 1334
E07 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 6 844 E 07 543 720
L06 apoptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 12 3166 L 06 1016 2018


 89%|████████▉ | 8/9 [01:08<00:11, 11.69s/it]

D11 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 5 2231 D 11 1401 1666
E03 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 7 912 E 03 1408 1298
D09 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 10 328 D 09 1974 181
M04 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 1 3297 M 04 1422 1925
E09 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 12 1751 E 09 1130 1256
E09 pyroptosis apoptosis_pyroptosis Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1
apoptosis_pyroptosis 3 2630 E 09 1771 1604


100%|██████████| 9/9 [01:31<00:00, 10.12s/it]


In [23]:
# define main_df_path
main_df_path = pathlib.Path(f"../results/{CELL_TYPE}/")
# if path does not exist, create it
main_df_path.mkdir(parents=True, exist_ok=True)
# save the dataframe
main_df.to_parquet(f"{main_df_path}/single_cell_predictions.parquet")

In [24]:
# print the number of rows in the df
print(main_df.shape)
main_df

(53, 27)


Unnamed: 0,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,Metadata_Nuclei_Location_Center_Y,Metadata_Nuclei_Location_Center_X,group,Nuclei_RadialDistribution_ZernikePhase_CorrGasdermin_3_1,Cytoplasm_RadialDistribution_ZernikePhase_CorrGasdermin_9_1,Cytoplasm_AreaShape_Zernike_9_5,Metadata_Well,Metadata_Site,Metadata_ImageNumber,...,image_ER_path,image_GasderminD_path,image_AGP_path,image_Mitochondria_path,image_compsite_crop_path,image_DAPI_crop_path,image_ER_crop_path,image_GasderminD_crop_path,image_AGP_crop_path,image_Mitochondria_crop_path
0,media_ctr_0.0_0_Media_ctr_0.0_0,149.170673,550.463942,Control,1.732595,0.246475,-0.640791,O04,7,215,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
1,DMSO_0.100_%_DMSO_1.000_%,643.401361,1612.034014,Control,1.732594,-0.387799,-0.384997,O12,1,337,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
2,DMSO_0.100_%_DMSO_0.025_%,791.51711,1769.718631,Control,-1.728316,0.050109,1.992991,I07,11,267,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
3,H2O2_100.000_uM_DMSO_0.025_%,789.768421,180.649123,Control,-1.728317,-0.349258,0.574726,I04,6,214,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
4,media_ctr_0.0_0_Media_ctr_0.0_0,667.044968,2082.415418,Control,-1.728318,-1.478286,-1.407288,O04,6,214,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
5,Thapsigargin_1.000_uM_DMSO_0.025_%,558.660147,1048.256724,Apoptosis,1.732598,0.868999,0.601962,D06,6,70,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
6,Thapsigargin_1.000_uM_DMSO_0.025_%,1500.613293,974.924471,Apoptosis,1.732597,-0.2859,-0.133513,K07,15,271,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
7,Thapsigargin_1.000_uM_DMSO_0.025_%,275.74359,1278.752137,Apoptosis,1.732589,-1.171877,-1.30259,K07,11,267,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
8,Thapsigargin_10.000_uM_DMSO_0.025_%,1747.673179,1802.420959,Apoptosis,-1.7283,-1.212621,1.156497,E07,13,269,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
9,Thapsigargin_10.000_uM_DMSO_0.025_%,800.229426,1564.862843,Apoptosis,-1.728304,0.107422,-0.037909,E06,6,246,...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...,/home/lippincm/Documents/ML/Interstellar_Analy...
