## Imports

In [1]:
import pathlib

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tifffile as tf  # write tiff files
from cytocherrypick.calculations import find_median
from PIL import Image  # read tiff files
from toml import load
from tqdm import tqdm  # progress bar

In [2]:
CELL_TYPE = "SHSY5Y"

In [3]:
agg_cell_path = pathlib.Path(f"../../../data/{CELL_TYPE}_preprocessed_sc_norm.parquet")
agg_cell_df = pd.read_parquet(agg_cell_path)

columns_to_load = [
    "Metadata_Nuclei_Location_Center_Y",
    "Metadata_Nuclei_Location_Center_X",
]
# get the unfeature selected data
unselected_df_path = pathlib.Path(
    "data/{CELL_TYPE}_sc.parquet",
    columns=columns_to_load,
)

In [4]:
# Get the current working directory of the repository
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")
root_dir

PosixPath('/home/lippincm/Documents/ML/Interstellar_Analysis')

In [5]:
image_out_dir_path = pathlib.Path(f"{root_dir}/8.cytopick_analysis/figures/PBMC/")


# if path does not exist, create it
image_out_dir_path.mkdir(parents=True, exist_ok=True)

In [6]:
# define directories
# where the images are on a local machine
# this is a hard coded path to the 1TB image directory

#####
# THIS PATH NEEDS TO BE CHANGED TO THE LOCAL IMAGE DIRECTORY ON YOUR MACHINE
#####

image_dir_path = pathlib.Path(
    "/media/lippincm/18T/interstellar_data/70117_20230210MM1_Gasdermin514_CP_BC430856__2023-03-22T15_42_38-Measurement1/2.IC/"
).resolve(strict=True)

In [7]:
# path
anova_path = pathlib.Path(
    f"../../../1.Exploratory_Data_Analysis/results/{CELL_TYPE}_combined.parquet"
)
# read in the anova results
anova_results = pd.read_parquet(anova_path)

## define the groups

In [8]:
# read in the ground truth data
data_path_ground_truth = (
    "../../../4.sc_Morphology_Neural_Network_MLP_Model/MLP_utils/ground_truth.toml"
)
ground_truth = load(data_path_ground_truth)

# make a a list of the treatments that are in the ground truth data
apoptosis_ground_truth_list = ground_truth["Apoptosis"]["apoptosis_groups_list"]
pyroptosis_ground_truth_list = ground_truth["Pyroptosis"]["pyroptosis_groups_list"]
control_ground_truth_list = ground_truth["Healthy"]["healthy_groups_list"]

# replace Flagellin_1.000_ug_per_ml_DMSO_0.0_% with Flagellin_1.000_ug_per_ml_DMSO_0.025_%
agg_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = agg_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace(
    "Flagellin_1.000_ug_per_ml_DMSO_0.0_%", "Flagellin_1.000_ug_per_ml_DMSO_0.025_%"
)
# convert media_ctr_0.0_ug_per_ml_Media_ctr_0_0 to media_ctr_0.0_ug_per_ml_Media_ctr_0_025
agg_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = agg_cell_df[
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
].replace("media_ctr_0.0_ug_per_ml_Media_ctr_0_0", "media_ctr_0.0_0_Media_ctr_0.0_0")

# make a new column that is the treatment group based on the ground truth data
agg_cell_df["group"] = "NA"
agg_cell_df.loc[
    agg_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        apoptosis_ground_truth_list
    ),
    "group",
] = "Apoptosis"
agg_cell_df.loc[
    agg_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        pyroptosis_ground_truth_list
    ),
    "group",
] = "Pyroptosis"
agg_cell_df.loc[
    agg_cell_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(
        control_ground_truth_list
    ),
    "group",
] = "Control"

# make the group column a category
agg_cell_df["group"] = pd.Categorical(
    agg_cell_df["group"],
    categories=["Control", "Apoptosis", "Pyroptosis"],
    ordered=True,
)

agg_cell_df["group"].unique()

['Control', 'Apoptosis', 'Pyroptosis']
Categories (3, object): ['Control' < 'Apoptosis' < 'Pyroptosis']

In [9]:
# sperate the data into the different groups
control_df = agg_cell_df[agg_cell_df["group"] == "Control"]
apoptosis_df = agg_cell_df[agg_cell_df["group"] == "Apoptosis"]
pyroptosis_df = agg_cell_df[agg_cell_df["group"] == "Pyroptosis"]

In [10]:
# create a column that adds group1 and group2 together
anova_results["group"] = anova_results["group1"] + "_" + anova_results["group2"]
print(anova_results.shape)

# filter out rows that have p-adj_abs > 0.05
anova_results = anova_results[anova_results["p-adj_abs"] < 0.05]
print(anova_results.shape)

# change the group names to replace healthy with control
anova_results["group"] = anova_results["group"].str.replace("healthy", "control")
# make a -log10(p-adj) column
anova_results["neg-log10(p-adj_abs)"] = -np.log10(anova_results["p-adj_abs"])
# sort by neg-log10(p-adj_abs)
anova_results = anova_results.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
# split the dfs into comparisons
c_p_df = anova_results[anova_results["group"] == "control_pyroptosis"]
a_c_df = anova_results[anova_results["group"] == "apoptosis_control"]
a_p_df = anova_results[anova_results["group"] == "apoptosis_pyroptosis"]
# sort by neg-log10(p-adj_abs)
c_p_df = c_p_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
a_c_df = a_c_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)
a_p_df = a_p_df.sort_values(by="neg-log10(p-adj_abs)", ascending=False)

(3753, 11)
(1753, 11)


  result = getattr(ufunc, method)(*inputs, **kwargs)


In [11]:
# get the top 10 features for each comparison
c_p_top10 = c_p_df.iloc[:10, :]
a_c_top10 = a_c_df.iloc[:10, :]
a_p_top10 = a_p_df.iloc[:10, :]

c_p_top10["features"].to_list()
a_c_top10["features"].to_list()
a_p_top10["features"].to_list()
dict_of_top_all = {}
dict_of_top_all["control_pyroptosis"] = c_p_top10["features"].to_list()
dict_of_top_all["apoptosis_control"] = a_c_top10["features"].to_list()
dict_of_top_all["apoptosis_pyroptosis"] = a_p_top10["features"].to_list()

In [12]:
group_dict = {}
final_dict = {}

for i in tqdm(dict_of_top_all):
    for feature in dict_of_top_all[i]:
        for df in [control_df, apoptosis_df, pyroptosis_df]:
            df = df.sort_values(by=feature, ascending=False, inplace=False)
            df.reset_index(inplace=True, drop=True)
            df = df.iloc[0, :]
            group_dict[feature] = df
    final_dict[i] = group_dict

100%|██████████| 3/3 [01:15<00:00, 25.28s/it]


## Get the images

In [13]:
# define a dictionary for coding the wells and FOVs correctly
well_dict = {
    "A": "01",
    "B": "02",
    "C": "03",
    "D": "04",
    "E": "05",
    "F": "06",
    "G": "07",
    "H": "08",
    "I": "09",
    "J": "10",
    "K": "11",
    "L": "12",
    "M": "13",
    "N": "14",
    "O": "15",
    "P": "16",
}
column_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
    "17": "17",
    "18": "18",
    "19": "19",
    "20": "20",
    "21": "21",
    "22": "22",
    "23": "23",
    "24": "24",
}
fov_dict = {
    "1": "01",
    "2": "02",
    "3": "03",
    "4": "04",
    "5": "05",
    "6": "06",
    "7": "07",
    "8": "08",
    "9": "09",
    "10": "10",
    "11": "11",
    "12": "12",
    "13": "13",
    "14": "14",
    "15": "15",
    "16": "16",
}

In [14]:
image_basename_1 = "p04-ch1sk1fk1fl1_IC.tiff"
image_basename_2 = "p04-ch2sk1fk1fl1_IC.tiff"
image_basename_3 = "p04-ch3sk1fk1fl1_IC.tiff"
image_basename_4 = "p04-ch4sk1fk1fl1_IC.tiff"
image_basename_5 = "p04-ch5sk1fk1fl1_IC.tiff"

In [15]:
# set constants for the loop
radius = 50
# define the number of cells to select
n = 5

In [16]:
# define an empty df
main_df = apoptosis_df.drop(apoptosis_df.index)

In [17]:
for group in final_dict:
    print(group)
    for df in final_dict[group]:
        tmp_df = pd.DataFrame(final_dict[group][df]).T
        image_id = tmp_df["Metadata_ImageNumber"].astype(int).astype(str)
        fov_id = tmp_df["Metadata_Site"].astype(int).astype(str)
        cell_id = tmp_df["Metadata_Cells_Number_Object_Number"]
        well_id = tmp_df["Metadata_Well"]
        row_id = well_id[0]
        column_id = well_id[1:]
        center_x = tmp_df["Metadata_Nuclei_Location_Center_X"].astype(int)
        center_y = tmp_df["Metadata_Nuclei_Location_Center_Y"].astype(int)
        # create a custom and contstant bounding box for the images
        # this is made from the extracted center_x and center_y of the cell (nucleus)
        min_x_box = center_x - radius
        max_x_box = center_x + radius
        min_y_box = center_y - radius
        max_y_box = center_y + radius

print(center_x, center_y)

control_pyroptosis


KeyError: 'Metadata_Nuclei_Location_Center_X'

In [None]:
tmp_df