In [1]:
import argparse
import pathlib
import random

import numpy as np
import pandas as pd
import toml
from copairs import map
from copairs.matching import assign_reference_index

# Detect whether this code is running inside an IPython/Jupyter session.
# `get_ipython` is not defined when the file is run as a plain script, so the
# NameError branch marks the non-notebook case.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Decide whether the data should be shuffled: always on inside a notebook
# session, otherwise controlled by the --shuffle command-line flag.
if in_notebook:
    shuffle = True
else:
    cli = argparse.ArgumentParser(description="Match pairs of samples")
    cli.add_argument("--shuffle", action="store_true", help="Shuffle the data")

    args = cli.parse_args()
    shuffle = args.shuffle

In [3]:
# Locate the TOML file that lists which treatments belong to each group.
# strict=True makes resolve() raise if the file does not exist.
ground_truth_path = pathlib.Path(
    "../../4.sc_Morphology_Neural_Network_MLP_Model/MLP_utils/ground_truth.toml"
).resolve(strict=True)

# Parse the file and pull out the treatment list for each group
ground_truth = toml.load(ground_truth_path)
apoptosis_ground_truth = ground_truth["Apoptosis"]["apoptosis_groups_list"]
pyroptosis_ground_truth = ground_truth["Pyroptosis"]["pyroptosis_groups_list"]
control_ground_truth = ground_truth["Healthy"]["healthy_groups_list"]

# Directory the mAP results are written to (created, with parents, if missing)
map_out_dir = pathlib.Path("../data/processed/mAP_scores/secretome/")
map_out_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Path to the aggregated profile table; resolve(strict=True) raises if absent
agg_data = pathlib.Path(
    "../../data/PBMC_preprocessed_sc_norm_aggregated_nomic.parquet"
).resolve(strict=True)

# Shorter name for the treatment column
treatment_col_rename = {
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose": "Metadata_Treatment"
}

# Read, rename, then keep only columns whose name contains "Metadata" or "NSU"
df = (
    pd.read_parquet(agg_data)
    .rename(columns=treatment_col_rename)
    .filter(regex="Metadata|NSU")
)
df.head()

Unnamed: 0,Metadata_Well,Metadata_Treatment,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],BDNF [NSU],...,TWEAK [NSU],uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU]
0,B02,LPS_0.010_ug_per_ml_DMSO_0.025_%,0.799864,0.239781,0.771419,0.227135,0.282281,0.077979,0.517218,0.268053,...,0.463301,0.396902,0.385081,1.0,0.0,0.430111,0.538503,0.784695,0.468448,0.237545
1,B03,LPS_0.010_ug_per_ml_DMSO_0.025_%,0.758205,0.661245,0.794392,0.71292,0.236378,0.288705,1.0,0.314184,...,0.333056,0.256691,0.327491,0.390866,0.406489,0.412096,0.10483,0.812933,0.518536,0.244397
2,B04,LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_...,0.128848,0.55558,0.823758,0.246641,0.249401,0.109274,0.844234,0.368186,...,0.459161,0.555221,0.357476,0.346884,0.477553,0.427658,0.642061,0.24938,0.627712,0.31835
3,B05,LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_...,0.061989,0.510255,0.785998,0.615661,0.0,0.251962,0.298381,0.492203,...,0.118607,0.308536,0.588899,0.828371,0.484102,0.294634,0.673648,0.236793,0.557634,0.350429
4,B06,DMSO_0.100_%_DMSO_0.025_%,0.09771,0.461685,0.270477,0.514695,0.479281,0.270494,0.708849,0.134432,...,0.386063,0.469875,0.395392,0.560129,0.504521,0.490444,0.258834,0.238358,0.524276,0.25067


In [5]:
# Label every row with a cell-death class derived from its treatment.
#
# Priority is Apoptosis, then Pyroptosis; every remaining row is labelled
# "Control". Note that `control_ground_truth` is therefore not consulted:
# a treatment missing from all three ground-truth lists still ends up as
# "Control".
#
# Membership is tested with vectorized `isin` rather than a row-wise
# `df.apply(axis=1)`, so no temporary boolean columns have to be added to and
# then dropped from `df`.
treatment = df["Metadata_Treatment"]
is_apoptosis = treatment.isin(apoptosis_ground_truth).to_numpy()
is_pyroptosis = treatment.isin(pyroptosis_ground_truth).to_numpy()

metadata_labels = pd.Series(
    np.where(
        is_apoptosis,
        "Apoptosis",
        np.where(is_pyroptosis, "Pyroptosis", "Control"),
    ),
    index=df.index,
    name="Metadata_labels",
    dtype=object,
)

# Keep the cell re-runnable: `insert` raises if the column already exists
if "Metadata_labels" in df.columns:
    df.drop(columns="Metadata_labels", inplace=True)
# Place the label as the second column, right after the first metadata column
df.insert(1, "Metadata_labels", metadata_labels)

In [6]:
if shuffle:
    # Kept from the original cell so the stdlib RNG stays seeded; it has no
    # effect on the NumPy permutation below.
    random.seed(0)
    # The permutation is drawn from NumPy, so NumPy's generator is the one
    # that must be seeded for the shuffled baseline to be reproducible.
    rng = np.random.default_rng(0)
    # Permute each column independently (metadata columns included), which
    # breaks the link between a row's features and its labels.
    for col in df.columns:
        df[col] = rng.permutation(df[col].to_numpy())

In [7]:
# Name of the column that assign_reference_index writes its result into
reference_col = "Metadata_reference_index"
# Query string selecting the rows passed to copairs as the reference condition
reference_query = "Metadata_Treatment == 'DMSO_0.100_%_DMSO_0.025_%'"

# NOTE(review): presumably rows not matched by the query receive
# `default_value` (-1) in `reference_col` — confirm against the copairs docs.
df_activity = assign_reference_index(
    df,
    reference_query,
    reference_col=reference_col,
    default_value=-1,
)
df_activity.head()

Unnamed: 0,Metadata_Well,Metadata_labels,Metadata_Treatment,Activin A [NSU],AITRL (GITR Ligand) [NSU],Amphiregulin [NSU],Amyloid beta [NSU],APRIL [NSU],BAFF [NSU],BCMA (TNFRSF17) [NSU],...,uPA [NSU],VCAM-1 [NSU],VEGF Receptor 2 (Flk-1) [NSU],VEGF-A (165) [NSU],VEGF-C [NSU],VEGF-D [NSU],VEGFR-1 [NSU],WISP-1 (CCN4) [NSU],XCL1 (Lymphotactin) [NSU],Metadata_reference_index
0,B02,Pyroptosis,LPS_0.100_ug_per_ml_DMSO_0.025_%,0.799864,0.239781,0.771419,0.227135,0.282281,0.077979,0.517218,...,0.396902,0.385081,1.0,0.0,0.430111,0.538503,0.784695,0.468448,0.237545,-1
1,B03,Pyroptosis,LPS_10.000_ug_per_ml_Disulfiram_1.000_uM,0.758205,0.661245,0.794392,0.71292,0.236378,0.288705,1.0,...,0.256691,0.327491,0.390866,0.406489,0.412096,0.10483,0.812933,0.518536,0.244397,-1
2,B04,Pyroptosis,Topotecan_20.000_nM_DMSO_0.025_%,0.128848,0.55558,0.823758,0.246641,0.249401,0.109274,0.844234,...,0.555221,0.357476,0.346884,0.477553,0.427658,0.642061,0.24938,0.627712,0.31835,-1
3,B05,Pyroptosis,Flagellin_0.100_ug_per_ml_DMSO_0.025_%,0.061989,0.510255,0.785998,0.615661,0.0,0.251962,0.298381,...,0.308536,0.588899,0.828371,0.484102,0.294634,0.673648,0.236793,0.557634,0.350429,-1
4,B06,Control,Media,0.09771,0.461685,0.270477,0.514695,0.479281,0.270494,0.708849,...,0.469875,0.395392,0.560129,0.504521,0.490444,0.258834,0.238358,0.524276,0.25067,-1


In [8]:
# Pair-matching rules handed to copairs: which metadata columns positive and
# negative pairs must share (sameby) or differ in (diffby).
pos_sameby = ["Metadata_Treatment", "Metadata_labels", reference_col]
pos_diffby = []
neg_sameby = []
neg_diffby = ["Metadata_Treatment", reference_col]

# Split the table into metadata columns (name contains "Metadata") and the
# feature matrix (name does not start with "Metadata").
column_names = df_activity.columns
metadata = df_activity.loc[:, column_names.str.contains("Metadata")]
profiles = df_activity.loc[:, ~column_names.str.startswith("Metadata")].values

# Per-row average precision, with the DMSO reference treatment rows removed
activity_ap = map.average_precision(
    metadata, profiles, pos_sameby, pos_diffby, neg_sameby, neg_diffby
).query("Metadata_Treatment != 'DMSO_0.100_%_DMSO_0.025_%'")
activity_ap.head()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


Unnamed: 0,Metadata_Well,Metadata_labels,Metadata_Treatment,Metadata_reference_index,n_pos_pairs,n_total_pairs,average_precision
0,B02,Pyroptosis,LPS_0.100_ug_per_ml_DMSO_0.025_%,-1,0,8,
1,B03,Pyroptosis,LPS_10.000_ug_per_ml_Disulfiram_1.000_uM,-1,2,10,0.266667
2,B04,Pyroptosis,Topotecan_20.000_nM_DMSO_0.025_%,-1,2,10,0.833333
3,B05,Pyroptosis,Flagellin_0.100_ug_per_ml_DMSO_0.025_%,-1,3,11,0.302778
4,B06,Control,Media,-1,2,10,0.333333


In [9]:
# Aggregate the per-row average precision into one mAP value per `pos_sameby`
# group.
activity_map = map.mean_average_precision(
    activity_ap, pos_sameby, null_size=1_000_000, threshold=0.05, seed=0
)
# Despite the column name, this is computed from the *corrected* p-value
activity_map["-log10(p-value)"] = -np.log10(activity_map["corrected_p_value"])
# Move the index back into ordinary columns so the frame is easier to work with
activity_map = activity_map.reset_index()
activity_map.head()

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Unnamed: 0,Metadata_Treatment,Metadata_labels,Metadata_reference_index,mean_average_precision,indices,p_value,corrected_p_value,below_p,below_corrected_p,-log10(p-value)
0,DMSO_0.100_%_DMSO_1.000_%,Control,-1,0.75,"[46, 57, 68]",0.04432,0.343276,True,False,0.464357
1,DMSO_0.100_%_Z-VAD-FMK_100.000_uM,Control,-1,0.564087,"[32, 80, 83, 123]",0.199671,0.343276,False,False,0.464357
2,DMSO_0.100_%_Z-VAD-FMK_30.000_uM,Pyroptosis,-1,0.513889,"[7, 78, 103]",0.221914,0.343276,False,False,0.464357
3,Disulfiram_0.100_uM_DMSO_0.025_%,Control,-1,0.2,"[86, 118]",0.444527,0.492845,False,False,0.30729
4,Disulfiram_0.100_uM_DMSO_0.025_%,Pyroptosis,-1,0.333333,"[58, 94]",0.22212,0.343276,False,False,0.464357


In [10]:
# Shuffled and unshuffled runs are written to separate files so that neither
# overwrites the other.
map_filename = "activity_map_shuffled.parquet" if shuffle else "activity_map.parquet"
activity_map.to_parquet(map_out_dir / map_filename)