In [1]:
import argparse
import pathlib
import random

import numpy as np
import pandas as pd
import toml
from copairs import map
from copairs.matching import assign_reference_index

# Detect whether this is running under IPython/Jupyter: looking up
# `get_ipython` raises NameError when the name is not defined, which is
# taken to mean the file is being executed as a plain Python script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Decide whether the data gets shuffled: inside a notebook shuffling is
# always on; as a script it is controlled by the --shuffle command-line flag.
if in_notebook:
    shuffle = True
else:
    parser = argparse.ArgumentParser(description="Match pairs of samples")
    parser.add_argument(
        "--shuffle",
        help="Shuffle the data",
        action="store_true",
    )

    args = parser.parse_args()
    shuffle = args.shuffle

In [3]:
# Locate the ground-truth TOML holding the treatment group lists;
# strict resolution raises immediately if the file does not exist.
ground_truth_path = pathlib.Path(
    "../../4.sc_Morphology_Neural_Network_MLP_Model/MLP_utils/ground_truth.toml"
).resolve(strict=True)
ground_truth = toml.load(ground_truth_path)

# Pull out the treatment list stored under each class section
apoptosis_ground_truth = ground_truth["Apoptosis"]["apoptosis_groups_list"]
pyroptosis_ground_truth = ground_truth["Pyroptosis"]["pyroptosis_groups_list"]
control_ground_truth = ground_truth["Healthy"]["healthy_groups_list"]

# Directory the mAP scores are written to (created with parents if missing)
map_out_dir = pathlib.Path("../data/processed/mAP_scores/morphology/")
map_out_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Path to the aggregated profiles; fails fast if the parquet file is missing
agg_data = pathlib.Path(
    "../../data/PBMC_preprocessed_sc_norm_aggregated.parquet"
).resolve(strict=True)

# Read the profiles and shorten the treatment column name in one pass
treatment_column_rename = {
    "oneb_Metadata_Treatment_Dose_Inhibitor_Dose": "Metadata_Treatment"
}
df = pd.read_parquet(agg_data).rename(columns=treatment_column_rename)
df.head()

Unnamed: 0,Metadata_Well,Metadata_Treatment,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Orientation,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,Cytoplasm_AreaShape_Zernike_2_0,...,Nuclei_Texture_InverseDifferenceMoment_CorrER_3_03_256,Nuclei_Texture_InverseDifferenceMoment_CorrMito_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_00_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_03_256,Nuclei_Texture_SumEntropy_CorrPM_3_01_256,Nuclei_Texture_SumVariance_CorrPM_3_03_256,Nuclei_Texture_Variance_CorrER_3_02_256,Nuclei_Texture_Variance_CorrMito_3_01_256
0,B02,LPS_0.010_ug_per_ml_DMSO_0.025_%,0.100173,-0.059734,0.218567,0.111938,0.00742,-0.100946,-0.030356,-0.070701,...,0.021386,-0.095924,-0.182695,-0.185317,-0.183084,-0.189434,0.217271,0.023909,-0.015452,-0.004886
1,B03,LPS_0.010_ug_per_ml_DMSO_0.025_%,0.137279,-0.097646,0.205644,0.108021,-0.002159,-0.141895,-0.059932,-0.091195,...,0.034647,0.079415,-0.10595,-0.112622,-0.108821,-0.114137,0.141156,0.022128,-0.017276,-0.006272
2,B04,LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_...,0.071345,-0.053566,0.055404,0.013373,0.004443,-0.111708,-0.084402,-0.043409,...,-0.087337,-0.67167,-0.068129,-0.06252,-0.063204,-0.066542,0.074449,-0.020061,0.022286,0.039616
3,B05,LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_...,0.110685,-0.084346,0.107954,0.071923,0.00415,-0.121376,-0.075382,-0.052805,...,-0.096255,-1.263923,-0.102173,-0.099375,-0.10133,-0.100625,0.11406,-0.007227,0.009458,0.059863
4,B06,DMSO_0.100_%_DMSO_0.025_%,-0.021771,0.018442,-0.048689,-0.07049,-0.005284,-0.008255,-0.012815,-0.017174,...,0.082642,0.292318,0.029805,0.022969,0.026496,0.024827,-0.028355,-0.00784,-0.037983,-0.014871


In [5]:
# Label every row with a class derived from the ground-truth treatment lists.
# Precedence matches the original row-wise logic: a treatment in the apoptosis
# list is "Apoptosis", otherwise one in the pyroptosis list is "Pyroptosis",
# and everything else falls back to "Control".
# NOTE(review): the fallback also covers treatments that are absent from
# control_ground_truth — confirm that silently labelling them "Control" is
# intended.
treatment = df["Metadata_Treatment"]
metadata_labels = pd.Series(
    # np.select picks the first matching condition, which preserves the
    # Apoptosis-before-Pyroptosis precedence.
    np.select(
        [
            treatment.isin(apoptosis_ground_truth),
            treatment.isin(pyroptosis_ground_truth),
        ],
        ["Apoptosis", "Pyroptosis"],
        default="Control",
    ),
    index=df.index,
    name="Metadata_labels",
)

# Remove a label column left over from a previous run of this cell so that
# the insert below does not fail on a duplicate column name.
if "Metadata_labels" in df.columns:
    del df["Metadata_labels"]
# Place the label column second, right after the first metadata column
df.insert(1, "Metadata_labels", metadata_labels)

In [6]:
if shuffle:
    # The stdlib seed is kept for any downstream use of `random`, but it does
    # NOT affect NumPy. The permutations below are drawn from a dedicated,
    # seeded NumPy generator so the shuffled baseline is reproducible.
    random.seed(0)
    rng = np.random.default_rng(0)
    # Permute each column independently (metadata columns included), which
    # breaks the row-wise link between every pair of columns.
    for col in df.columns:
        df[col] = rng.permutation(df[col].to_numpy())

In [7]:
# Name of the column that will hold the reference index
reference_col = "Metadata_reference_index"
# Query selecting the DMSO rows that act as the reference
reference_query = "Metadata_Treatment == 'DMSO_0.100_%_DMSO_0.025_%'"

# Rows not matched by the query receive default_value (-1), as seen in the
# displayed output — presumably matched rows get their own index; confirm
# against the copairs documentation.
df_activity = assign_reference_index(
    df,
    reference_query,
    reference_col=reference_col,
    default_value=-1,
)
df_activity.head()

Unnamed: 0,Metadata_Well,Metadata_labels,Metadata_Treatment,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Orientation,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,...,Nuclei_Texture_InverseDifferenceMoment_CorrMito_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_00_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CorrPM_3_03_256,Nuclei_Texture_SumEntropy_CorrPM_3_01_256,Nuclei_Texture_SumVariance_CorrPM_3_03_256,Nuclei_Texture_Variance_CorrER_3_02_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Metadata_reference_index
0,B03,Control,Flagellin_0.100_ug_per_ml_DMSO_0.025_%,0.212101,-0.086266,0.199821,0.023845,-0.019501,-0.136982,-0.043793,...,-0.112343,0.130465,0.099233,-0.031162,-0.085304,-0.00505,-0.014238,0.017012,-0.011485,-1
1,O12,Pyroptosis,LPS_0.100_ug_per_ml_DMSO_0.025_%,0.158213,0.018494,-0.086838,0.020259,-0.004711,-0.026149,0.066402,...,-0.072985,0.011064,-0.068718,-0.135211,0.068626,-0.017263,-0.040739,-0.018409,-0.007831,-1
2,L06,Control,Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM,-0.075121,0.095494,0.060143,0.125031,-0.017194,-0.145544,-0.044858,...,-0.3609,0.134689,0.086578,0.095933,-0.286537,-0.078258,0.013157,-0.012277,0.02338,-1
3,K03,Pyroptosis,Thapsigargin_1.000_uM_DMSO_0.025_%,0.127851,-0.039661,-0.037166,0.051403,0.010892,-0.169862,-0.065433,...,0.229581,-0.122204,0.138037,-0.04538,-0.01282,0.082831,-0.049986,-0.00355,0.021821,-1
4,G02,Pyroptosis,Topotecan_5.000_nM_DMSO_0.025_%,0.076874,-0.009482,0.215223,0.084962,-0.007959,0.012145,-0.073524,...,0.872669,0.002117,-0.280193,-0.156617,0.026433,0.066846,0.035393,-0.001726,-0.004956,-1


In [8]:
# Pair definitions passed to copairs: positive pairs share treatment, label
# and reference index; negative pairs differ in treatment and reference index.
pos_sameby = ["Metadata_Treatment", "Metadata_labels", reference_col]
neg_diffby = ["Metadata_Treatment", reference_col]
pos_diffby, neg_sameby = [], []

# Split the frame into feature values (columns not starting with "Metadata")
# and the metadata columns.
profiles = df_activity.filter(regex="^(?!Metadata)").values
metadata = df_activity.filter(regex="Metadata")

# Compute average precision, then drop the DMSO reference rows from the result
activity_ap = map.average_precision(
    metadata,
    profiles,
    pos_sameby,
    pos_diffby,
    neg_sameby,
    neg_diffby,
).query("Metadata_Treatment != 'DMSO_0.100_%_DMSO_0.025_%'")
activity_ap.head()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


Unnamed: 0,Metadata_Well,Metadata_labels,Metadata_Treatment,Metadata_reference_index,n_pos_pairs,n_total_pairs,average_precision
0,B03,Control,Flagellin_0.100_ug_per_ml_DMSO_0.025_%,-1,2,10,0.625
1,O12,Pyroptosis,LPS_0.100_ug_per_ml_DMSO_0.025_%,-1,2,10,0.173611
2,L06,Control,Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM,-1,1,9,0.111111
3,K03,Pyroptosis,Thapsigargin_1.000_uM_DMSO_0.025_%,-1,3,11,0.303571
4,G02,Pyroptosis,Topotecan_5.000_nM_DMSO_0.025_%,-1,2,10,0.225


In [9]:
# Aggregate the per-row AP scores into mAP per positive-pair group
activity_map = map.mean_average_precision(
    activity_ap,
    pos_sameby,
    null_size=1_000_000,
    threshold=0.05,
    seed=0,
)

# Note: despite the column name, this is computed from the *corrected* p-value
log10_corrected_p = activity_map["corrected_p_value"].apply(np.log10)
activity_map["-log10(p-value)"] = -log10_corrected_p

# Move the index levels back into ordinary columns to make it easier to work with
activity_map = activity_map.reset_index()
activity_map.head()

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

Unnamed: 0,Metadata_Treatment,Metadata_labels,Metadata_reference_index,mean_average_precision,indices,p_value,corrected_p_value,below_p,below_corrected_p,-log10(p-value)
0,DMSO_0.100_%_DMSO_1.000_%,Control,-1,0.118056,"[16, 99]",0.888463,0.888463,False,False,0.051361
1,DMSO_0.100_%_DMSO_1.000_%,Pyroptosis,-1,0.126984,"[57, 108]",0.777372,0.838046,False,False,0.076732
2,DMSO_0.100_%_Z-VAD-FMK_100.000_uM,Control,-1,0.399242,"[11, 64, 66, 116]",0.436271,0.750905,False,False,0.124415
3,DMSO_0.100_%_Z-VAD-FMK_30.000_uM,Pyroptosis,-1,0.200926,"[30, 82, 107]",0.799953,0.838046,False,False,0.076732
4,Disulfiram_0.100_uM_DMSO_0.025_%,Control,-1,0.291667,"[37, 87]",0.333306,0.637628,False,False,0.195432


In [10]:
# Shuffled and unshuffled runs are written to separate files
map_filename = "activity_map_shuffled.parquet" if shuffle else "activity_map.parquet"
activity_map.to_parquet(map_out_dir / map_filename)