In [1]:
import pathlib
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
from copairs.map import aggregate

warnings.filterwarnings("ignore")

In [2]:
# Input / output locations, all derived from the processed-data folder
processed_data_dir = pathlib.Path("../data/processed/")

# directory the AP score CSVs are read from
sc_ap_scores_dir = processed_data_dir.joinpath("mAP_scores", "morphology").resolve()

# directory the aggregated mAP table is written to; created here if missing
agg_sc_ap_scores_dir = (
    processed_data_dir.joinpath("aggregate_mAPs", "morphology").resolve()
)
agg_sc_ap_scores_dir.mkdir(exist_ok=True, parents=True)

## Preparing the dataset


In [3]:
# Collect the AP score tables used in this notebook: every CSV in the scores
# directory whose file name (stem) contains the string "treatment".
# The listing is sorted so the concatenated row order does not depend on the
# order in which the filesystem happens to return the files.
all_files = sorted(sc_ap_scores_dir.glob("*.csv"))
class_files = [file for file in all_files if "treatment" in file.stem]

# fail early with a readable message instead of pandas' "No objects to concatenate"
if not class_files:
    raise FileNotFoundError(
        f"No CSV file containing 'treatment' in its name found in {sc_ap_scores_dir}"
    )

# read every file and tag its rows with the stem of the file they came from,
# then stack everything into a single table of AP scores
mAPs = pd.concat([pd.read_csv(file).assign(file=file.stem) for file in class_files])
mAPs.head()

Unnamed: 0,Metadata_Well,Metadata_labels,average_precision,p_value,n_pos_pairs,n_total_pairs,shuffled,comparison,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,file
0,B02,Pyroptosis,1.0,1.0,3.0,3.0,non-shuffled,LPS_0.010_ug_per_ml_DMSO_0.025_%_Pyroptosis_vs...,LPS_0.010_ug_per_ml_DMSO_0.025_%,mAP_scores_regular_treatment
1,B03,Pyroptosis,1.0,1.0,3.0,3.0,non-shuffled,LPS_0.010_ug_per_ml_DMSO_0.025_%_Pyroptosis_vs...,LPS_0.010_ug_per_ml_DMSO_0.025_%,mAP_scores_regular_treatment
2,B08,Pyroptosis,1.0,1.0,3.0,3.0,non-shuffled,LPS_0.010_ug_per_ml_DMSO_0.025_%_Pyroptosis_vs...,LPS_0.010_ug_per_ml_DMSO_0.025_%,mAP_scores_regular_treatment
3,B09,Pyroptosis,1.0,1.0,3.0,3.0,non-shuffled,LPS_0.010_ug_per_ml_DMSO_0.025_%_Pyroptosis_vs...,LPS_0.010_ug_per_ml_DMSO_0.025_%,mAP_scores_regular_treatment
4,B06,Control,0.071988,1.0,7.0,77.0,non-shuffled,LPS_0.010_ug_per_ml_DMSO_0.025_%_Pyroptosis_vs...,DMSO_0.100_%_DMSO_0.025_%,mAP_scores_regular_treatment


In [4]:
# split the AP scores by the value of the "shuffled" column:
# rows labeled "non-shuffled" and rows labeled "features_shuffled"
is_non_shuffled = mAPs["shuffled"] == "non-shuffled"
is_features_shuffled = mAPs["shuffled"] == "features_shuffled"

reg_sc_mAPs = mAPs.loc[is_non_shuffled]
shuffled_feat_sc_mAPs = mAPs.loc[is_features_shuffled]

In [6]:
def _sampling_error(values):
    """Return std(values) / sqrt(len(values)).

    NOTE(review): np.std uses its default ddof=0 (population standard deviation);
    confirm this is intended rather than the sample form (ddof=1).
    """
    return np.std(values) / np.sqrt(len(values))


# group the AP scores by treatment and by shuffle label
group_cols = ["oneb_Metadata_Treatment_Dose_Inhibitor_Dose", "shuffled"]
df_group = mAPs.groupby(by=group_cols)

# one record per group: treatment, shuffle label, sampling error of its AP values
cols = [*group_cols, "sampling_error"]
sampling_error_df = pd.DataFrame(
    [
        [treatment, shuffle_label, _sampling_error(group["average_precision"].values)]
        for (treatment, shuffle_label), group in df_group
    ],
    columns=cols,
)

sampling_error_df.head()

Unnamed: 0,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,shuffled,sampling_error
0,DMSO_0.100_%_DMSO_0.025_%,non-shuffled,0.007061
1,DMSO_0.100_%_DMSO_1.000_%,non-shuffled,0.016898
2,DMSO_0.100_%_Z-VAD-FMK_100.000_uM,non-shuffled,0.010757
3,DMSO_0.100_%_Z-VAD-FMK_30.000_uM,non-shuffled,0.015292
4,Disulfiram_0.100_uM_DMSO_0.025_%,non-shuffled,0.010588


In [8]:
# Run copairs' aggregate on each (treatment, shuffle label) group of AP scores,
# passing 0.05 as the threshold, then stack the per-group results.
treatment_col = "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"

per_group_mAPs = []
for (treatment, shuffle_label), group_scores in mAPs.groupby(
    by=[treatment_col, "shuffled"]
):
    agg_df = aggregate(group_scores, sameby=[treatment_col], threshold=0.05)
    # label the aggregated rows with the group they were computed from
    agg_df = agg_df.assign(**{treatment_col: treatment, "shuffled": shuffle_label})
    per_group_mAPs.append(agg_df)

# combine all groups and save the table next to the other aggregate outputs
mAP_dfs = pd.concat(per_group_mAPs)
mAP_dfs.to_csv(agg_sc_ap_scores_dir / "mAP_scores_treatment.csv", index=False)
mAP_dfs.head()

Unnamed: 0,oneb_Metadata_Treatment_Dose_Inhibitor_Dose,mean_average_precision,nlog10pvalue,q_value,nlog10qvalue,above_p_threshold,above_q_threshold,shuffled
0,DMSO_0.100_%_DMSO_0.025_%,0.175935,-0.0,1.0,-0.0,False,False,non-shuffled
0,DMSO_0.100_%_DMSO_1.000_%,0.583296,0.69897,0.2,0.69897,False,False,non-shuffled
0,DMSO_0.100_%_Z-VAD-FMK_100.000_uM,0.401186,0.69897,0.2,0.69897,False,False,non-shuffled
0,DMSO_0.100_%_Z-VAD-FMK_30.000_uM,0.407908,0.69897,0.2,0.69897,False,False,non-shuffled
0,Disulfiram_0.100_uM_DMSO_0.025_%,0.110472,0.09691,0.8,0.09691,False,False,non-shuffled
