## Calculate Precision and Recall of profile clusters

Given pairwise correlations between profiles, can we retrieve profiles that share the same MOA or target?

### Part 2 - Calculate precision and recall for each MOA/target

In [1]:
import pathlib
import pandas as pd
import warnings

from scripts.precision_recall_utils import process_precision_matching

In [2]:
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [3]:
# Read the pairwise correlation/matching table for the Cell Painting assay
assay = "cell_painting"

results_dir = pathlib.Path("results")
results_file = results_dir / f"dose_corr_matching_moa_target_{assay}.tsv.gz"

# Tab-separated file; pandas infers the gzip compression from the .gz suffix
results_df = pd.read_csv(results_file, sep="\t")

print(results_df.shape)
results_df.head(2)

(5172648, 12)


Unnamed: 0,pert_iname,moa,Metadata_target,Metadata_dose_recode,original_index,correlation,pert_iname_compare,moa_compare,Metadata_target_compare,Metadata_dose_recode_compare,match_moa,match_target
0,zaldaride,calmodulin antagonist,CALM1,1,1,0.126103,clobetasol,glucocorticoid receptor agonist,NR3C1|PLA2G1B,1,False,False
1,saquinavir,hiv protease inhibitor,CYP3A4,1,1,0.023967,clobetasol,glucocorticoid receptor agonist,NR3C1|PLA2G1B,1,False,False


In [4]:
# Score the table loaded in the previous cell, then tag every row with the
# assay it came from (both `results_df` and `assay` are set by that cell)
cp_precision_df = (
    process_precision_matching(results_df)
    .assign(assay=assay)
)

print(cp_precision_df.shape)
cp_precision_df.head(2)

(4536, 5)


Unnamed: 0,drug_impact,dose,avg_precision,impact_category,assay
0,5 alpha reductase inhibitor,1,0.001601,moa,cell_painting
1,5 alpha reductase inhibitor,2,0.00149,moa,cell_painting


In [5]:
# Repeat the load-and-score steps for the L1000 assay
assay = "L1000"

results_dir = pathlib.Path("results")
results_file = results_dir / f"dose_corr_matching_moa_target_{assay}.tsv.gz"

# Tab-separated file; pandas infers the gzip compression from the .gz suffix
results_df = pd.read_csv(results_file, sep="\t")

# dose_col is passed explicitly here, whereas the Cell Painting call uses the default
# NOTE(review): presumably the L1000 table names its dose column "dose" - confirm
l1000_precision_df = (
    process_precision_matching(results_df, dose_col="dose")
    .assign(assay=assay)
)

print(l1000_precision_df.shape)
l1000_precision_df.head(4)

(4536, 5)


Unnamed: 0,drug_impact,dose,avg_precision,impact_category,assay
0,5 alpha reductase inhibitor,1,0.006042,moa,L1000
1,5 alpha reductase inhibitor,2,0.002398,moa,L1000
2,5 alpha reductase inhibitor,3,0.005141,moa,L1000
3,5 alpha reductase inhibitor,4,0.002491,moa,L1000


In [6]:
# Stack the Cell Painting and L1000 scores row-wise into a single table.
# axis=0 is the documented spelling for row-wise concatenation, and
# ignore_index=True gives the result a fresh 0..n-1 index in the same call.
precision_df = pd.concat([cp_precision_df, l1000_precision_df], axis=0, ignore_index=True)

# Write the combined scores as a tab-separated file without the index column;
# pandas infers gzip compression from the .gz suffix
output_file = pathlib.Path("results", "moa_target_precision.tsv.gz")
precision_df.to_csv(output_file, sep="\t", index=False)

print(precision_df.shape)
precision_df.head()

(9072, 5)


Unnamed: 0,drug_impact,dose,avg_precision,impact_category,assay
0,5 alpha reductase inhibitor,1,0.001601,moa,cell_painting
1,5 alpha reductase inhibitor,2,0.00149,moa,cell_painting
2,5 alpha reductase inhibitor,3,0.001189,moa,cell_painting
3,5 alpha reductase inhibitor,4,0.001618,moa,cell_painting
4,5 alpha reductase inhibitor,5,0.002347,moa,cell_painting
