## Compile supplementary tables


| Data | Table |
| :--- | :---: |
| Compound to MOA/Target map | Table S1 |
| Per compound median pairwise Spearman correlations | Table S2 |
| Overrepresentation analysis for compounds that change morphology highly but induce low transcriptional activity | Table S3 |
| Per MOA median pairwise Spearman correlations | Table S4 |
| Percent matching MOA | Table S4 |
| Average precision MOA | Table S4 |
| Per target median pairwise Spearman correlations | Table S5 |
| Average precision targets | Table S5 |
| Deep learning average precision | Table S6 |


In [1]:
suppressPackageStartupMessages(library(dplyr))

source("viz_themes.R")
source("plotting_functions.R")
source("data_functions.R")

In [2]:
# Re-key dose_rename: parse its names as integers and format them with
# `nsmall = 1`, then append an "All" entry that maps onto itself.
# The result is used below to recode the dose column of the average precision
# table.
updated_dose_rename <- c(
    stats::setNames(
        dose_rename,
        format(round(as.integer(names(dose_rename)), 1), nsmall = 1)
    ),
    "All" = "All"
)
updated_dose_rename

In [3]:
# Map assay identifiers to the display labels used in the output tables
assay_update <- c(cell_painting = "Cell Painting", l1000 = "L1000")

## Table S1 - Compound/MOA Map

In [4]:
# Build the Cell Painting side of the compound -> MOA/target map from the
# consensus analytical-set profiles
file <- file.path(
    "..", "1.Data-exploration", "Consensus", "cell_painting",
    "moa_sizes_consensus_datasets",
    "cell_painting_moa_analytical_set_profiles.tsv.gz"
)

# Every column not listed explicitly is parsed as a double
df_cols <- readr::cols(
  Metadata_Plate_Map_Name = readr::col_character(),
  Metadata_cell_id = readr::col_character(),
  Metadata_broad_sample = readr::col_character(),
  Metadata_pert_well = readr::col_character(),
  Metadata_time_point = readr::col_character(),
  Metadata_moa = readr::col_character(),
  Metadata_target = readr::col_character(),
  broad_id = readr::col_character(),
  pert_iname = readr::col_character(),
  moa = readr::col_character(),
  .default = readr::col_double()
)

# Keep one row per compound/dose/moa/target combination. Rows are ordered by
# the compound name as stored in the file; names and MOAs are lower-cased
# afterwards.
cp_df <- readr::read_tsv(file, col_types = df_cols) %>%
    dplyr::select(
        broad_id,
        pert_iname,
        dose = Metadata_dose_recode,
        moa,
        target = Metadata_target
    ) %>%
    dplyr::distinct() %>%
    dplyr::arrange(pert_iname) %>%
    dplyr::mutate(
        pert_iname = tolower(pert_iname),
        moa = tolower(moa)
    )

print(dim(cp_df))
head(cp_df)

[1] 5574    5


broad_id,pert_iname,dose,moa,target
<chr>,<chr>,<dbl>,<chr>,<chr>
BRD-A29731977,17-hydroxyprogesterone-caproate,6,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,5,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,4,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,3,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,2,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,1,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR


In [5]:
# Load the L1000 consensus analytical-set profiles and full-join them with the
# Cell Painting compound map (cp_df), so compounds present in either data set
# are kept in the resulting compound -> MOA/target map
file <- file.path(
    "..",
    "1.Data-exploration",
    "Consensus",
    "L1000",
    "moa_sizes_consensus_datasets",
    "L1000_moa_analytical_set_profiles.tsv.gz"
)

# Every column not listed explicitly is parsed as a double
df_cols <- readr::cols(
  .default = readr::col_double(),
  sig_id = readr::col_character(),
  pert_id = readr::col_character(),
  pert_idose = readr::col_character(),
  pert_iname = readr::col_character(),
  moa = readr::col_character()
)

# cp_df already stores pert_iname and moa in lower case, so the L1000 join keys
# must be lower-cased BEFORE the join. Otherwise a compound whose name or MOA
# differs only by case fails to match and ends up as an extra row with a
# missing target. Lower-casing first also lets distinct() drop rows that are
# duplicates up to case.
l1000_df <- readr::read_tsv(file, col_types = df_cols) %>%
    dplyr::mutate(
        pert_iname = tolower(pert_iname),
        moa = tolower(moa)
    ) %>%
    dplyr::full_join(
        cp_df,
        by = c(
            "pert_id" = "broad_id",
            "pert_iname" = "pert_iname",
            "moa" = "moa",
            "dose" = "dose"
        )
    ) %>%
    dplyr::select(pert_id, pert_iname, dose, moa, target) %>%
    dplyr::distinct() %>%
    dplyr::rename(broad_id = pert_id)

print(dim(l1000_df))
head(l1000_df)

[1] 5754    5


broad_id,pert_iname,dose,moa,target
<chr>,<chr>,<dbl>,<chr>,<chr>
BRD-K25114078,aminoguanidine,6,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3
BRD-K25114078,aminoguanidine,5,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3
BRD-K25114078,aminoguanidine,4,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3
BRD-K25114078,aminoguanidine,3,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3
BRD-K25114078,aminoguanidine,2,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3
BRD-K25114078,aminoguanidine,1,nitric oxide synthase inhibitor,AKR1B1|NOS2|TIMP3


In [6]:
# Stack the Cell Painting and L1000 compound maps and drop exact duplicates
map_df <- dplyr::distinct(dplyr::bind_rows(cp_df, l1000_df))

# Replace the recoded dose values with the labels defined in dose_rename
map_df[["dose"]] <- dplyr::recode_factor(map_df[["dose"]], !!!dose_rename)

map_df <- dplyr::arrange(map_df, pert_iname, dose)

# This is Supplementary Table 1
output_file <- file.path("results", "tableS1_compound_to_moa_target_map.tsv")
readr::write_tsv(map_df, output_file)

print(dim(map_df))
head(map_df, 7)

[1] 5754    5


broad_id,pert_iname,dose,moa,target
<chr>,<chr>,<fct>,<chr>,<chr>
BRD-A29731977,17-hydroxyprogesterone-caproate,0.04 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,0.12 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,0.37 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,1.11 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,3.33 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-A29731977,17-hydroxyprogesterone-caproate,10 uM,progesterone receptor agonist,AR|ESR1|ESR2|NR1H4|NR1I2|NR3C1|PGR
BRD-K07954936,2-iminobiotin,0.04 uM,nitric oxide synthase inhibitor,NOS1|NOS2


## Table S2 - per compound median pairwise correlations 

In [7]:
# Column types of the per-compound median pairwise Spearman correlation scores
compound_cols <- readr::cols(
  compound = readr::col_character(),
  well = readr::col_character(),
  assay = readr::col_character(),
  normalization = readr::col_character(),
  category = readr::col_character(),
  dose = readr::col_character(),
  no_of_compounds = readr::col_double(),
  dose_recode = readr::col_double(),
  median_score = readr::col_double(),
  p_value = readr::col_double(),
  neg_log_10_p_val = readr::col_double(),
  pass_thresh = readr::col_logical()
)

# This is Supplementary Table 2: keep the reported columns and give the
# replicate count and median score descriptive names
compound_df <- readr::read_tsv(
    file.path("results", "compound_scores.tsv"),
    col_types = compound_cols
) %>%
    dplyr::select(
        compound,
        dose,
        no_of_replicates_per_compound = no_of_compounds,
        well,
        median_replicate_correlation = median_score,
        p_value,
        assay
    )

# Output sup table 2
output_file <- file.path("results", "tableS2_compound_percent_replicating.tsv")
readr::write_tsv(compound_df, output_file)

print(dim(compound_df))
head(compound_df, 3)

[1] 15138     7


compound,dose,no_of_replicates_per_compound,well,median_replicate_correlation,p_value,assay
<chr>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>
17-hydroxyprogesterone-caproate,0.04 uM,5,N18,0.05455739,0.009,Cell Painting
2-iminobiotin,0.04 uM,5,B12,0.05379096,0.004,Cell Painting
3-amino-benzamide,0.04 uM,5,D24,0.11193277,0.001,Cell Painting


## Table S3 - Overrepresentation Analysis

In [8]:
# Load the per-compound overrepresentation analysis (ORA) results
ora_results_dir <- file.path("..", "5.Gene-analysis", "results")
ora_results_file <- file.path(ora_results_dir, "ora_compound_results.tsv")

ora_cols <- readr::cols(
    geneSet = readr::col_character(),
    description = readr::col_character(),
    link = readr::col_character(),
    overlapId = readr::col_character(),
    database = readr::col_character(),
    userId = readr::col_character(),
    compound = readr::col_character(),
    size = readr::col_double(),
    overlap = readr::col_double(),
    expect = readr::col_double(),
    enrichmentRatio = readr::col_double(),
    pValue = readr::col_double(),
    FDR = readr::col_double()
)

# Annotate every ORA row with the compound's broad_id, moa and target.
# The map is reduced to its dose-independent columns first, so the per-dose
# rows of map_df do not multiply the ORA rows.
ora_df <- dplyr::left_join(
    readr::read_tsv(ora_results_file, col_types = ora_cols),
    dplyr::distinct(map_df, broad_id, pert_iname, moa, target),
    by = c("compound" = "pert_iname")
)

# This is Supplementary Table 3
output_file <- file.path("results", "tableS3_overrepresentationanalysis.tsv")
readr::write_tsv(ora_df, output_file)

print(dim(ora_df))
head(ora_df, 3)

[1] 80 16


geneSet,description,link,size,overlap,expect,enrichmentRatio,pValue,FDR,overlapId,database,userId,compound,broad_id,moa,target
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
GO:0003824,catalytic activity,http://amigo.geneontology.org/amigo/term/GO:0003824,448,22,13.9249732,1.579895,0.001851467,1,329;998;1017;1026;1643;1738;1983;2778;3162;5255;5603;8318;9133;10038;10730;23139;23536;27244;55008;55256;55699;60493,geneontology_Molecular_Function,PHKA1;MAST2;SESN1;CDK2;IARS2;CDKN1A;MAPK13;DLD;CDC42;HMOX1;ADAT1;BIRC2;HERC6;CDC45;ADI1;YME1L1;FASTKD5;GNAS;CCNB2;EIF5;DDB2;PARP2,alisertib,BRD-K75295174,aurora kinase inhibitor,AURKA
GO:0004674,protein serine/threonine kinase activity,http://amigo.geneontology.org/amigo/term/GO:0004674,68,7,2.113612,3.311866,0.003438725,1,998;1017;1026;5255;5603;9133;23139,geneontology_Molecular_Function,PHKA1;MAST2;CDK2;CDKN1A;MAPK13;CDC42;CCNB2,alisertib,BRD-K75295174,aurora kinase inhibitor,AURKA
GO:1902554,serine/threonine protein kinase complex,http://amigo.geneontology.org/amigo/term/GO:1902554,23,4,0.7148982,5.595202,0.004434779,1,1017;1026;5255;9133,geneontology_Cellular_Component,PHKA1;CDK2;CDKN1A;CCNB2,alisertib,BRD-K75295174,aurora kinase inhibitor,AURKA


## Table S4 - MOA percent matching/Avg Precision

In [9]:
# Column types of the MOA percent matching scores
moa_cols <- readr::cols(
  moa = readr::col_character(),
  dose = readr::col_character(),
  assay = readr::col_character(),
  no_of_replicates = readr::col_double(),
  matching_score = readr::col_double(),
  p_value = readr::col_double(),
  neg_log_10_p_val = readr::col_double(),
  pass_thresh = readr::col_logical()
)

# Keep the reported columns, renaming the count and score columns on the way
moa_df <- readr::read_tsv(
    file.path("results", "moa_scores.tsv"),
    col_types = moa_cols
) %>%
    dplyr::select(
        moa,
        dose,
        no_of_compounds_per_moa = no_of_replicates,
        median_replicate_correlation = matching_score,
        p_value,
        assay
    )

print(dim(moa_df))
head(moa_df, 3)

[1] 2200    6


moa,dose,no_of_compounds_per_moa,median_replicate_correlation,p_value,assay
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>
acat inhibitor,0.04 uM,3,0.004444607,0.513,Cell Painting
acetylcholine receptor agonist,0.04 uM,8,0.005142343,0.576,Cell Painting
acetylcholine receptor antagonist,0.04 uM,21,0.019262183,0.017,Cell Painting


In [10]:
# Read the average precision scores; the file holds both MOA and target rows,
# distinguished by impact_category
file <- file.path(
    "..", "1.Data-exploration", "results", "moa_target_precision.tsv.gz"
)

df_cols <- readr::cols(
  drug_impact = readr::col_character(),
  dose = readr::col_character(),
  impact_category = readr::col_character(),
  assay = readr::col_character(),
  dose_comparison = readr::col_character(),
  avg_precision = readr::col_double()
)

avg_precision_df <- readr::read_tsv(file, col_types = df_cols)

# Swap the raw dose and assay identifiers for their display labels
avg_precision_df[["dose"]] <- dplyr::recode_factor(
    avg_precision_df[["dose"]],
    !!!updated_dose_rename
)
avg_precision_df[["assay"]] <- dplyr::recode_factor(
    avg_precision_df[["assay"]],
    !!!assay_update
)

print(dim(avg_precision_df))
head(avg_precision_df, 3)

[1] 10584     6


drug_impact,dose,avg_precision,impact_category,assay,dose_comparison
<chr>,<fct>,<dbl>,<chr>,<fct>,<chr>
5 alpha reductase inhibitor,0.04 uM,0.001601281,moa,Cell Painting,same_dose
5 alpha reductase inhibitor,0.12 uM,0.001490313,moa,Cell Painting,same_dose
5 alpha reductase inhibitor,0.37 uM,0.001189061,moa,Cell Painting,same_dose


In [11]:
# Supplementary Table 4: MOA-level average precision combined with the MOA
# percent matching scores. Only MOA/dose/assay combinations present in both
# inputs are kept (inner join).
sup_table_4_df <- avg_precision_df %>%
    dplyr::filter(impact_category == "moa") %>%
    dplyr::inner_join(
        moa_df,
        by = c("drug_impact" = "moa", "dose" = "dose", "assay" = "assay")
    ) %>%
    dplyr::select(
        moa = drug_impact,
        dose,
        assay,
        avg_precision,
        no_of_compounds_per_moa,
        median_replicate_correlation,
        p_value_percent_matching = p_value
    ) %>%
    dplyr::arrange(moa)

output_file <- file.path("results", "tableS4_moa_metrics.tsv")
readr::write_tsv(sup_table_4_df, output_file)

head(sup_table_4_df)

“Column `dose` joining factor and character vector, coercing into character vector”
“Column `assay` joining factor and character vector, coercing into character vector”


moa,dose,assay,avg_precision,no_of_compounds_per_moa,median_replicate_correlation,p_value_percent_matching
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
5 alpha reductase inhibitor,All,Cell Painting,0.0009074757,12,-0.018411522,1.0
5 alpha reductase inhibitor,All,L1000,0.001144654,12,0.08463029,0.0
acat inhibitor,0.04 uM,Cell Painting,0.0025054914,3,0.004444607,0.513
acat inhibitor,0.12 uM,Cell Painting,0.0055769517,3,0.02324071,0.247
acat inhibitor,0.37 uM,Cell Painting,0.0025420775,3,-0.017092617,0.797
acat inhibitor,1.11 uM,Cell Painting,0.0026907806,3,-0.080471943,0.997


## Table S5 - Gene target median pairwise correlations/Avg Precision

In [12]:
# Load the per-target median pairwise correlations
file <- file.path(
    "..", "1.Data-exploration", "results",
    "gene_target_median_pairwise_correlations.tsv.gz"
)

target_cols <- readr::cols(
  target = readr::col_character(),
  dose = readr::col_character(),
  assay = readr::col_character(),
  median_correlation = readr::col_double(),
  n_compounds = readr::col_double()
)

target_cor_df <- readr::read_tsv(file, col_types = target_cols)

# Dose labels from dose_rename plus an "All" entry that maps onto itself
target_update_dose_rename <- c(dose_rename, "All" = "All")

target_cor_df[["dose"]] <- dplyr::recode_factor(
    target_cor_df[["dose"]],
    !!!target_update_dose_rename
)

print(dim(target_cor_df))
head(target_cor_df)

[1] 7637    5


target,dose,median_correlation,n_compounds,assay
<chr>,<fct>,<dbl>,<dbl>,<chr>
RPL3,All,0.823493,6,L1000
HSP90AA1,1.11 uM,0.7603772,2,L1000
HSP90AA1,3.33 uM,0.7384674,2,L1000
HSP90AA1,10 uM,0.7377925,2,L1000
RPL3,All,0.7352301,6,Cell Painting
HSP90AA1,All,0.7295974,12,L1000


In [13]:
# Supplementary Table 5: target-level average precision combined with the
# per-target median pairwise correlations. Only target/dose/assay combinations
# present in both inputs are kept (inner join).
sup_table_5_df <- avg_precision_df %>%
    dplyr::filter(impact_category == "target") %>%
    dplyr::inner_join(
        target_cor_df,
        by = c("drug_impact" = "target", "dose" = "dose", "assay" = "assay")
    ) %>%
    dplyr::select(
        target = drug_impact,
        dose,
        assay,
        avg_precision,
        no_of_compounds_per_target = n_compounds,
        median_correlation
    ) %>%
    dplyr::arrange(target, dose)

output_file <- file.path("results", "tableS5_target_metrics.tsv")
readr::write_tsv(sup_table_5_df, output_file)

print(dim(sup_table_5_df))
head(sup_table_5_df)

“Column `assay` joining factor and character vector, coercing into character vector”


[1] 7630    6


target,dose,assay,avg_precision,no_of_compounds_per_target,median_correlation
<chr>,<fct>,<chr>,<dbl>,<dbl>,<dbl>
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.04 uM,Cell Painting,0.02802298,31,0.005335295
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.04 uM,L1000,0.04340889,30,0.029659288
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.12 uM,Cell Painting,0.03459945,31,0.007825567
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.12 uM,L1000,0.02492024,30,0.03017836
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.37 uM,Cell Painting,0.03565959,31,0.009868759
ABAT|ACADSB|ALDH5A1|HDAC1|HDAC2|HDAC9|OGDH|SCN10A|SCN11A|SCN1A|SCN1B|SCN2A|SCN2B|SCN3A|SCN3B|SCN4A|SCN4B|SCN5A|SCN7A|SCN8A|SCN9A,0.37 uM,L1000,0.0456296,30,0.027971655


## Table S6 - Deep learning average precision

In [14]:
# Load average precision scores for all models and targets
metric_dir <- file.path("..", "2.MOA-prediction", "metrics")
ap_file <- file.path(metric_dir, "average_precision_full_results.tsv.gz")

# Columns not listed here are left for readr to guess
ap_cols <- readr::cols(
  average_precision = readr::col_double(),
  target = readr::col_character(),
  assay = readr::col_character(),
  model = readr::col_character(),
  shuffle = readr::col_logical(),
  data_split = readr::col_character(),
  target_category = readr::col_character()
)

ap_df <- readr::read_tsv(ap_file, col_types = ap_cols)

# Replace the raw assay and model identifiers with their reporting labels
ap_df$assay <- dplyr::recode(
    ap_df$assay, "cp" = "Cell_Painting", "L1000" = "L1000"
)

ap_df$model <- dplyr::recode(
    ap_df$model,
        "mlknn" = "KNN",
        "simplenn" = "Simple_NN",
        "resnet" = "ResNet",
        "1dcnn" = "1D_CNN",
        "tabnet" = "TabNet",
        "blend" = "Ensemble"
)

# This is Supplementary Table 6
# NOTE: the table written here must be ap_df (the deep learning average
# precision scores). Writing ora_df would store a copy of the Table S3
# overrepresentation results under the Table S6 file name.
output_file <- file.path("results", "tableS6_deeplearning_avgprecision.tsv.gz")
ap_df %>% readr::write_tsv(output_file)

print(dim(ap_df))
head(ap_df, 2)

[1] 48547     9


average_precision,target,assay,model,shuffle,data_split,subsample_status,target_category,n_pos_count
<dbl>,<chr>,<chr>,<chr>,<lgl>,<chr>,<lgl>,<chr>,<dbl>
0.0009466525,11-beta hydroxysteroid dehydrogenase inhibitor,Cell_Painting,KNN,True,train,True,moa,17
0.0009980768,11-beta-hsd1 inhibitor,Cell_Painting,KNN,True,train,True,moa,18
