# Number of active readouts

Compute the number of active compounds for morphology (broken down by distance type and representation), cell count, MTT, and LDH. 

In [1]:
import polars as pl


# Relative path to the pipeline outputs that every cell below reads from.
output_dir = "../../1_snakemake/outputs"

# One "curves" folder per morphology representation, assigned in a single step.
cellprofiler_dir, cpcnn_dir, dino_dir = (
    f"{output_dir}/cellprofiler/mad_featselect/curves",
    f"{output_dir}/cpcnn/mad_featselect/curves",
    f"{output_dir}/dino/mad_featselect/curves",
)

In [2]:
# Get total cmpd number.
# Only the compound column is read from disk, and n_unique() counts the distinct
# names directly instead of materialising a Python list just to take its len().
pl.read_parquet(
    f"{dino_dir}/ccpods.parquet", columns=["Metadata_Compound"]
).to_series().n_unique()

1085

## Cell count and biochemical assays

In [3]:
# Compile cell count and biochemical assay readouts
def _passing_pods(parquet_name, pod_column):
    """Load one POD table from `dino_dir` and keep only rows flagged as passing.

    Rows where `all.pass` equals True are retained; the result holds the
    compound name, `bmd` renamed to `pod_column`, and `adv.incr`.
    """
    table = pl.read_parquet(f"{dino_dir}/{parquet_name}")
    passed = table.filter(pl.col("all.pass") == True)
    kept = passed.select(["Metadata_Compound", "bmd", "adv.incr"])
    return kept.rename({"bmd": pod_column})


cc = _passing_pods("ccpods.parquet", "Metadata_cc_POD")
ldh = _passing_pods("ldhpods.parquet", "Metadata_ldh_POD")
mtt = _passing_pods("mttpods.parquet", "Metadata_mtt_POD")

In [5]:

# adv.incr is compared against the string literals 'true' / 'false' below,
# so the two direction filters are defined once and reused for every assay.
went_up = pl.col("adv.incr") == "true"
went_down = pl.col("adv.incr") == "false"

# Cell count: totals per direction, plus the names of the "increase" compounds
cc_up = cc.filter(went_up)
print(f"cell count: {cc.height} total")
print(f"cell count: {cc_up.height} increase")
print(cc_up.select("Metadata_Compound"))
print(f"cell count: {cc.filter(went_down).height} decrease")

# LDH: totals per direction only
print(f"LDH: {ldh.height} total")
print(f"LDH: {ldh.filter(went_up).height} increase")
print(f"LDH: {ldh.filter(went_down).height} decrease")

# MTT: totals per direction, plus the names of the "increase" compounds
mtt_up = mtt.filter(went_up)
print(f"MTT: {mtt.height} total")
print(f"MTT: {mtt_up.height} increase")
print(mtt_up.select("Metadata_Compound"))
print(f"MTT: {mtt.filter(went_down).height} decrease")

cell count: 221 total
cell count: 3 increase
shape: (3, 1)
┌───────────────────────────┐
│ Metadata_Compound         │
│ ---                       │
│ str                       │
╞═══════════════════════════╡
│ Alectinib (Hydrochloride) │
│ Baloxavir marboxil        │
│ Oritavancin (diphosphate) │
└───────────────────────────┘
cell count: 218 decrease
LDH: 144 total
LDH: 143 increase
LDH: 1 decrease
MTT: 430 total
MTT: 10 increase
shape: (10, 1)
┌─────────────────────────────────┐
│ Metadata_Compound               │
│ ---                             │
│ str                             │
╞═════════════════════════════════╡
│ 2-Ethyl-2-(hydroxymethyl)propa… │
│ 2-Ethylanthracene-9,10-dione    │
│ 6-Mercaptopurine                │
│ Atazanavir                      │
│ Benzarone                       │
│ Compound_211c050e               │
│ Estradiol                       │
│ Tiaprofenic acid                │
│ Tiratricol                      │
│ Tolcapone                       │
└─────────

In [5]:
# Compounds counted towards cytotoxicity: cell count with adv.incr 'false',
# LDH with adv.incr 'true', and MTT with adv.incr 'false'.
cc_cmpds = cc.filter(pl.col("adv.incr") == "false").get_column("Metadata_Compound").to_list()
ldh_cmpds = ldh.filter(pl.col("adv.incr") == "true").get_column("Metadata_Compound").to_list()
mtt_cmpds = mtt.filter(pl.col("adv.incr") == "false").get_column("Metadata_Compound").to_list()

# Pool the three lists, then de-duplicate; the order of the result is unspecified.
flagged_in_any_assay = cc_cmpds + ldh_cmpds + mtt_cmpds
cytotox_cmpds = list(set(flagged_in_any_assay))

print(f"Cytotox cmpds: {len(cytotox_cmpds)}")

Cytotox cmpds: 429


## Morphology

In [None]:
# cellprofiler
# Load the per-curve table and add a residual-SD threshold of 3x SDctrl.
cp_table = pl.read_parquet(f"{cellprofiler_dir}/bmds.parquet")
cellprofiler_bmds = cp_table.with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# Quality filter shared by the gmd and cmd selections:
# all.pass must equal the string "true" and SDres must be below the threshold.
cp_curve_ok = (pl.col("all.pass") == "true") & (pl.col("SDres") < pl.col("SDres_thresh"))

# gmd: rows whose gene.id is exactly "gmd"
cellprofiler_gmd = cellprofiler_bmds.filter((pl.col("gene.id") == "gmd") & cp_curve_ok)
# NOTE: .shape of a Series is a 1-tuple, so the count prints as e.g. "(172,)".
cellprofiler_gmd_num = cellprofiler_gmd.get_column("Metadata_Compound").unique().shape
print(f"cellprofiler gmd #: {cellprofiler_gmd_num}")
print(f"cellprofiler gmd median: {cellprofiler_gmd.get_column('bmd').median()}")

# cmd: every other gene.id, reduced to the minimum bmd per compound
cellprofiler_cmd = (
    cellprofiler_bmds
    .filter((pl.col("gene.id") != "gmd") & cp_curve_ok)
    .group_by("Metadata_Compound")
    .agg(pl.col("bmd").min())
)
cellprofiler_cmd_num = cellprofiler_cmd.get_column("Metadata_Compound").unique().shape
print(f"cellprofiler cmd #: {cellprofiler_cmd_num}")
print(f"cellprofiler cmd median: {cellprofiler_cmd.get_column('bmd').median()}")

# cmd & gmd: distinct compounds listed in the precomputed pods table
cellprofiler_all = pl.read_parquet(f"{cellprofiler_dir}/pods.parquet")
cp_all_count = cellprofiler_all.get_column("Metadata_Compound").n_unique()
print(f"cellprofiler gmd & cmd #: {cp_all_count}")

cellprofiler gmd #: (172,)
cellprofiler gmd median: 3.5222091934258897
cellprofiler cmd #: (598,)
cellprofiler cmd median: 2.951186789189215
cellprofiler gmd & cmd #: 607


In [19]:
# CPCNN
# Load the per-curve table and add a residual-SD threshold of 3x SDctrl.
cpcnn_table = pl.read_parquet(f"{cpcnn_dir}/bmds.parquet")
cpcnn_bmds = cpcnn_table.with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# gmd rows only: gene.id is "gmd", all.pass equals the string "true",
# and SDres is below the threshold.
cpcnn_curve_ok = (pl.col("all.pass") == "true") & (pl.col("SDres") < pl.col("SDres_thresh"))
cpcnn_gmd = cpcnn_bmds.filter((pl.col("gene.id") == "gmd") & cpcnn_curve_ok)
# NOTE: .shape of a Series is a 1-tuple, so the count prints as e.g. "(538,)".
cpcnn_gmd_num = cpcnn_gmd.get_column("Metadata_Compound").unique().shape

print(f"cpcnn gmd #: {cpcnn_gmd_num}")
print(f"cpcnn gmd median: {cpcnn_gmd.get_column('bmd').median()}")

cpcnn gmd #: (538,)
cpcnn gmd median: 3.239297233623755


In [21]:
# DINO
# Load the per-curve table and add a residual-SD threshold of 3x SDctrl.
dino_table = pl.read_parquet(f"{dino_dir}/bmds.parquet")
dino_bmds = dino_table.with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# Quality filter shared by the gmd and cmd selections:
# all.pass must equal the string "true" and SDres must be below the threshold.
dino_curve_ok = (pl.col("all.pass") == "true") & (pl.col("SDres") < pl.col("SDres_thresh"))

# gmd: rows whose gene.id is exactly "gmd"
dino_gmd = dino_bmds.filter((pl.col("gene.id") == "gmd") & dino_curve_ok)
# NOTE: .shape of a Series is a 1-tuple, so the count prints as e.g. "(546,)".
dino_gmd_num = dino_gmd.get_column("Metadata_Compound").unique().shape
print(f"dino gmd #: {dino_gmd_num}")
print(f"dino gmd median: {dino_gmd.get_column('bmd').median()}")

# cmd: every other gene.id, reduced to the minimum bmd per compound
dino_cmd = (
    dino_bmds
    .filter((pl.col("gene.id") != "gmd") & dino_curve_ok)
    .group_by("Metadata_Compound")
    .agg(pl.col("bmd").min())
)
dino_cmd_num = dino_cmd.get_column("Metadata_Compound").unique().shape
print(f"dino cmd #: {dino_cmd_num}")
print(f"dino cmd median: {dino_cmd.get_column('bmd').median()}")

# cmd & gmd: distinct compounds listed in the precomputed pods table
dino_all = pl.read_parquet(f"{dino_dir}/pods.parquet")
dino_all_count = dino_all.get_column("Metadata_Compound").n_unique()
print(f"Dino gmd & cmd #: {dino_all_count}")

dino gmd #: (546,)
dino gmd median: 3.00183062346817
dino cmd #: (624,)
dino cmd median: 3.162274437178345
Dino gmd & cmd #: 642
