# Number of active readouts

Compute the number of active compounds for morphology (broken down by distance type and representation), cell count, MTT, and LDH. 

In [2]:
import polars as pl


# Root folder of the pipeline outputs, relative to this notebook.
output_dir = "../../1_snakemake/outputs"

# Curve folders, one per image representation; all share the same layout
# under `output_dir`.
cellprofiler_dir, cpcnn_dir, dino_dir = (
    f"{output_dir}/{representation}/mad_featselect/curves"
    for representation in ("cellprofiler", "cpcnn", "dino")
)

In [5]:
# Total number of distinct compounds listed in ccpods.parquet.
# No `all.pass` filter is applied here, so every compound in the file counts.
(
    pl.read_parquet(f"{cellprofiler_dir}/ccpods.parquet")
    .get_column("Metadata_Compound")
    .n_unique()
)

1085

## Cell count and biochemical assays

In [7]:
# Compile cell count and biochemical assay readouts
def _load_passing_pods(filename: str, pod_column: str) -> pl.DataFrame:
    """Load one readout's POD table, keeping only rows flagged as passing.

    Args:
        filename: Name of the parquet file inside ``cellprofiler_dir``.
        pod_column: New name for the ``bmd`` column in the returned frame.

    Returns:
        A frame with ``Metadata_Compound``, the renamed ``bmd`` column, and
        ``adv.incr``, restricted to rows whose ``all.pass`` flag equals True.
    """
    return (
        pl.read_parquet(f"{cellprofiler_dir}/{filename}")
        # The explicit `== True` is kept from the original cell because the
        # dtype of `all.pass` in these files is not visible from this notebook.
        .filter(pl.col("all.pass") == True)  # noqa: E712
        .select(["Metadata_Compound", "bmd", "adv.incr"])
        .rename({"bmd": pod_column})
    )


# All three readouts share one pipeline; only the file and POD column name differ.
cc = _load_passing_pods("ccpods.parquet", "Metadata_cc_POD")
ldh = _load_passing_pods("ldhpods.parquet", "Metadata_ldh_POD")
mtt = _load_passing_pods("mttpods.parquet", "Metadata_mtt_POD")

In [11]:

# Report, for each readout, how many rows there are in total and how many have
# `adv.incr` equal to the string "true" (increase) or "false" (decrease).
for label, pods in (("cell count", cc), ("LDH", ldh), ("MTT", mtt)):
    n_increase = pods.filter(pl.col("adv.incr") == "true").shape[0]
    n_decrease = pods.filter(pl.col("adv.incr") == "false").shape[0]

    print(f"{label}: {pods.shape[0]} total")
    print(f"{label}: {n_increase} increase")
    print(f"{label}: {n_decrease} decrease")

cell count: 220 total
cell count: 3 increase
cell count: 217 decrease
LDH: 147 total
LDH: 146 increase
LDH: 1 decrease
MTT: 431 total
MTT: 13 increase
MTT: 418 decrease


## Morphology

In [22]:
# CellProfiler
# Add a per-row threshold column equal to three times SDctrl.
cellprofiler_bmds = pl.read_parquet(f"{cellprofiler_dir}/bmds.parquet").with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# Row-level conditions: a "gmd" row, flagged as passing (stored as the string
# "true" in this file), and SDres below the threshold.
is_gmd = pl.col("gene.id") == "gmd"
passes_all = pl.col("all.pass") == "true"
below_thresh = pl.col("SDres") < pl.col("SDres_thresh")

# NOTE: this is the `.shape` of the unique-compound Series, i.e. a 1-tuple
# such as (n,), not a bare integer.
cellprofiler_gmd_num = (
    cellprofiler_bmds.filter(is_gmd & passes_all & below_thresh)
    .get_column("Metadata_Compound")
    .unique()
    .shape
)

print(f"cellprofiler gmd: {cellprofiler_gmd_num}")

# cmd & gmd
# Distinct compounds present in pods.parquet.
cellprofiler_all = pl.read_parquet(f"{cellprofiler_dir}/pods.parquet")
cellprofiler_all_num = cellprofiler_all.get_column("Metadata_Compound").n_unique()
print(f"cellprofiler gmd & cmd: {cellprofiler_all_num}")

cellprofiler gmd: (172,)
cellprofiler gmd & cmd: 607


In [18]:
# CPCNN
# Add a per-row threshold column equal to three times SDctrl.
cpcnn_bmds = pl.read_parquet(f"{cpcnn_dir}/bmds.parquet").with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# Row-level conditions: a "gmd" row, flagged as passing (stored as the string
# "true" in this file), and SDres below the threshold.
is_gmd = pl.col("gene.id") == "gmd"
passes_all = pl.col("all.pass") == "true"
below_thresh = pl.col("SDres") < pl.col("SDres_thresh")

# NOTE: this is the `.shape` of the unique-compound Series, i.e. a 1-tuple
# such as (n,), not a bare integer.
cpcnn_gmd_num = (
    cpcnn_bmds.filter(is_gmd & passes_all & below_thresh)
    .get_column("Metadata_Compound")
    .unique()
    .shape
)

print(f"cpcnn gmd: {cpcnn_gmd_num}")

cpcnn gmd: (535,)


In [21]:
# DINO gmd
# Add a per-row threshold column equal to three times SDctrl.
dino_bmds = pl.read_parquet(f"{dino_dir}/bmds.parquet").with_columns(
    (3 * pl.col("SDctrl")).alias("SDres_thresh")
)

# Row-level conditions: a "gmd" row, flagged as passing (stored as the string
# "true" in this file), and SDres below the threshold.
is_gmd = pl.col("gene.id") == "gmd"
passes_all = pl.col("all.pass") == "true"
below_thresh = pl.col("SDres") < pl.col("SDres_thresh")

# NOTE: this is the `.shape` of the unique-compound Series, i.e. a 1-tuple
# such as (n,), not a bare integer.
dino_gmd_num = (
    dino_bmds.filter(is_gmd & passes_all & below_thresh)
    .get_column("Metadata_Compound")
    .unique()
    .shape
)

print(f"Dino gmd: {dino_gmd_num}")

# cmd & gmd
# Distinct compounds present in pods.parquet.
dino_all = pl.read_parquet(f"{dino_dir}/pods.parquet")
dino_all_num = dino_all.get_column("Metadata_Compound").n_unique()
print(f"Dino gmd & cmd: {dino_all_num}")

Dino gmd: (545,)
Dino gmd & cmd: 646
