# Performance Evaluation in Cell Lines

## Contents

- [Data Loading](#data-loading)
- [Drug-Level Pearson Correlation](#drug-level-pearson-correlation)
- [Drug-Level Pearson Correlation Stratified by Biological Mechanism](#drug-level-pearson-correlation-stratified-by-biological-mechanism)
- [Drug-Level Pearson Correlation Stratified by Tissue Type](#drug-level-pearson-correlation-stratified-by-tissue-type)
- [auROC Analysis](#auroc-analysis)
- [Response Rate Analysis](#response-rate-analysis)

In [None]:
from __future__ import annotations

import json

import altair as alt
import pandas as pd
import numpy as np
import typing as t
import sklearn.metrics as skm

from pathlib import Path
from scipy import stats

from cdrpy.datasets import Dataset
from cdrpy.data.preprocess import GroupStandardScaler

from screendl.utils import evaluation as eval_utils

## Data Loading

In [None]:
# Root of the local datastore; the input and output paths below are built relative to it.
root = Path("../../../datastore")

In [None]:
# Map each key of the drug-type JSON file to a short display label. The keys
# are later matched against the drug metadata index. A raw type that is not a
# key of `fixed_drug_types` raises a KeyError.
drug_types_path = root / "processed/DrugAnnotations/drug_types.json"
fixed_drug_types = {"chemotherapy": "Chemo", "targeted": "Targeted", "other": "Other"}
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open(drug_types_path, "r", encoding="utf-8") as fh:
    drug_to_type = {k: fixed_drug_types[v] for k, v in json.load(fh).items()}

In [None]:
# Directory holding the label file and the cell/drug metadata tables.
dataset_dir = root / "inputs/CellModelPassportsGDSCv1v2Hallmark"

cell_meta = pd.read_csv(dataset_dir / "MetaCellAnnotations.csv", index_col=0)
drug_meta = pd.read_csv(dataset_dir / "MetaDrugAnnotations.csv", index_col=0)
# Attach the drug type label; index values absent from `drug_to_type` become missing.
drug_meta["type"] = drug_meta.index.map(drug_to_type)

# The dataset is named after its directory; `D.name` is reused below to build
# the paths of the prediction files.
D = Dataset.from_csv(
    dataset_dir / "LabelsLogIC50.csv",
    cell_meta=cell_meta,
    drug_meta=drug_meta,
    name=dataset_dir.name,
)

print(D)

In [None]:
def load_multirun_predictions(
    multirun_dir: str | Path, regex: str, splits: list[str] | None = None
) -> pd.DataFrame:
    """Loads predictions from a multirun.

    Parameters
    ----------
    multirun_dir
        Directory that contains the outputs of the individual runs.
    regex
        Glob pattern, relative to `multirun_dir`, selecting the prediction
        files. Despite the parameter name this is passed to `Path.glob` and is
        not a regular expression.
    splits
        If given, only rows whose `split_group` value is in this list are kept.

    Returns
    -------
    The row-wise concatenation of all matching CSV files, read in sorted path
    order.

    Raises
    ------
    ValueError
        If no file under `multirun_dir` matches `regex`.
    """
    multirun_dir = Path(multirun_dir)

    # Sort so the row order does not depend on filesystem enumeration order.
    file_list = sorted(multirun_dir.glob(regex))
    if not file_list:
        raise ValueError(f"No files matching {regex!r} found in {multirun_dir}")

    pred_df = pd.concat(map(pd.read_csv, file_list))

    if splits is not None:
        pred_df = pred_df[pred_df["split_group"].isin(splits)]

    return pred_df

In [None]:
def rescale_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """Rescales the predictions based on predictions in the train set.

    `y_true` and `y_pred` are scaled independently, each with its own
    `GroupStandardScaler` grouped by `drug_id`. Every scaler is fitted on the
    rows with `split_group == "train"` and then applied to the rows with
    `split_group == "test"`.

    Parameters
    ----------
    df
        Predictions with `split_group`, `drug_id`, `y_true` and `y_pred`
        columns. The input frame is not modified.

    Returns
    -------
    The rescaled train rows followed by the rescaled test rows. Rows with any
    other `split_group` value are not included.
    """
    # Work on copies: assigning columns on a boolean-mask slice is chained
    # assignment and triggers pandas' SettingWithCopyWarning.
    df_trn = df[df["split_group"] == "train"].copy()
    df_tst = df[df["split_group"] == "test"].copy()

    gss = GroupStandardScaler()
    df_trn["y_true"] = gss.fit_transform(df_trn[["y_true"]], groups=df_trn["drug_id"])
    df_tst["y_true"] = gss.transform(df_tst[["y_true"]], groups=df_tst["drug_id"])

    # A fresh scaler, so predictions are standardized with their own statistics.
    gss = GroupStandardScaler()
    df_trn["y_pred"] = gss.fit_transform(df_trn[["y_pred"]], groups=df_trn["drug_id"])
    df_tst["y_pred"] = gss.transform(df_tst[["y_pred"]], groups=df_tst["drug_id"])

    return pd.concat([df_trn, df_tst])

In [None]:
# Rescaled predictions of every model, keyed by model name.
model_results: t.Dict[str, pd.DataFrame] = {}
output_dir = root / "outputs"
# Formatted below with (dataset name, model name, run timestamp).
path_fmt = "basic/{0}/{1}/multiruns/{2}"
# Renames columns of the loaded prediction files to the names used in this notebook.
column_mapper = {"fold": "split_id", "split": "split_group"}

In [None]:
# HiDRA results

model = "HiDRA-legacy"
date = "2024-04-17_19-29-28"

run_dir = output_dir / path_fmt.format(D.name, model, date)
run_regex = "*/predictions.csv"

# Results are stored under the model name without its "-legacy" suffix.
# Rescaling is applied separately within each split.
model_results[model.split("-")[0]] = (
    load_multirun_predictions(run_dir, run_regex, splits=["train", "test"])
    .rename(columns=column_mapper)
    .groupby("split_id", as_index=False)
    .apply(rescale_predictions)
    .assign(model=model.split("-")[0])
)

In [None]:
# DualGCN results

model = "DualGCN-legacy"
dates = [
    # NOTE: exceeded 72 hr maximum timelimit so folds are split over multiple runs
    "2024-04-12_09-31-07",
    "2024-04-12_09-32-20",
    "2024-04-14_08-02-47",
    "2024-04-14_08-03-56",
    "2024-04-15_16-17-18",
]

# Collect the predictions of every partial run before rescaling.
temp = []
for date in dates:
    run_dir = output_dir / path_fmt.format(D.name, model, date)
    run_regex = "*/predictions.csv"
    temp.append(load_multirun_predictions(run_dir, run_regex, splits=["train", "test"]))

# Results are stored under the model name without its "-legacy" suffix.
# Rescaling is applied separately within each split.
model_results[model.split("-")[0]] = (
    pd.concat(temp)
    .rename(columns=column_mapper)
    .groupby("split_id", as_index=False)
    .apply(rescale_predictions)
    .assign(model=model.split("-")[0])
)

In [None]:
# DeepCDR results

model = "DeepCDR-legacy"
date = "2024-04-02_09-27-37"

run_dir = output_dir / path_fmt.format(D.name, model, date)
run_regex = "*/predictions.csv"

# Results are stored under the model name without its "-legacy" suffix.
# Rescaling is applied separately within each split.
model_results[model.split("-")[0]] = (
    load_multirun_predictions(run_dir, run_regex, splits=["train", "test"])
    .rename(columns=column_mapper)
    .groupby("split_id", as_index=False)
    .apply(rescale_predictions)
    .assign(model=model.split("-")[0])
)

In [None]:
# ScreenDL results ("-PT" from predictions.csv, "-SA" from predictions_sa.csv)

model = "ScreenDL"
# earlier runs, kept for reference:
# date = "2024-11-12_16-43-56"
# date = "2024-04-18_12-54-20"
date = "2024-11-18_19-33-29"
# Kept under its own name: rebinding the shared `path_fmt` would make the
# baseline-model cells resolve to the wrong directory when re-run after this cell.
screenahead_path_fmt = "screenahead/{0}/{1}/multiruns/{2}"

run_dir = output_dir / screenahead_path_fmt.format(D.name, model, date)

# No column renaming is applied here, so these files are expected to already
# provide the `split_id` and `split_group` columns.
model_results[model + "-PT"] = (
    load_multirun_predictions(run_dir, "*/predictions.csv", splits=["train", "test"])
    .groupby("split_id", as_index=False)
    .apply(rescale_predictions)
    .assign(model=model + "-PT")
)

# The "-SA" file is loaded without a split filter and the train rows of the
# "-PT" results are appended, which gives `rescale_predictions` train rows to
# fit its scalers on (presumably predictions_sa.csv has none - confirm).
# NOTE(review): these train rows were already rescaled above, so the scalers
# fitted for the "-SA" frame see standardized values - confirm this is intended.
temp_ = model_results[model + "-PT"].query("split_group == 'train'").copy()
model_results[model + "-SA"] = (
    load_multirun_predictions(run_dir, "*/predictions_sa.csv", splits=None)
    .pipe(lambda df: pd.concat([df, temp_]))
    .groupby("split_id", as_index=False)
    .apply(rescale_predictions)
    .assign(model=model + "-SA")
)

In [None]:
# Stack the per-model frames; the `model` column identifies the source model.
model_results_df = pd.concat(model_results.values())
model_results_df.head()

In [None]:
# Split the combined predictions into their train and test rows.
model_results_df_trn = model_results_df.query("split_group == 'train'")
model_results_df_tst = model_results_df.query("split_group == 'test'")

In [None]:
# Keep only the (drug, cell line) pairs that have test predictions from every
# model, so that all models are evaluated on the same set of pairs.
counts = model_results_df_tst.groupby(["drug_id", "cell_id"])["model"].nunique()
eval_pairs = counts[counts == model_results_df_tst["model"].nunique()].index

model_results_df_tst = (
    model_results_df_tst.set_index(["drug_id", "cell_id"]).loc[eval_pairs].reset_index()
)

## Drug-Level Pearson Correlation

In [None]:
# Display order of the models; also the domain of the shared color scale.
MODELS = ["DualGCN", "HiDRA", "DeepCDR", "ScreenDL-PT", "ScreenDL-SA"]

In [None]:
# One correlation between `y_true` and `y_pred` per (model, drug), computed
# over the test rows of that drug.
pcc_metrics = (
    model_results_df_tst.groupby(["model", "drug_id"])
    .apply(lambda g: eval_utils.pcorr(g, "y_true", "y_pred"))
    .to_frame(name="pcc")
    .reset_index()
)

# Summary statistics of the per-drug correlations, in display order.
pcc_metrics.groupby("model")["pcc"].describe().loc[MODELS]

In [None]:
# compare performance of ScreenDL-SA with ScreenDL-PT

# One row per drug and one column per model. Drugs with a missing correlation
# for any model are dropped, because `stats.wilcoxon` propagates NaN by default
# and would otherwise return an undefined statistic.
pcc_metrics_wide = pcc_metrics.set_index(["drug_id", "model"])["pcc"].unstack().dropna()

m1, m2 = "ScreenDL-PT", "ScreenDL-SA"
stats.wilcoxon(pcc_metrics_wide[m1], pcc_metrics_wide[m2])

In [None]:
# compare performance of ScreenDL-PT with DeepCDR

# Paired test over the per-drug correlations of the two models.
m1, m2 = "DeepCDR", "ScreenDL-PT"
stats.wilcoxon(pcc_metrics_wide[m1], pcc_metrics_wide[m2])

In [None]:
# Shared color scale: one color per entry of MODELS, in the same order.
MODEL_COLOR_SCALE = alt.Scale(
    domain=MODELS,
    range=("lightgray", "darkgray", "gray", "#4C78A8", "#5CA453"),
)

# Keyword arguments passed to every `mark_boxplot` call below.
BOXPLOT_CONFIG = {
    "size": 26,
    "median": alt.MarkConfig(fill="black"),
    "box": alt.MarkConfig(stroke="black"),
    "ticks": alt.MarkConfig(size=10),
    "outliers": alt.MarkConfig(stroke="black", size=15, strokeWidth=1.5),
}

# Keyword arguments passed to `configure_axis` for every chart.
AXIS_CONFIG = {
    "titleFont": "arial",
    "titleFontStyle": "regular",
    "labelFont": "arial",
    "tickColor": "black",
    "domainColor": "black",
}

In [None]:
def configure_chart(chart: alt.Chart) -> alt.Chart:
    """Applies the shared view, axis, header and legend settings to a chart."""
    legend_fonts = {"titleFontSize": 10, "labelFontSize": 10}

    # Same configuration steps, applied one at a time.
    borderless = chart.configure_view(strokeOpacity=0)
    with_axes = borderless.configure_axis(**AXIS_CONFIG)
    with_headers = with_axes.configure_header(labelFont="arial")
    return with_headers.configure_legend(**legend_fonts)

In [None]:
# Add the pathway and drug type annotations to the per-drug correlations.
annots = D.drug_meta[["target_pathway", "type"]].reset_index()
source = pcc_metrics.merge(annots, on="drug_id")

In [None]:
# Number of evaluated drugs per drug type, including drugs without a type.
source.drop_duplicates("drug_id")["type"].value_counts(dropna=False)

In [None]:
# Left panel: distribution of the per-drug correlations for each model.
boxes_all_drugs = (
    alt.Chart(source, width=31 * len(MODELS), height=250)
    .mark_boxplot(**BOXPLOT_CONFIG)
    .encode(
        alt.X("model:N").axis(labelAngle=-45, labelPadding=5).sort(MODELS).title(None),
        alt.Y("pcc:Q")
        .axis(titlePadding=10, tickCount=6, grid=False)
        .scale(domain=[-0.2, 1])
        .title("Pearson Correlation"),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(None),
    )
)

# Right panel: the same distributions split by drug type, with one column per
# model. Only "Targeted" and "Chemo" drugs are shown. The y axis uses the same
# domain as the left panel and is drawn without labels or ticks.
boxes_drug_types = (
    alt.Chart(source, width=31 * 2, height=250)
    .transform_filter(alt.FieldOneOfPredicate("type", ["Targeted", "Chemo"]))
    .mark_boxplot(**BOXPLOT_CONFIG)
    .encode(
        alt.Column("model:N").spacing(5).sort(MODELS).title(None).header(orient="top"),
        alt.X("type:N")
        .axis(labelAngle=-45, labelPadding=5, orient="bottom")
        .sort(["Targeted", "Chemo"])
        .title(None),
        alt.Y("pcc:Q")
        .axis(grid=False, labels=False, ticks=False, domainOpacity=0)
        .scale(domain=[-0.2, 1])
        .title(None),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(None),
    )
)

# `pcc_boxplot` stays unconfigured so it can be reused in the final layout;
# the configured copy is only displayed here.
pcc_boxplot = alt.hconcat(boxes_all_drugs, boxes_drug_types, spacing=-5)
configure_chart(pcc_boxplot)

In [None]:
# Paired test of ScreenDL-PT against ScreenDL-SA, restricted to drugs that
# have a correlation for every model.
pcc_metrics_u = pcc_metrics.set_index(["drug_id", "model"])["pcc"].unstack().dropna()
m1 = "ScreenDL-PT"
m2 = "ScreenDL-SA"
stats.wilcoxon(pcc_metrics_u[m1], pcc_metrics_u[m2])

## Drug-Level Pearson Correlation Stratified by Biological Mechanism

In [None]:
# Minimum number of drugs a pathway needs in order to be kept.
MIN_DRUGS_PER_PATHWAY = 10

# Target pathway of every drug that has a correlation in `pcc_metrics`.
drug_to_pathway = D.drug_meta.loc[pcc_metrics["drug_id"].unique()]["target_pathway"]

# NOTE(review): the size filter counts drugs in the full `drug_meta` table,
# while `drug_to_pathway` only covers drugs present in `pcc_metrics` - confirm
# which population the threshold is meant to apply to.
drugs_per_pathway = drug_meta["target_pathway"].value_counts()
keep_pathways = drugs_per_pathway[drugs_per_pathway >= MIN_DRUGS_PER_PATHWAY].index
drug_to_pathway = drug_to_pathway[drug_to_pathway.isin(keep_pathways)]

# Pathway labels excluded from the analysis.
pathway_blacklist = ["Other", "Chromatin other", "Unclassified"]
drug_to_pathway = drug_to_pathway[~drug_to_pathway.isin(pathway_blacklist)]

drug_to_pathway.value_counts()

In [None]:
# Median per-drug correlation and number of distinct drugs for each
# (model, pathway). Drugs that are not in `drug_to_pathway` get a missing pathway.
pathway_pcc_metrics = (
    pcc_metrics.assign(target_pathway=lambda df: df["drug_id"].map(drug_to_pathway))
    .groupby(["model", "target_pathway"])
    .agg({"pcc": "median", "drug_id": "nunique"})
    .reset_index()
    .rename(columns={"drug_id": "count", "pcc": "median_pcc"})
)
pathway_pcc_metrics.head()

In [None]:
# Pathways ordered by the median correlation of ScreenDL-PT, best first.
sorted_pathways = (
    pathway_pcc_metrics.query("model == 'ScreenDL-PT'")
    .sort_values("median_pcc", ascending=False)["target_pathway"]
    .to_list()
)

# One point per (model, pathway); the chart height scales with the number of pathways.
points = (
    alt.Chart(pathway_pcc_metrics, width=300, height=len(sorted_pathways) * 13)
    .mark_circle()
    .encode(
        alt.X("median_pcc:Q")
        .axis(titlePadding=10, values=[-0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0], grid=False)
        .scale(domain=(-0.2, 1.0))
        .title("Median Pearson Correlation Per Drug"),
        alt.Y("target_pathway:N").sort(sorted_pathways).title(None),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(
            orient="top", title=None, symbolStrokeWidth=1
        ),
        tooltip=["median_pcc:Q", "target_pathway:N"],
    )
)

# Drug counts per pathway, taken from the ScreenDL-PT rows only. The y axis
# uses the same pathway order as `points` and is drawn without labels.
bars = alt.Chart(
    pathway_pcc_metrics.query("model == 'ScreenDL-PT'"),
    width=100,
    height=len(sorted_pathways) * 13,
).encode(
    alt.X("count:Q")
    .axis(grid=False, values=[0, 25, 50], titlePadding=10)
    .scale(domain=(0, 50))
    .title("No. Drugs"),
    alt.Y("target_pathway:N")
    .axis(ticks=False, labels=False, offset=0, domainOpacity=0)
    .sort(sorted_pathways)
    .title(None),
    text="count",
)

# Layer the bars with a text label showing the count.
bars = bars.mark_bar() + bars.mark_text(align="left", dx=4, fontSize=10)

# `pathway_pcc_chart` stays unconfigured so it can be reused in the final layout.
pathway_pcc_chart = alt.hconcat(points, bars, spacing=10)

(
    configure_chart(pathway_pcc_chart)
    .configure_circle(size=60, opacity=0.8, stroke="black", strokeWidth=0.5)
    .configure_bar(stroke="black", strokeWidth=0.5, size=11, color="#999999")
)

## Drug-Level Pearson Correlation Stratified by Tissue Type

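Median per-drug Pearson correlation computed within each tissue, restricted to tissues with at least `MIN_TUMORS_PER_TISSUE` cell lines.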

In [None]:
# Minimum number of cell lines a tissue needs in order to be kept.
MIN_TUMORS_PER_TISSUE = 15
tumor_to_tissue = D.cell_meta["tissue"]

# For each (model, tissue): compute one correlation per drug over the test
# rows of that tissue, then take the median over drugs. `count` is the number
# of cell lines of the tissue in `D.cell_meta`.
tissue_pcc_metrics = (
    model_results_df_tst.assign(tissue=lambda df: df["cell_id"].map(tumor_to_tissue))
    .groupby(["model", "tissue", "drug_id"])
    # Columns are named explicitly, matching the drug-level correlation above,
    # rather than relying on the defaults of `eval_utils.pcorr`.
    .apply(lambda g: eval_utils.pcorr(g, "y_true", "y_pred"))
    .groupby(["model", "tissue"])
    .median()
    .dropna()
    .to_frame(name="median_pcc")
    .reset_index()
    .assign(count=lambda df: df["tissue"].map(tumor_to_tissue.value_counts()))
    .query("count.ge(@MIN_TUMORS_PER_TISSUE)")
)
tissue_pcc_metrics.head()

In [None]:
# For every tissue, pick the row with the highest median correlation among
# all models except ScreenDL-SA.
best_model_by_tissue = (
    tissue_pcc_metrics[tissue_pcc_metrics["model"] != "ScreenDL-SA"]
    .groupby("tissue", as_index=False)
    .apply(lambda g: g.loc[g["median_pcc"].idxmax()])
)

# Fraction of tissues in which each model is the best one.
best_model_by_tissue["model"].value_counts() / len(best_model_by_tissue)

In [None]:
# Tissues ordered by the median correlation of ScreenDL-PT, best first.
sorted_tissues = (
    tissue_pcc_metrics.query("model == 'ScreenDL-PT'")
    .sort_values("median_pcc", ascending=False)["tissue"]
    .to_list()
)

# One point per (model, tissue); the chart height scales with the number of tissues.
points = (
    alt.Chart(
        tissue_pcc_metrics, width=300, height=tissue_pcc_metrics["tissue"].nunique() * 13
    )
    .mark_circle()
    .encode(
        alt.X("median_pcc:Q")
        .axis(titlePadding=10, values=[-0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0], grid=False)
        .scale(domain=(-0.2, 1.0))
        .title("Median Pearson Correlation Per Drug"),
        alt.Y("tissue:N").sort(sorted_tissues).title(None),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(
            orient="top", title=None, symbolStrokeWidth=1
        ),
    )
)

# Cell line counts per tissue, taken from the ScreenDL-PT rows only. The y
# axis uses the same tissue order as `points` and is drawn without labels.
bars = alt.Chart(
    tissue_pcc_metrics.query("model == 'ScreenDL-PT'"),
    width=100,
    height=tissue_pcc_metrics["tissue"].nunique() * 13,
).encode(
    alt.X("count:Q")
    .axis(grid=False, values=[0, 100, 200], titlePadding=10)
    .scale(domain=(0, 200))
    .title("No. Cell Lines"),
    alt.Y("tissue:N")
    .axis(ticks=False, labels=False, offset=0, domainOpacity=0)
    .sort(sorted_tissues)
    .title(None),
    text="count",
)

# Layer the bars with a text label showing the count.
bars = bars.mark_bar() + bars.mark_text(align="left", dx=4, fontSize=10)
# `tissue_pcc_chart` stays unconfigured so it can be reused in the final layout.
tissue_pcc_chart = alt.hconcat(points, bars, spacing=10)

(
    configure_chart(tissue_pcc_chart)
    .configure_circle(size=60, opacity=0.8, stroke="black", strokeWidth=0.5)
    .configure_bar(stroke="black", strokeWidth=0.5, size=11, color="#999999")
)

## auROC Analysis

In [None]:
def get_responder_assignments(df: pd.DataFrame, q: float) -> pd.DataFrame:
    """Labels the test rows as responders or non-responders.

    The threshold is the `q`-quantile of `y_true` over the rows with
    `split_group == "train"`. A test row is a responder (`y_true_class == 1`)
    when its `y_true` is strictly below that threshold.

    Parameters
    ----------
    df
        Predictions with `split_group` and `y_true` columns. The input frame
        is not modified.
    q
        Quantile of the train-set `y_true` values used as the threshold.

    Returns
    -------
    A copy of the rows with `split_group == "test"` with an added integer
    `y_true_class` column.
    """
    df_train = df[df["split_group"] == "train"]
    # Copy before adding a column: assigning on a boolean-mask slice is
    # chained assignment and triggers pandas' SettingWithCopyWarning.
    df_test = df[df["split_group"] == "test"].copy()

    thresh = df_train["y_true"].quantile(q)

    df_test["y_true_class"] = (df_test["y_true"] < thresh).astype(int)

    return df_test


def get_response_rate(df: pd.DataFrame) -> float:
    """Returns the fraction of rows in `df` whose `y_true_class` is 1."""
    return df["y_true_class"].sum() / len(df["y_true_class"])


def is_best_index(g: pd.Series) -> np.ndarray:
    """Returns a boolean mask marking the row label of the minimum of `g`.

    The mask compares every index label of `g` with `g.idxmin()`, so it marks
    a single row only when the index labels are unique.
    """
    return g.index == g.idxmin()

In [None]:
# annotate responders vs non-responders and select best drugs for each model

TRUE_RESPONDER_PERCENTILE = 0.3

model_results_df_responder_cls = (
    model_results_df.groupby(["model", "split_id", "drug_id"], as_index=False)
    .apply(lambda g: get_responder_assignments(g, TRUE_RESPONDER_PERCENTILE))
    .reset_index(drop=True)
    .assign(
        selected_drug=lambda df: df.groupby(["model", "cell_id"])["y_pred"].transform(
            is_best_index
        )
    )
)

model_results_df_responder_cls.head()

In [None]:
def compute_roc_auc(y_true: pd.Series, y_pred: pd.Series) -> float:
    """Returns the ROC AUC of the negated predictions against the class labels.

    Returns NaN when `y_true` holds at most one distinct value.
    """
    if y_true.nunique() > 1:
        # Predictions are negated before scoring.
        scores = -1 * y_pred
        return skm.roc_auc_score(y_true, scores)
    return np.nan


# One auROC per (model, drug), computed over the labeled test rows.
drug_auroc_metrics = (
    model_results_df_responder_cls.groupby(["model", "drug_id"])
    .apply(lambda g: compute_roc_auc(g["y_true_class"], g["y_pred"]))
    .to_frame(name="auROC")
    .reset_index()
)

# Summary statistics of the per-drug auROC values, in display order.
drug_auroc_metrics.groupby("model")["auROC"].describe().loc[MODELS]

In [None]:
# Models ordered by their median per-drug auROC, best first.
sorted_models = (
    drug_auroc_metrics.groupby("model")["auROC"]
    .median()
    .sort_values(ascending=False)
    .index.to_list()
)

# Bar length is the median auROC of each model. No data is attached here; it
# is supplied by `alt.layer` below.
bars = (
    alt.Chart()
    .mark_bar(stroke="black", size=20, strokeWidth=1)
    .encode(
        alt.X("median(auROC):Q")
        .axis(grid=False, tickCount=5, domainColor="black", titlePadding=10)
        .scale(domain=(0.5, 0.9))
        .title("auROC"),
        alt.Y("model:N")
        .axis(domainColor="black")
        .scale(domain=sorted_models, paddingOuter=0.15)
        .title(None),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(None),
    )
    .properties(width=200, height=120)
)

# Error bars with the "iqr" extent over the per-drug auROC values.
error_bars = (
    alt.Chart()
    .mark_errorbar(
        extent="iqr", ticks=alt.MarkConfig(size=5, color="black", strokeWidth=1)
    )
    .encode(alt.Y("model:N"), alt.X("auROC:Q"))
)

# `drug_auroc_chart` stays unconfigured so it can be reused in the final layout.
drug_auroc_chart = alt.layer(bars, error_bars, data=drug_auroc_metrics)
configure_chart(drug_auroc_chart)

## Response Rate Analysis

In [None]:
def _summarize_response_rates(g: pd.DataFrame) -> pd.Series:
    """Summarizes the responder labels of one model.

    Returns a series with three entries:

    - `max_rr`: fraction of all rows of `g` labeled as responders.
    - `no_cells`: number of rows flagged `selected_drug` that are labeled as
      responders.
    - `sel_rr`: fraction of the rows flagged `selected_drug` that are labeled
      as responders.
    """
    # Filter once instead of repeating the same query for every entry.
    selected = g.query("selected_drug == True")
    n_selected_responders = selected["y_true_class"].sum()
    return pd.Series(
        {
            "max_rr": g["y_true_class"].sum() / len(g),
            "no_cells": n_selected_responders,
            "sel_rr": n_selected_responders / len(selected),
        }
    )


rr_metrics = (
    model_results_df_responder_cls.groupby("model")
    .apply(_summarize_response_rates)
    .reset_index()
)

rr_metrics

In [None]:
# Count the distinct drugs, and the distinct target pathways of those drugs,
# among the rows selected for ScreenDL-PT.
pt_selected_drugs = model_results_df_responder_cls.query(
    "model == 'ScreenDL-PT' and selected_drug == True"
)
no_uniq_drugs = pt_selected_drugs["drug_id"].nunique()
no_uniq_pathways = (
    pt_selected_drugs["drug_id"].map(D.drug_meta["target_pathway"]).nunique()
)
print(f"No. Drugs (ScreenDL-PT): {no_uniq_drugs}")
print(f"No. Pathways (ScreenDL-PT): {no_uniq_pathways}")

In [None]:
# NOTE: this is over 15 since we have many in the other category

In [None]:
# Models ordered by the response rate of their selected drugs, best first.
sorted_models = rr_metrics.sort_values("sel_rr", ascending=False)["model"].to_list()

base = alt.Chart(rr_metrics)

# One bar per model showing `sel_rr`, formatted as a percentage.
bars = (
    base.mark_bar(stroke="black", size=20, strokeWidth=1)
    .encode(
        alt.X("sel_rr:Q")
        .axis(grid=False, tickCount=5, domainColor="black", format="%", titlePadding=10)
        .scale(domain=(0.0, 0.8))
        .title("Response Rate (%)"),
        alt.Y("model:N")
        .axis(domainColor="black")
        .scale(domain=sorted_models, paddingOuter=0.15)
        .title(None),
        alt.Color("model:N", scale=MODEL_COLOR_SCALE).legend(None),
    )
    .properties(width=200, height=120)
)

# Text label next to each bar; the x and y encodings repeat those of `bars`.
text = base.mark_text(align="left", dx=6, fontSize=10).encode(
    alt.X("sel_rr:Q")
    .axis(grid=False, tickCount=5, domainColor="black", format="%", titlePadding=10)
    .scale(domain=(0.0, 0.8))
    .title("Response Rate (%)"),
    alt.Y("model:N")
    .axis(domainColor="black")
    .scale(domain=sorted_models, paddingOuter=0.15)
    .title(None),
    alt.Text("sel_rr:Q", format=".1%"),
)

# `rr_chart` stays unconfigured so it can be reused in the final layout.
rr_chart = alt.layer(bars, text)
configure_chart(rr_chart)

In [None]:
# Assemble the full figure from the unconfigured charts built above; the
# shared configuration is applied once, to the final layout.

# Left column: correlation boxplots above the auROC and response-rate bar charts.
left_panel = alt.vconcat(
    pcc_boxplot,
    alt.hconcat(drug_auroc_chart, rr_chart).resolve_scale(color="independent"),
    spacing=35,
)
left_panel = left_panel.resolve_scale(color="independent")

# Right column: pathway chart above the tissue chart, sharing one color scale.
right_panel = alt.vconcat(pathway_pcc_chart, tissue_pcc_chart, spacing=25)
right_panel = right_panel.resolve_scale(color="shared")

final_chart = alt.hconcat(left_panel, right_panel).resolve_scale(color="independent")

(
    configure_chart(final_chart)
    .configure_circle(size=60, opacity=0.8, stroke="black", strokeWidth=0.5)
    .configure_bar(stroke="black", strokeWidth=0.5, size=11, color="#999999")
)