# Comparison of PDX Response with PDxO Screening Data

In [None]:
from __future__ import annotations

import altair as alt
import pandas as pd

from pathlib import Path
from scipy import stats

In [None]:
# Base directory for the data files read in the cells below. The path is
# relative, so it resolves against the notebook's current working directory.
root = Path("../../../datastore")

In [None]:
# Load the PDX clinical-response table and preview its first rows.
raw_pdx_obs = pd.read_csv(
    root.joinpath("processed/WelmPDX/ScreenClinicalResponseV14B20.csv")
)
raw_pdx_obs.head()

In [None]:
# Load the log-IC50 labels table.
raw_pdxo_obs = pd.read_csv(
    root / "inputs/CellModelPassports-GDSCv1v2-HCI/LabelsLogIC50.csv"
)

# Keep only rows whose cell_id does not start with "SIDM" (presumably the
# Cell Model Passports cell lines -- confirm). The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not write
# into a slice of the original frame.
raw_pdxo_obs = raw_pdxo_obs[~raw_pdxo_obs["cell_id"].str.startswith("SIDM")].copy()

# Standardize the labels within each drug (z-score across rows sharing a drug_id).
raw_pdxo_obs["label"] = raw_pdxo_obs.groupby("drug_id")["label"].transform(stats.zscore)

# Preview the frame built in this cell (the original previewed raw_pdx_obs).
raw_pdxo_obs.head()

In [None]:
# Load the PDxO dose-response screen.
raw_pdxo_screen = pd.read_csv(
    root.joinpath("processed/WelmBreastPDMC-v1.0.0/internal/ScreenDoseResponse.csv")
)

# Standardize each response metric within drug (z-score across the rows that
# share a drug_name).
grouped = raw_pdxo_screen.groupby("drug_name")
# NOTE: a larger GR_AOC is better, so its z-scores are negated.
raw_pdxo_screen["z_GR_AOC"] = grouped["GR_AOC"].transform(
    lambda values: -stats.zscore(values)
)
raw_pdxo_screen["z_LN_IC50"] = grouped["LN_IC50"].transform(stats.zscore)
raw_pdxo_screen.head()

In [None]:
# Load the raw PDX data, then split it into the rows whose drug_name is
# "Vehicle" and all remaining rows.
raw_pdx_data = pd.read_csv(
    root.joinpath("processed/WelmPDX/ScreenClinicalResponseV14B20RawData.csv")
)
raw_pdx_data_ctrl = raw_pdx_data.loc[raw_pdx_data["drug_name"].eq("Vehicle")]
raw_pdx_data_drug = raw_pdx_data.loc[raw_pdx_data["drug_name"].ne("Vehicle")]

In [None]:
# Select the PDxO screen columns of interest and rename the keys so they line
# up with the PDX table's cell_id / drug_id columns.
temp = raw_pdxo_screen[["model_id", "drug_name", "z_LN_IC50", "z_GR_AOC"]].rename(
    columns={"model_id": "cell_id", "drug_name": "drug_id"}
)
# GDS: mean z_LN_IC50 over all rows of the same cell_id.
temp = temp.assign(GDS=temp.groupby("cell_id")["z_LN_IC50"].transform("mean"))

# Inner-join the PDX observations with the PDxO screen on (cell_id, drug_id)
# and keep only rows where both standardized PDxO metrics are present.
X = pd.merge(
    raw_pdx_obs.drop(columns="id"),
    temp,
    on=["cell_id", "drug_id"],
    suffixes=("_pdx", "_pdxo"),
).dropna(subset=["z_LN_IC50", "z_GR_AOC"])

X.head()

In [None]:
# Keyword arguments unpacked into ``mark_boxplot`` for the boxplot in this
# notebook: box width plus styling for the median, box, ticks, and outliers.
BOXPLOT_CONFIG = dict(
    size=28,
    median=alt.MarkConfig(fill="black"),
    box=alt.MarkConfig(stroke="black"),
    ticks=alt.MarkConfig(size=10),
    outliers=alt.MarkConfig(stroke="black", size=15, strokeWidth=1.5),
)

# Axis styling unpacked into ``configure_axis`` by ``configure_chart``.
# NOTE(review): "regular" is not a CSS font-style keyword (those are
# normal/italic/oblique) -- confirm the title renders as intended.
AXIS_CONFIG = dict(
    titleFont="arial",
    titleFontStyle="regular",
    labelFont="arial",
    tickColor="black",
    domainColor="black",
)

def configure_chart(chart: alt.Chart) -> alt.Chart:
    """Apply the notebook's shared view, axis, and header styling to a chart.

    The view outline is hidden (stroke opacity 0), ``AXIS_CONFIG`` is applied
    to the axes, and header label/title fonts are set to arial. The configured
    chart is returned.
    """
    without_outline = chart.configure_view(strokeOpacity=0)
    with_axis_style = without_outline.configure_axis(**AXIS_CONFIG)
    return with_axis_style.configure_header(labelFont="arial", titleFont="arial")

In [None]:
# Label each merged row "Yes" when its mRECIST category is CR, PR, or SD and
# "No" otherwise.
CB_source = X.copy()
CB_source["CB_group"] = (
    CB_source["mRECIST"].isin(["CR", "PR", "SD"]).map({True: "Yes", False: "No"})
)

# Collect the z_LN_IC50 values of each group and compare the two groups with
# a Mann-Whitney U test ("No" first, then "Yes").
CB_grouped_Zd = CB_source.groupby("CB_group")["z_LN_IC50"].agg(list)
stats.mannwhitneyu(CB_grouped_Zd["No"], CB_grouped_Zd["Yes"])

In [None]:
# Boxplot of z_LN_IC50 split by CB_group ("Yes" / "No").
CB_chart = (
    alt.Chart(CB_source)
    .mark_boxplot(**BOXPLOT_CONFIG)
    .encode(
        alt.X("CB_group:O")
        .axis(grid=False, labelAngle=0, titlePadding=10)
        # A list title renders as two lines.
        .title(["Clinical", "Benefit"]),
        alt.Y("z_LN_IC50:Q")
        .axis(grid=False, tickCount=5, titlePadding=10)
        # Axis title spelling corrected ("Resposne" -> "Response").
        .title("Observed PDxO Response (Zd)"),
        alt.Color("CB_group:O")
        .scale(domain=("Yes", "No"), range=("#9DCAEA", "#F69B99"))
        # The x axis already names the groups, so the legend is suppressed.
        .legend(None),
    )
    # 35 px per group for the two groups.
    .properties(width=35 * 2, height=250)
)

configure_chart(CB_chart)

In [None]:
# One bar per (cell_id, drug_id) pair; "id" is the label shown on the x axis.
CB_waterfall_source = CB_source.assign(
    id=lambda df: df["cell_id"] + " + " + df["drug_id"]
)
# Waterfall plot: bars ordered by descending z_LN_IC50 and colored by CB_group.
CB_waterfall_chart = (
    alt.Chart(CB_waterfall_source)
    .mark_bar(size=13.5, stroke="black", strokeWidth=1)
    .encode(
        alt.X("id:N")
        .sort("-y")
        .axis(domainOpacity=0, labelAngle=-60, labelPadding=5)
        .scale(paddingOuter=0.2),
        # Axis title spelling corrected ("Resposne" -> "Response").
        alt.Y("z_LN_IC50:Q").axis(grid=False).title("Observed PDxO Response (Zd)"),
        alt.Color("CB_group:N").scale(
            domain=("Yes", "No"),
            range=("#9ECAE9", "#FF9D98"),
        ),
    )
    # 16 px of width per distinct bar label.
    .properties(width=16 * CB_waterfall_source["id"].nunique(), height=250)
)

# Show the boxplot and the waterfall side by side. Each chart keeps its own
# color scale because the two charts declare different color ranges.
configure_chart(
    alt.hconcat(CB_chart, CB_waterfall_chart, spacing=30).resolve_scale(
        color="independent"
    )
)

In [None]:
def assign_label(x: pd.Series, q: float = 0.3) -> pd.Series:
    """Flag the values of ``x`` that fall below its ``q``-quantile.

    Args:
        x: Numeric values to label (here, one drug's z_LN_IC50 values).
        q: Quantile used as the cutoff; defaults to 0.3.

    Returns:
        An integer Series aligned with ``x``: 1 where the value is strictly
        below the cutoff, 0 otherwise. Missing values do not compare as
        below the cutoff and therefore get 0.
    """
    return (x < x.quantile(q)).astype(int)


# PDxO screen with a per-drug binary prediction: y_pred is 1 for models whose
# z_LN_IC50 lies below the drug's 0.3 quantile.
temp = (
    raw_pdxo_screen[["model_id", "drug_name", "z_LN_IC50", "z_GR_AOC"]]
    .rename(columns={"model_id": "cell_id", "drug_name": "drug_id"})
    .assign(y_pred=lambda df: df.groupby("drug_id")["z_LN_IC50"].transform(assign_label))
)


# Join with the PDX observations on (cell_id, drug_id), keep rows with both
# standardized PDxO metrics, and derive binary outcomes from mRECIST:
#   CB = 1 for CR / PR / SD, OR = 1 for CR / PR.
X = (
    raw_pdx_obs.drop(columns="id")
    .merge(temp, on=["cell_id", "drug_id"], suffixes=("_pdx", "_pdxo"))
    .dropna(subset=["z_LN_IC50", "z_GR_AOC"])
    .assign(CB=lambda df: df["mRECIST"].isin(["CR", "PR", "SD"]).astype(int))
    .assign(OR=lambda df: df["mRECIST"].isin(["CR", "PR"]).astype(int))
)

X.head()

In [None]:
# Contingency table of predicted label (rows) versus CB outcome (columns).
# fill_value=0 records a (y_pred, CB) combination that never occurs as a
# count of 0 instead of NaN, so the table stays a valid table of counts.
CB_ctab = X.groupby(["y_pred", "CB"]).size().unstack(fill_value=0)
CB_ctab

In [None]:
# Fisher's exact test on the y_pred-by-CB contingency table (two-sided, which
# is the test's default alternative for a 2x2 table).
stats.fisher_exact(CB_ctab, alternative="two-sided")