In [None]:
# Sensitivity criterion: a compound counts as sensitive when its IC50 does not
# exceed the free (unbound) Cmax; per the variable names both are in µM.
sensitive = ic50_uM <= cmax_free_uM     # free = total × (1 − protein_binding/100)

In [None]:
#!/usr/bin/env python
"""
Fetch human Cmax and plasma-protein-binding values for PRISM compounds
from the pharmacokinetics endpoint of the ChEMBL web services
(https://www.ebi.ac.uk/chembl/api/data).

Author: <you>
Date  : 2025-05-24
"""

import logging
from time import sleep

import pandas as pd
import requests

# ────────────────────────────────────────────────────────────────
# 1.  Load the PRISM drug table
#     Expect columns: 'broad_id', 'name', 'smiles'
# ────────────────────────────────────────────────────────────────
drug_df = pd.read_csv("drug_info.csv")       # table distributed with PRISM

# Placeholder (all-missing) columns; the ChEMBL lookup further down fills
# them in for every compound it can resolve.
for _pk_column in ("cmax_total_uM", "protein_binding_%"):
    drug_df[_pk_column] = pd.NA

# helper: µg/mL → µM when mol-weight is known
def ug_ml_to_uM(value_ug_ml, mol_weight):
    """Convert a mass concentration in µg/mL to a molar concentration in µM.

    Args:
        value_ug_ml: Concentration in µg/mL.
        mol_weight: Molecular weight in g/mol.

    Returns:
        The concentration in µM, or ``pd.NA`` when either input is missing
        or the molecular weight is not a positive number.
    """
    if pd.isna(value_ug_ml) or pd.isna(mol_weight):
        return pd.NA
    if mol_weight <= 0:
        # A zero weight would raise ZeroDivisionError (or give inf for NumPy
        # scalars) and a negative one a negative concentration; neither is a
        # usable result, so report the value as missing instead.
        return pd.NA
    # 1 µg/mL = 1 mg/L; (mg/L) / (g/mol) = mmol/L; × 1000 → µmol/L (µM)
    return (value_ug_ml / mol_weight) * 1000.0

# ────────────────────────────────────────────────────────────────
# 2.  Loop over compounds and query ChEMBL
#     • First, map name → ChEMBL molecule
#     • Then pull pharmacokinetics (PK) resource
# ────────────────────────────────────────────────────────────────
# Common prefix of the two ChEMBL web-service URLs used by this script.
_CHEMBL_API_ROOT = "https://www.ebi.ac.uk/chembl/api/data"

# Free-text molecule search; "{}" takes the URL-quoted compound name.
SEARCH_URL = _CHEMBL_API_ROOT + "/molecule/search.json?q={}"
# PK rows for one molecule; "{}" takes its ChEMBL id.
# NOTE(review): confirm that the ChEMBL web services actually expose a
# `pharmacokinetics` resource. If they do not, every PK request fails its
# HTTP status check and no compound receives any PK values.
PK_URL = _CHEMBL_API_ROOT + "/pharmacokinetics.json?molecule_chembl_id={}"

# Lower-cased unit spellings accepted on Cmax rows.  Both the micro sign
# (U+00B5) and the Greek letter mu (U+03BC) are listed: they look identical
# but compare unequal.
_MICROMOLAR_UNITS = frozenset({"um", "\u00b5m", "\u03bcm"})
_UG_PER_ML_UNITS = frozenset({"ug/ml", "\u00b5g/ml", "\u03bcg/ml"})

# Result returned whenever nothing usable could be fetched for a compound.
_PK_MISSING = (None, pd.NA, pd.NA, pd.NA)


def _to_float(value):
    """Return *value* as a float, or ``pd.NA`` if it is missing or non-numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return pd.NA
    return pd.NA if pd.isna(number) else number


def _parse_pk_rows(rows, mol_weight):
    """Extract ``(cmax_uM, protein_binding_percent)`` from a list of PK rows."""
    cmax, pb = pd.NA, pd.NA
    for row in rows or ():
        value = _to_float(row.get("standard_value"))
        if pd.isna(value):
            continue  # one unusable row must not discard the others
        parameter = (row.get("parameter") or "").strip().lower()
        if parameter == "cmax":
            unit = (row.get("units") or "").strip().lower()
            if unit in _MICROMOLAR_UNITS:
                cmax = value
            elif unit in _UG_PER_ML_UNITS:
                converted = ug_ml_to_uM(value, mol_weight)
                if not pd.isna(converted):
                    # keep an earlier valid Cmax when this row cannot be
                    # converted (e.g. molecular weight unknown)
                    cmax = converted
        elif parameter.startswith("plasma protein binding"):
            pb = value
    return cmax, pb


def fetch_pk_for_name(drug_name, *, timeout=10.0):
    """Look up a compound by name and return its PK values.

    The first hit of the molecule search (``SEARCH_URL``) is taken as the
    match; its PK rows are then requested from ``PK_URL``.  When several rows
    report the same parameter, the last usable one wins.

    Args:
        drug_name: Compound name to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``(chembl_id, mol_wt, cmax_uM, protein_bind_percent)``.  Individual
        values that are unavailable are ``pd.NA``.  If the name is empty,
        nothing matches, or a request/response problem occurs, the result is
        ``(None, pd.NA, pd.NA, pd.NA)``; request and response problems are
        logged as warnings rather than raised.
    """
    if not isinstance(drug_name, (str, bytes)) or not drug_name.strip():
        return _PK_MISSING  # missing or blank name: nothing to search for

    log = logging.getLogger(__name__)
    try:
        # 2-a  name → chembl_id
        sr = requests.get(
            SEARCH_URL.format(requests.utils.quote(drug_name)), timeout=timeout
        )
        sr.raise_for_status()
        hits = sr.json()["molecules"]
        if not hits:              # no match
            return _PK_MISSING
        chembl_id = hits[0]["molecule_chembl_id"]
        # molecule_properties may be absent or null; a missing weight only
        # prevents the µg/mL conversion, not the whole lookup
        properties = hits[0].get("molecule_properties") or {}
        mol_weight = _to_float(properties.get("full_mwt"))

        # 2-b  pull PK rows
        pk_r = requests.get(PK_URL.format(chembl_id), timeout=timeout)
        pk_r.raise_for_status()
        cmax, pb = _parse_pk_rows(pk_r.json()["pharmacokinetics"], mol_weight)
    except requests.RequestException as exc:
        # network failure, timeout or non-2xx status
        log.warning("ChEMBL request failed for %r: %s", drug_name, exc)
        return _PK_MISSING
    except (KeyError, TypeError, ValueError, AttributeError) as exc:
        # payload is not JSON or does not have the expected structure
        log.warning("Unexpected ChEMBL response for %r: %r", drug_name, exc)
        return _PK_MISSING
    return chembl_id, mol_weight, cmax, pb

# Create every output column up front so the saved table has the same schema
# even when no compound is matched; assigning through ".at" alone would add
# "chembl_id" and "mol_weight" only on the first successful lookup.
for _col in ("chembl_id", "mol_weight", "cmax_total_uM", "protein_binding_%"):
    if _col not in drug_df.columns:
        drug_df[_col] = pd.NA

# Query each distinct name once and reuse the answer for repeated names.
_pk_by_name = {}
for idx, drug_name in drug_df["name"].items():
    if drug_name not in _pk_by_name:
        _pk_by_name[drug_name] = fetch_pk_for_name(drug_name)
        sleep(0.2)        # be polite to the API! (only after a real lookup)
    chembl_id, mw, cmax_uM, pbct = _pk_by_name[drug_name]
    if chembl_id:
        drug_df.at[idx, "chembl_id"]          = chembl_id
        drug_df.at[idx, "mol_weight"]         = mw
        drug_df.at[idx, "cmax_total_uM"]      = cmax_uM
        drug_df.at[idx, "protein_binding_%"]  = pbct

# ────────────────────────────────────────────────────────────────
# 3.  Compute “free” Cmax and save
# ────────────────────────────────────────────────────────────────
# free = total × (fraction of drug not bound to plasma protein)
_total_cmax = drug_df["cmax_total_uM"]
_bound_percent = drug_df["protein_binding_%"]
_unbound_fraction = 1.0 - _bound_percent / 100.0
drug_df["cmax_free_uM"] = _total_cmax * _unbound_fraction

# Persist the annotated table (row index omitted) and report completion.
drug_df.to_csv("prism_drug_pk_table.csv", index=False)
print("🔹  Done — PK table written to prism_drug_pk_table.csv")




In [None]:
# Merge prism_drug_pk_table.csv with your PRISM IC₅₀ file:

df = pd.read_csv("prism_curve_parameters.csv")
pk = pd.read_csv("prism_drug_pk_table.csv")

merged = df.merge(pk, on="broad_id")     # PRISM curve params already carry broad_id

# A comparison against NaN evaluates to False, so a plain astype(int) would
# label every row with a missing IC50 or missing free Cmax as 0 ("not
# sensitive").  Keep those rows as <NA> in a nullable integer column so that
# "unknown" stays distinguishable from "resistant".
_has_both = merged["ic50_um"].notna() & merged["cmax_free_uM"].notna()
merged["label_sensitive"] = (
    (merged["ic50_um"] <= merged["cmax_free_uM"]).astype("Int64").where(_has_both)
)
# label_sensitive == 1  → IC50 is at or below the free Cmax
# label_sensitive == 0  → IC50 is above the free Cmax
# label_sensitive is <NA> → IC50 or free Cmax unavailable