Check the druggability of the identified proteins

In [21]:
import os
import requests
import json
import pandas as pd
import numpy as np

In [11]:
# Destination directory for files written by this notebook
# (the tractability CSV is saved here).
# Absolute path: adjust it when running on another machine.
output_dir = "/home/bbc8731/HSV/7-protein_drugability/data"

In [23]:
# Open Targets Platform GraphQL endpoint.
url = "https://api.platform.opentargets.org/api/v4/graphql"

# Ask for the target's identity plus every tractability assessment
# (label / modality / boolean value).
query = """
query targetQuery($ensemblId: String!) {
  target(ensemblId: $ensemblId) {
    id
    approvedSymbol
    tractability {
      label
      modality
      value
    }
  }
}
"""

# Ensembl gene ID to look up (the recorded response shows this is TP53).
variables = {"ensemblId": "ENSG00000141510"}

# A timeout keeps the notebook from hanging forever if the service stalls,
# and raise_for_status() surfaces HTTP failures here instead of as a
# confusing KeyError/TypeError in a later cell.
response = requests.post(
    url,
    json={"query": query, "variables": variables},
    timeout=30,
)
response.raise_for_status()

data = response.json()

# GraphQL responses can carry a top-level "errors" list even when the HTTP
# request itself succeeded; stop early rather than continue with bad data.
if data.get("errors"):
    raise RuntimeError(f"Open Targets query failed: {data['errors']}")

print(json.dumps(data, indent=2))

# with open(os.path.join(output_dir, "tp53_tractability.json"), "w") as f:
#     json.dump(data, f, indent = 4)

{
  "data": {
    "target": {
      "id": "ENSG00000141510",
      "approvedSymbol": "TP53",
      "tractability": [
        {
          "label": "Approved Drug",
          "modality": "SM",
          "value": false
        },
        {
          "label": "Advanced Clinical",
          "modality": "SM",
          "value": true
        },
        {
          "label": "Phase 1 Clinical",
          "modality": "SM",
          "value": false
        },
        {
          "label": "Structure with Ligand",
          "modality": "SM",
          "value": true
        },
        {
          "label": "High-Quality Ligand",
          "modality": "SM",
          "value": true
        },
        {
          "label": "High-Quality Pocket",
          "modality": "SM",
          "value": false
        },
        {
          "label": "Med-Quality Pocket",
          "modality": "SM",
          "value": true
        },
        {
          "label": "Druggable Family",
          "modality": "SM",
        

In [22]:
# Flatten the Open Targets response into a table and save it as CSV.
target_info = data["data"]["target"]

target_id, symbol = target_info["id"], target_info["approvedSymbol"]
tractability_list = target_info["tractability"]

# One record per tractability assessment, each tagged with the target identity.
rows = [
    {
        "target_id": target_id,
        "symbol": symbol,
        "modality": entry["modality"],
        "label": entry["label"],
        "value": entry["value"],
    }
    for entry in tractability_list
]

df = pd.DataFrame(rows)

# Write the table without the positional index column.
df.to_csv(os.path.join(output_dir, "tp53_tractability.csv"), index=False)

print(df.head())


         target_id symbol modality                  label  value
0  ENSG00000141510   TP53       SM          Approved Drug  False
1  ENSG00000141510   TP53       SM      Advanced Clinical   True
2  ENSG00000141510   TP53       SM       Phase 1 Clinical  False
3  ENSG00000141510   TP53       SM  Structure with Ligand   True
4  ENSG00000141510   TP53       SM    High-Quality Ligand   True


In [14]:
def hsv_sm_tractability_tier(tractability_list):
    """
    Rank a target's small-molecule (SM) tractability on a 0-4 scale.

    Only entries whose modality is "SM" and whose value is exactly True count
    as evidence. The assigned tier is the highest one for which at least one
    of its labels is present:

        4  Approved Drug
        3  Advanced Clinical, Phase 1 Clinical
        2  High-Quality Ligand
        1  Structure with Ligand, High-Quality Pocket,
           Med-Quality Pocket, Druggable Family
        0  no positive SM evidence

    tractability_list: list of dicts from Open Targets 'tractability'
        (keys "modality", "label", "value"). None or an empty list is
        treated as "no evidence".
    returns:
        tier (int): 0-4
        details (dict):
            "tier"                - same value as the first return element
            "supporting_evidence" - positive labels of the winning tier,
                                    listed in tier-definition order
            "is_sm_tractable"     - True when tier > 0
    """

    # Define tiers (highest first)
    tier_definitions = {
        4: [("SM", "Approved Drug")],
        3: [
            ("SM", "Advanced Clinical"),
            ("SM", "Phase 1 Clinical")
        ],
        2: [
            ("SM", "High-Quality Ligand")
        ],
        1: [
            ("SM", "Structure with Ligand"),
            ("SM", "High-Quality Pocket"),
            ("SM", "Med-Quality Pocket"),
            ("SM", "Druggable Family")
        ]
    }

    # Collect all positive SM evidence. `.get` is used for the label too, so a
    # malformed entry without a label is ignored instead of raising KeyError.
    positive_evidence = {
        (item.get("modality"), item.get("label"))
        for item in (tractability_list or [])
        if item.get("modality") == "SM" and item.get("value") is True
    }

    # Determine highest tier reached
    assigned_tier = 0
    supporting_labels = []

    for tier in sorted(tier_definitions, reverse=True):
        # Walk the tier definition (a list) rather than the evidence set so
        # the reported labels come out in a stable, reproducible order.
        matched = [
            label
            for (mod, label) in tier_definitions[tier]
            if (mod, label) in positive_evidence
        ]
        if matched:
            assigned_tier = tier
            supporting_labels = matched
            break

    details = {
        "tier": assigned_tier,
        "supporting_evidence": supporting_labels,
        "is_sm_tractable": assigned_tier > 0
    }

    return assigned_tier, details


In [15]:
# Score the target record held in `data` (the Open Targets response fetched
# earlier) with the SM tractability tier function.
tp53 = data["data"]["target"]
score, details = hsv_sm_tractability_tier(tp53["tractability"])
# Prints the gene symbol, the integer tier and the details dict.
print(tp53["approvedSymbol"], score, details)


TP53 3 {'tier': 3, 'supporting_evidence': ['Advanced Clinical'], 'is_sm_tractable': True}


In [16]:
# check bioactivity
# NOTE(review): this import sits mid-notebook, while the other imports are in
# the first code cell.
from chembl_webresource_client.new_client import new_client

# Handles for the ChEMBL "target" and "activity" resources.
target = new_client.target
activity = new_client.activity

# Search by gene name
# NOTE(review): the first search hit is used as-is; nothing here checks its
# organism or target type, and an empty result would make targets[0] fail.
# Confirm the hit is the intended EGFR target.
targets = target.search("EGFR")
# NOTE: this rebinds `target_id`, which the CSV cell set to the Ensembl gene ID.
target_id = targets[0]['target_chembl_id']

# Check bioactivities
# Restrict to activity records for this target whose standard_type is IC50.
activities = activity.filter(target_chembl_id=target_id).filter(standard_type="IC50")

# Number of matching activity records.
print(len(activities))

  __version__ = __import__('pkg_resources').get_distribution('chembl_webresource_client').version


97


In [17]:
def has_pdb_structure(gene_symbol, timeout=30):
    """
    Report whether the first UniProtKB hit for a human gene lists a PDB structure.

    Queries the UniProt REST search endpoint for `gene:<symbol>` restricted to
    organism 9606 (Homo sapiens), asking only for the PDB cross-reference
    field, and inspects the first returned entry.

    gene_symbol: gene name to search for, e.g. "TP53"
    timeout: seconds to wait for UniProt before giving up (passed to requests)
    returns:
        bool: True when the first hit has at least one PDB cross-reference,
              False when there are no hits or no PDB cross-references
    raises:
        requests.HTTPError: UniProt answered with an error status. Previously
            such responses were silently reported as "no structure".
        requests.Timeout: no answer within `timeout` seconds
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        "query": f"gene:{gene_symbol} AND organism_id:9606",
        "fields": "xref_pdb",
        "format": "json"
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors: an error body has no "results" key and would
    # otherwise be indistinguishable from a gene without structures.
    response.raise_for_status()
    payload = response.json()

    results = payload.get("results") or []
    if not results:
        return False

    cross_refs = results[0].get("uniProtKBCrossReferences") or []
    # Only PDB was requested, so an entry without a "database" key is still
    # counted; anything explicitly tagged as another database is not.
    return any(ref.get("database", "PDB") == "PDB" for ref in cross_refs)
# Quick check with TP53; this performs a live request to the UniProt REST API.
print(has_pdb_structure("TP53"))


True


In [20]:
def get_pdb_ids(gene_symbol, timeout=30):
    """
    List the PDB identifiers attached to the first UniProtKB hit for a human gene.

    Queries the UniProt REST search endpoint for `gene:<symbol>` restricted to
    organism 9606 (Homo sapiens), asking only for the PDB cross-reference
    field, and reads the first returned entry.

    gene_symbol: gene name to search for, e.g. "TP53"
    timeout: seconds to wait for UniProt before giving up (passed to requests)
    returns:
        list[str]: PDB IDs in the order UniProt returned them; empty when
                   there are no hits or no PDB cross-references
    raises:
        requests.HTTPError: UniProt answered with an error status. Previously
            such responses were silently reported as an empty list.
        requests.Timeout: no answer within `timeout` seconds
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        "query": f"gene:{gene_symbol} AND organism_id:9606",
        "fields": "xref_pdb",
        "format": "json"
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors: an error body has no "results" key and would
    # otherwise look like a gene without structures.
    response.raise_for_status()
    payload = response.json()

    results = payload.get("results") or []
    if not results:
        return []

    cross_refs = results[0].get("uniProtKBCrossReferences") or []
    # Keep PDB entries only (an untagged entry counts as PDB because only PDB
    # was requested) and skip malformed entries that carry no "id".
    return [
        ref["id"]
        for ref in cross_refs
        if ref.get("database", "PDB") == "PDB" and "id" in ref
    ]
# print(get_pdb_ids("TP53"))


In [19]:
def compute_druggability_score(sm_level, potent_ligands, has_structure,
                                w1=1.0, w2=0.5, w3=0.5):
    """
    Combine three druggability signals into one weighted score.

    score = w1 * sm_level + w2 * log(1 + potent_ligands) + w3 * int(has_structure)

    sm_level: numeric small-molecule tractability level
    potent_ligands: ligand count; log1p-compressed so large counts do not
        dominate the sum
    has_structure: truthy/falsy flag, converted with int() before weighting
    w1, w2, w3: weights of the three terms
    returns:
        the weighted sum of the three terms
    """
    tier_part = w1 * sm_level
    # log1p keeps a zero count at zero and dampens very large counts
    ligand_part = w2 * np.log1p(potent_ligands)
    structure_part = w3 * int(has_structure)

    return tier_part + ligand_part + structure_part
