## Step 1: Import relevant libraries

In [1]:
import pandas as pd
import numpy as np
import requests as r
import json

## Step 2: Import list of targets and diseases of interest

In [2]:
target_list = [
    {
        "target_symbol": "PIK3CA",
        "target_id": "ENSG00000121879",
    },
    {
        "target_symbol": "AKT1",
        "target_id": "ENSG00000142208",
    },
    {
        "target_symbol": "APC",
        "target_id": "ENSG00000134982",
    },
    {
        "target_symbol": "ESR1",
        "target_id": "ENSG00000091831",
    },
    {
        "target_symbol": "AXIN1",
        "target_id": "ENSG00000103126",
    },
    
]

In [57]:
disease_list = [
    {
        "disease_label": "melanoma",
        "disease_id": "EFO_0000756"
    },
    {
        "disease_label": "lymphoma",
        "disease_id": "EFO_0000574"
    },
    {
        "disease_label": "multiple_myeloma",
        "disease_id": "EFO_0001378"
    }
]

## Step 3: Generate dataframe with association scores and supporting publications

In [70]:
association_score_base_url = "https://platform-api.opentargets.io/v3/platform/public/association/filter?"

text_mining_evidence_base_url = "https://platform-api.opentargets.io/v3/platform/public/evidence/filter?"

all_dataframe_rows = []

for target in target_list:
    
    dataframe_entry = {}
    
    target_id = target["target_id"]
    
    dataframe_entry["target_id"] = target_id
    
    for disease in disease_list:
        
        disease_id = disease["disease_id"]
        association_params = "target=" + target_id + "&disease=" + disease_id + "&facets=false"
        api_call_url = association_score_base_url + association_params
        # print(api_call_url)
        raw_api_response = r.get(api_call_url).json()
        if raw_api_response["data"]:
            overall_association_score = round(raw_api_response["data"][0]["association_score"]["overall"], 5)
        else:
            overall_association_score = 0
        assoc_score_key_label = disease["disease_label"] + "_association_score"
        dataframe_entry[assoc_score_key_label] = overall_association_score 
        
        evidence_params = (
            "target=" 
            + target_id 
            + "&disease=" 
            + disease_id 
            + "&datasource=europepmc&size=10000&"
            + "fields=evidence.literature_ref.lit_id&expandefo=true"
        )
        text_mining_evidence_api_call_url = text_mining_evidence_base_url + evidence_params
        raw_api_response_text_mining = r.get(text_mining_evidence_api_call_url).json()
        number_of_text_mining_evidence_strings = raw_api_response_text_mining ["total"]
        text_mining_evidence_count_key_label = disease["disease_label"] + "_number_of_publications"
        dataframe_entry[text_mining_evidence_count_key_label] = number_of_text_mining_evidence_strings
        all_pmids = []
        if raw_api_response_text_mining["data"]:
            for evidence_string in raw_api_response_text_mining["data"]:
                text_mining_evidence_string_pmid_url = evidence_string["evidence"]["literature_ref"]["lit_id"].split("/")
                text_mining_evidence_string_pmid = text_mining_evidence_string_pmid_url[-1]
                all_pmids.append(text_mining_evidence_string_pmid)
        
        text_mining_publications_key_label = disease["disease_label"] + "_publications"
        dataframe_entry[text_mining_publications_key_label] = all_pmids
        
    all_dataframe_rows.append(dataframe_entry)

In [71]:
association_df = pd.DataFrame(all_dataframe_rows)
association_df.head()

Unnamed: 0,lymphoma_association_score,lymphoma_number_of_publications,lymphoma_publications,melanoma_association_score,melanoma_number_of_publications,melanoma_publications,multiple_myeloma_association_score,multiple_myeloma_number_of_publications,multiple_myeloma_publications,target_id
0,1.0,76,"[28461758, 29029507, 24418330, 23341541, 19723...",1.0,49,"[25627962, 31277584, 28233937, 30237495, 29535...",1.0,2,"[24766330, 20022634]",ENSG00000121879
1,0.64094,212,"[25987255, 20022634, 30405828, 30577817, 11018...",0.92738,75,"[11828257, 18579561, 22287561, 18813315, 26565...",0.33864,8,"[20022634, 30867372, 24816239, 27919956, 26914...",ENSG00000142208
2,0.82698,0,[],0.63063,0,[],0.48611,0,[],ENSG00000134982
3,0.65351,165,"[30340560, 30736096, 30496125, 21767241, 12542...",0.734,106,"[23144997, 16818643, 26225426, 28299306, 26949...",0.05955,17,"[12542441, 11429412, 23925045, 15033743, 38109...",ENSG00000091831
4,0.58042,4,"[30111415, 30038380, 27063176, 31237044]",0.60946,4,"[23869245, 22234612, 22895053, 28212537]",0.0,0,[],ENSG00000103126


## Step 4: Generate dataframe with known drug evidence

In [84]:
drug_evidence_base_url = "https://platform-api.opentargets.io/v3/platform/public/evidence/filter?"

drug_evidence_df_rows = []

for target in target_list:
    
    dataframe_entry = {}
    
    target_id = target["target_id"]
    
    dataframe_entry["target_id"] = target_id
    
    drug_evidence_params = (
        "target=" 
        + target_id 
        + "&size=10000&datasource=chembl&"
        + "fields=drug.id"
    )
    
    drug_evidence_api_call_url = drug_evidence_base_url + drug_evidence_params
    raw_api_response_drugs = r.get(drug_evidence_api_call_url).json()
    if raw_api_response_drugs["data"]:
        dataframe_entry["known_drug_in_clinic"] = True
        all_drugs = []
        for obj in raw_api_response_drugs["data"]:
            drug_url = obj["drug"]["id"]
            drug_url_strings = drug_url.split("/")
            chembl_id = drug_url_strings[-1]
            all_drugs.append(chembl_id)
        drugs_list = list(set(all_drugs))
        dataframe_entry["drug_in_clinic_ids"] = drugs_list
        
    else:
        dataframe_entry["known_drug_in_clinic"] = False
        
    drug_evidence_df_rows.append(dataframe_entry)

In [85]:
drug_evidence_df = pd.DataFrame(drug_evidence_df_rows)
drug_evidence_df.head()

Unnamed: 0,drug_in_clinic_ids,known_drug_in_clinic,target_id
0,"[CHEMBL411907, CHEMBL586701, CHEMBL1922094, CH...",True,ENSG00000121879
1,"[CHEMBL2178577, CHEMBL3545422, CHEMBL3137336, ...",True,ENSG00000142208
2,,False,ENSG00000134982
3,"[CHEMBL1201468, CHEMBL135, CHEMBL226267, CHEMB...",True,ENSG00000091831
4,,False,ENSG00000103126
