#### Example queries from Multi-omic provider.

##### This Jupyter notebook gives an example of how to query mutation-dependent drug response data.

In [561]:
import requests

import pandas as pd
def Query_fields():
    """Print an overview of what the drug-response KG exposes.

    Fetches the ``metadata/fields`` endpoint of the drug_response_kp API and
    prints its content. When the JSON payload carries a non-empty ``hits``
    list, the ``association``, ``subject`` and ``object`` parts of the first
    hit are printed as an example record. Otherwise the top-level entries of
    the payload are printed one per line.

    Returns:
        An empty tuple (kept so the return value matches the original code).
    """
    query_str = "https://biothings.ncats.io/drug_response_kp/metadata/fields"
    # Use the URL defined just above. The original passed `Query_str`
    # (capital Q), a name that does not exist in this function, so the call
    # raised NameError unless a stale global happened to be present.
    response = requests.get(query_str)
    result = response.json()

    hits = result.get('hits') if isinstance(result, dict) else None
    if hits:
        print("================Example data from the KG!=====================")
        for item in ['association','subject','object']:
            print(item+":")
            print(hits[0][item])
    else:
        # NOTE(review): the metadata/fields endpoint presumably returns a
        # mapping keyed by field name rather than a 'hits' list -- confirm
        # against the API. Listing the entries avoids a KeyError either way.
        print("================Fields available in the KG!=====================")
        for field_name in result:
            print(field_name)
    return ()

def format_result(result):
    """Flatten a drug_response_kp query response into a pandas DataFrame.

    Args:
        result: Parsed JSON response with a ``'hits'`` list. Each hit must
            provide ``'association'``, ``'subject'`` and ``'object'`` dicts
            containing the keys read below. ``subject['NCBIGene']`` is
            optional and defaults to an empty string.

    Returns:
        pandas.DataFrame with one row per hit and a fixed set of 15 columns
        (subject, object and edge attributes). With no hits, an empty frame
        that still has all columns is returned.

    Raises:
        KeyError: if ``'hits'`` or any field that ends up in the table is
            missing from the response.
    """
    # Fixed column order, also used when there are no hits so that callers
    # can still sort or select by column name.
    columns = [
        "subject_id",
        "subject_symbol",
        "subject_type",
        "object_id",
        "object_name",
        "object_type",
        "edge_label",
        "edge_context_disease",
        "edge_confidence_p",
        "edge_effect_size",
        "edge_median_ic50_wt",
        "edge_median_ic50_mut",
        "edge_confidence_sample_size",
        "edge_confidence_mut_size",
        "edge_confidence_wt_size",
    ]

    # Only fields that appear in the output table are read. The original also
    # collected ic50s_mut, ic50s_wt, provided_by and publications into lists
    # that were never used, which made hits lacking those keys fail.
    records = []
    for hit in result['hits']:
        association = hit['association']
        subject = hit['subject']
        target = hit['object']
        records.append({
            # Subject: the NCBIGene id may be absent from a hit.
            "subject_id": subject.get('NCBIGene', ""),
            "subject_symbol": subject['SYMBOL'],
            "subject_type": subject['type'],
            # Object
            "object_id": target['id'],
            "object_name": target['name'],
            "object_type": target['type'],
            # Edge and context
            "edge_label": association['edge_label'],
            "edge_context_disease": association['context']['disease']['mondo'],
            "edge_confidence_p": association['pvalue'],
            "edge_effect_size": association['effect_size'],
            "edge_median_ic50_wt": association['median_ic50_wt'],
            "edge_median_ic50_mut": association['median_ic50_mut'],
            "edge_confidence_sample_size": association['sample_size'],
            "edge_confidence_mut_size": association['size_mut'],
            "edge_confidence_wt_size": association['size_wt'],
        })

    # Build the frame once from the list of records.
    result_df = pd.DataFrame(records, columns=columns)
    return result_df


def Query_DrugResponse_KP(Query, size=1000):
    """Query the drug_response_kp API and return the parsed JSON response.

    Args:
        Query: Mapping of field name to query value, e.g.
            ``{"subject.SYMBOL": "EGFR", "association.pvalue": "%3C0.05"}``.
            Values are inserted into the URL verbatim, so special characters
            must already be URL-encoded by the caller. All terms are combined
            with ``AND``.
        size: Value sent as the ``size`` URL parameter. Defaults to 1000,
            the value that was previously hard-coded.

    Returns:
        The decoded JSON body of the response.
    """
    base_url = "https://biothings.ncats.io/drug_response_kp/query?q="

    # One "field:value" term per entry; str() lets numeric values be passed
    # without a TypeError during concatenation.
    terms = [str(field) + ":" + str(value) for field, value in Query.items()]

    # Joining replaces the manual position counter, and the size parameter is
    # now always appended. Previously an empty Query produced a URL without it.
    Query_str = base_url + "%20AND%20".join(terms) + "&size=" + str(size)

    # Echo the final URL so the request can be reproduced in a browser.
    print(Query_str)
    response = requests.get(Query_str)
    result = response.json()
    return result

#### Browse the field info in the KG

In [563]:
# Get to know the fields in the KG: Query_fields() prints its findings and returns an empty tuple
Query_fields()

association:
{'context': {'disease': {'id': 'MONDO:0004056', 'mondo': ['MONDO:0004056', 'MONDO:0004163']}}, 'edge_label': 'related_to', 'effect_size': -1.5853485689688667, 'ic50s_mut': [-2.35908375859577, -2.00244120144471, -1.56309856166318, -1.21064725258572, -1.17277292868612, -1.01282568478527, -0.572779592116121, -0.55510999924606, -0.515675036218207, -0.463731661771494, -0.301129519401201, 0.333342254138349, 0.358766798231742, 0.4620618105287, 1.02918907631275], 'ic50s_wt': [-0.856879922786714, 0.515798048579352, 1.84164538007384, 2.06962432571167], 'median_ic50_mut': -0.55510999924606, 'median_ic50_wt': 1.178721714326596, 'method': 't-test', 'provided_by': 'GDSC', 'publications': 'PMC:PMC4967469', 'pvalue': 0.01722870346223569, 'sample_size': 19, 'size_mut': 15, 'size_wt': 4}
subject:
{'NCBIGene': '7157', 'SYMBOL': 'TP53', 'id': 'NCBIGene:7157', 'type': 'Gene'}
object:
{'PUBCHEM': '9829523', 'id': 'PUBCHEM:9829523', 'name': 'Midostaurin', 'type': 'ChemicalSubstance'}


()

####  Set up the query

In [564]:
# URL-encoding reference: https://www.w3schools.com/tags/ref_urlencode.ASP
# Values are written already URL-encoded: %3C is "<", %3E is ">", %2D is "-", %22 is a double quote.

Query = {"subject.SYMBOL":"EGFR",  # Query by gene symbol
         # "subject.id": "%22NCBIGene:4893%22"  # Alternative: query by gene id
         # "association.effect_size": "%3C%2D1",  # Alternative: effect_size < -1
         "association.effect_size": "%3C0", # effect_size < 0
         "association.pvalue": "%3C0.05", # p value from t-test < 0.05
         # "association.context.disease.mondo": "%22MONDO:0005061%22",  # Optional: restrict by disease type (MONDO id)
         "association.median_ic50_mut":"%3C0", # The median value of IC50 in the mutated group < 0
         # "association.median_ic50_wt":"%3E0"  # Optional: the median value of IC50 in the wild type group > 0
         # "object.name":"Trametinib",  # Optional: query by drug name
        }

#### Query data from the drug_response_kp

In [565]:
result = Query_DrugResponse_KP(Query)  # Parsed JSON response; the request URL is printed below

https://biothings.ncats.io/drug_response_kp/query?q=subject.SYMBOL:EGFR%20AND%20association.effect_size:%3C0%20AND%20association.pvalue:%3C0.05%20AND%20association.median_ic50_mut:%3C0&size=1000


In [569]:
result_df = format_result(result) # Flatten the JSON hits into a pandas DataFrame (one row per hit)

In [573]:
result_df.sort_values(by = ['edge_confidence_p']) # View the results, sorted by ascending p value

Unnamed: 0,subject_id,subject_symbol,subject_type,object_id,object_name,object_type,edge_label,edge_context_disease,edge_confidence_p,edge_effect_size,edge_median_ic50_wt,edge_median_ic50_mut,edge_confidence_sample_size,edge_confidence_mut_size,edge_confidence_wt_size
3,1956,EGFR,Gene,PUBCHEM:57519523,Afatinib,ChemicalSubstance,related_to,MONDO:0005061,1.3e-05,-2.18,1.189939,-2.102841,63,6,46
4,1956,EGFR,Gene,PUBCHEM:123631,Gefitinib,ChemicalSubstance,related_to,MONDO:0005061,0.001952,-1.497063,1.308749,-0.110006,63,6,46
0,1956,EGFR,Gene,PUBCHEM:126941,Methotrexate,ChemicalSubstance,related_to,MONDO:0005170,0.031477,-1.543391,-1.574933,-3.618622,18,3,15
2,1956,EGFR,Gene,PUBCHEM:159324,Tipifarnib,ChemicalSubstance,related_to,"[MONDO:0003093, MONDO:0005580]",0.039579,-1.208813,0.952282,-0.149451,35,4,23
1,1956,EGFR,Gene,CHEMBL.COMPOUND:CHEMBL2107358,Obatoclax Mesylate,ChemicalSubstance,related_to,"[MONDO:0003093, MONDO:0005580]",0.042948,-1.186224,-1.63068,-3.467816,35,4,23
