In [None]:
import requests
import pandas as pd
import json

In [None]:
# query open targets and return json
def gene_coloc_open_targets_query(geneid, timeout=None):
  """
  queries open targets for colocalization data given gene id (ENS###)

  The gene id is sent as a GraphQL variable instead of being pasted into the
  query text, so quotes or braces inside `geneid` cannot change the query.

  input:
    geneid: gene id string (ENS###)
    timeout: optional number of seconds handed to requests.post; None (the
      default) waits indefinitely
  output: the parsed JSON response (a dict, not a dataframe). The records sit
    under ['data']['colocalisationsForGene']; flattened with
    pd.json_normalize they give the following columns:
    phenotypeId, qtlStudyId, h3, h4, log2h4h3, study.studyId,
    study.traitReported, study.pubAuthor, leftVariant.id, tissue.id,
    tissue.name
  raises: json.JSONDecodeError when the response body is not valid JSON
  """
  api_query = """
    query colocalisationsForGene($geneId: String!){
      colocalisationsForGene(geneId: $geneId) {
        study {
          studyId
          traitReported
          pubAuthor
        }
        leftVariant {
          id
        }
        phenotypeId
        tissue {
          id
          name
        }
        qtlStudyId
        h3
        h4
        log2h4h3
      }
    }"""

  #set base_url for Open Targets Genetics Portal API
  base_url = "http://genetics-api.opentargets.io/graphql"
  #set variables object
  variables = {"geneId": geneid}

  #perform API call and check status code of response
  r = requests.post(
      base_url,
      json={'query': api_query, "variables": variables},
      timeout=timeout,
  )
  # report every non-success status (not only 400) so server-side failures
  # are visible before the body is parsed
  if r.status_code != 200:
    print(f'{geneid} query status code: {r.status_code}')

  #transform API response into JSON
  return json.loads(r.text)


def qtl_coloc_open_targets_query(variant_id, timeout=None):
  """
  queries open targets for qtl data given variant_id in chr_pos_a1_a2 format
    (hg38)

  The variant id is sent as a GraphQL variable instead of being pasted into
  the query text, so quotes or braces inside it cannot change the query.

  input:
    variant_id: variant id in chr_pos_a1_a2 format (hg38)
    timeout: optional number of seconds handed to requests.post; None (the
      default) waits indefinitely
  output: the parsed JSON response (a dict, not a dataframe). The records sit
    under ['data']['genesForVariant']; flattened with pd.json_normalize they
    give the columns: qtls, gene.symbol, gene.id
  raises: json.JSONDecodeError when the response body is not valid JSON
  """
  api_query = """
    query genesForVariant($variantId: String!){
      genesForVariant(variantId: $variantId) {
        gene {
          id
          symbol
        }
        qtls {
          typeId
          sourceId
          aggregatedScore
          tissues {
            tissue {
              id
              name
            }
            quantile
            beta
            pval
          }
        }
      }
    }"""

  #set base_url for Open Targets Genetics Portal API
  base_url = "http://genetics-api.opentargets.io/graphql"
  #set variables object
  variables = {"variantId": variant_id}

  #perform API call and check status code of response
  r = requests.post(
      base_url,
      json={'query': api_query, "variables": variables},
      timeout=timeout,
  )
  # report every non-success status (not only 400) so server-side failures
  # are visible before the body is parsed
  if r.status_code != 200:
    print(f'{variant_id} query status code: {r.status_code}')

  #transform API response into JSON
  return json.loads(r.text)


def phewas_per_variant_open_targets_query(variant_id, timeout=None):
  """
  query open targets for phewas data

  input:
    variant_id: variant id (chr_pos_a1_a2 format with hg38 positions)
    timeout: optional number of seconds handed to requests.post; None (the
      default) waits indefinitely
  output: the parsed JSON response (a dict, not a dataframe). The records sit
    under ['data']['pheWAS']['associations']; flattened with
    pd.json_normalize they give the following columns:
    pval, beta, nTotal, nCases, oddsRatio, eaf, se, study.studyId,
    study.traitReported, study.pubAuthor
  raises: json.JSONDecodeError when the response body is not valid JSON
  """
  api_query = """
    query pheWAS($variantId: String!){
      pheWAS(variantId: $variantId) {
        associations {
          study {
            studyId
            traitReported
            pubAuthor
          }
          pval
          beta
          nTotal
          nCases
          oddsRatio
          eaf
          se
        }
      }
    }"""

  #set base_url for Open Targets Genetics Portal API
  base_url = "http://genetics-api.opentargets.io/graphql"
  #set variables object
  variables = {"variantId": variant_id}

  #perform API call and check status code of response
  r = requests.post(
      base_url,
      json={'query':api_query, "variables":variables},
      timeout=timeout,
  )
  # report every non-success status (not only 400) so server-side failures
  # are visible before the body is parsed
  if r.status_code != 200:
    print(f'{variant_id} query status code: {r.status_code}')

  #transform API response into JSON
  return json.loads(r.text)


def phewas_per_gene_open_targets_query(geneid, timeout=None):
  """
  query open targets for the studies and lead variants reported for a gene

  input:
    geneid: gene id string, sent as the geneId GraphQL variable
    timeout: optional number of seconds handed to requests.post; None (the
      default) waits indefinitely
  output: the parsed JSON response (a dict). The records sit under
    ['data']['studiesAndLeadVariantsForGene']; flattened with
    pd.json_normalize they give the following columns:
    pval, beta, oddsRatio, direction, indexVariant.id, indexVariant.rsId,
    study.studyId, study.traitReported
  raises: json.JSONDecodeError when the response body is not valid JSON
  """
  api_query = """
    query studiesAndLeadVariantsForGene($geneId: String!){
      studiesAndLeadVariantsForGene(geneId: $geneId) {
        indexVariant {
          id
          rsId
        }
        study {
          studyId
          traitReported
        }
        pval
        beta
        oddsRatio
        direction
      }
    }"""

  #set base_url for Open Targets Genetics Portal API
  base_url = "http://genetics-api.opentargets.io/graphql"
  #set variables object
  variables = {"geneId": geneid}

  #perform API call and check status code of response
  r = requests.post(
      base_url,
      json={'query':api_query, "variables":variables},
      timeout=timeout,
  )
  # report every non-success status (not only 400) so server-side failures
  # are visible before the body is parsed
  if r.status_code != 200:
    print(f'{geneid} query status code: {r.status_code}')

  #transform API response into JSON
  return json.loads(r.text)


def search_rsid_open_targets_query(rsid, timeout=None):
  """
  query open targets for snp ids in chr_pos_a1_a2 and allele freqs with rsid

  input:
    rsid: rsid string, sent as the queryString GraphQL variable
    timeout: optional number of seconds handed to requests.post; None (the
      default) waits indefinitely
  output: the parsed JSON response (a dict). The variant records sit under
    ['data']['search']['variants']; flattened with pd.json_normalize they give
    ids and allele frequencies for different ancestry groups with columns:
    id, rsId, gnomadAFR, gnomadAMR, gnomadASJ, gnomadEAS, gnomadFIN,
    gnomadNFE, gnomadNFEEST, gnomadNFENWE, gnomadNFESEU, gnomadNFEONF,
    gnomadOTH
  raises: json.JSONDecodeError when the response body is not valid JSON

  Some information on the allele frequencies being pulled

    "gnomAD Allele frequency (African/African-American population)"
    gnomadAFR: Float

    "gnomAD Allele frequency (Latino/Admixed American population)"
    gnomadAMR: Float

    "gnomAD Allele frequency (Ashkenazi Jewish population)"
    gnomadASJ: Float

    "gnomAD Allele frequency (East Asian population)"
    gnomadEAS: Float

    "gnomAD Allele frequency (Finnish population)"
    gnomadFIN: Float

    "gnomAD Allele frequency (Non-Finnish European population)"
    gnomadNFE: Float

    "gnomAD Allele frequency (Non-Finnish European Estonian sub-population)"
    gnomadNFEEST: Float

    "gnomAD Allele frequency (Non-Finnish European North-Western European sub-population)"
    gnomadNFENWE: Float

    "gnomAD Allele frequency (Non-Finnish European Southern European sub-population)"
    gnomadNFESEU: Float

    "gnomAD Allele frequency (Non-Finnish European Other non-Finnish European sub-population)"
    gnomadNFEONF: Float

    "gnomAD Allele frequency (Other (population not assigned) population)"
    gnomadOTH: Float
  """

  api_query = """
  query search($queryString: String!){
    search(queryString: $queryString) {
      variants{
        id
        rsId
        gnomadAFR
        gnomadAMR
        gnomadASJ
        gnomadEAS
        gnomadFIN
        gnomadNFE
        gnomadNFEEST
        gnomadNFENWE
        gnomadNFESEU
        gnomadNFEONF
        gnomadOTH
      }
    }
  }"""

  #set base_url for Open Targets Genetics Portal API
  base_url = "http://genetics-api.opentargets.io/graphql"
  #set variables object
  variables = {"queryString": rsid}

  #perform API call and check status code of response
  r = requests.post(
      base_url,
      json={'query':api_query, "variables":variables},
      timeout=timeout,
  )
  # report every non-success status (not only 400) so server-side failures
  # are visible before the body is parsed
  if r.status_code != 200:
    print(f'{rsid} query status code: {r.status_code}')

  #transform API response into JSON
  return json.loads(r.text)


def query_qtls(snp_list):
  """
  uses qtl_coloc_open_targets_query() function to query open targets for qtls
    given a list of snps in chr_pos_a1_a2 format (hg38)

  One output row is produced per (snp, gene, qtl type, tissue) combination, in
  the order the API returns them. Genes whose qtls list is empty contribute no
  rows. Rows are collected in a plain list and turned into a dataframe once,
  which avoids DataFrame.append (removed in pandas 2.0) and repeated copying.

  input: list of snps (chr_pos_a1_a2 formatted with hg38 positions)
  output: cleaned up dataframe of qtls with following columns:
    snp, gene_symbol, gene_id, type, tissue, beta, pval
    An empty snp list, or snps without any qtl, give an empty dataframe that
    still carries these columns.
  """
  columns = ['snp', 'gene_symbol', 'gene_id', 'type', 'tissue', 'beta', 'pval']

  records = []
  for snp in snp_list:
    qtl_query = qtl_coloc_open_targets_query(snp)
    for gene_entry in qtl_query['data']['genesForVariant']:
      gene = gene_entry['gene']
      for qt in gene_entry['qtls']:
        for tissue in qt['tissues']:
          records.append({
              'snp': snp,
              'gene_symbol': gene['symbol'],
              'gene_id': gene['id'],
              'type': qt['typeId'],
              'tissue': tissue['tissue']['name'],
              'beta': tissue['beta'],
              'pval': tissue['pval']
          })

  # passing columns explicitly keeps the schema stable when records is empty
  return pd.DataFrame(records, columns=columns)

In [None]:
# Query colocalisation data for one gene id, then flatten the
# 'colocalisationsForGene' records of the JSON response into a dataframe
# (nested fields become dotted columns such as study.studyId).
# NOTE(review): the saved output under this cell shows phenotypeId
# ENSG00000160714 and chromosome 1 variants, which differs from the gene id
# queried here - the output may come from an earlier run; re-run to confirm.
coloc_query = gene_coloc_open_targets_query('ENSG00000188906')
coloc_query_df = pd.json_normalize(coloc_query['data']['colocalisationsForGene'])
coloc_query_df.head()

Unnamed: 0,phenotypeId,qtlStudyId,h3,h4,log2h4h3,study.studyId,study.traitReported,study.pubAuthor,leftVariant.id,tissue.id,tissue.name
0,ENSG00000160714,QUACH_2016,0.183847,0.238134,0.373264,NEALE2_2395_1,Pattern 1 | hair/balding pattern,UKB Neale v2,1_153901213_C_CA,MONOCYTE_IAV,Monocyte iav
1,ENSG00000160714,QUACH_2016,0.222941,0.116175,-0.940364,NEALE2_1050,Time spend outdoors in summer,UKB Neale v2,1_153987528_A_AAAAG,MONOCYTE_IAV,Monocyte iav
2,ENSG00000160714,QUACH_2016,0.152196,0.422189,1.471959,NEALE2_30040_raw,Mean corpuscular volume,UKB Neale v2,1_154078049_A_G,MONOCYTE_IAV,Monocyte iav
3,ENSG00000160714,QUACH_2016,0.199173,0.239907,0.268452,NEALE2_30050_raw,Mean corpuscular haemoglobin,UKB Neale v2,1_154095720_C_T,MONOCYTE_IAV,Monocyte iav
4,ENSG00000160714,QUACH_2016,0.208134,0.210694,0.017637,NEALE2_864,Number of days/week walked 10+ minutes,UKB Neale v2,1_154113167_A_T,MONOCYTE_IAV,Monocyte iav


In [None]:
# Query qtls for a list of snps (chr_pos_a1_a2 ids).
# query_qtls() calls qtl_coloc_open_targets_query() once per snp id in the
# input list and returns one combined, cleaned up dataframe with one row per
# snp / gene / qtl type / tissue.
test_snp_list = ['1_154453788_C_T', '1_154445939_T_C']
qtls_df = query_qtls(test_snp_list)
qtls_df.head()

Unnamed: 0,snp,gene_symbol,gene_id,type,tissue,beta,pval
0,1_154453788_C_T,UBE2Q1,ENSG00000160714,eqtl,Blood (eQTLGen),-0.047601,8.8194e-09
1,1_154453788_C_T,IL6R,ENSG00000160712,eqtl,Blood (GTEX v7),-0.106938,3.64123e-07
2,1_154453788_C_T,IL6R,ENSG00000160712,eqtl,Macrophage listeria (NEDELEC 2016),0.153303,1.44316e-06
3,1_154453788_C_T,IL6R,ENSG00000160712,eqtl,Monocyte r848 (QUACH 2016),0.119603,2.83283e-07
4,1_154453788_C_T,IL6R,ENSG00000160712,eqtl,Colon transverse (GTEX v7),-0.269436,7.8356e-06


In [None]:
# Query phewas data for a given snp, then flatten the association records
# found under ['data']['pheWAS']['associations'] into a dataframe (the nested
# study fields become study.* columns).
phewas_variant_query = phewas_per_variant_open_targets_query('1_154453788_C_T')
phewas_variant_query_df = pd.json_normalize(phewas_variant_query['data']['pheWAS']['associations'])
phewas_variant_query_df.head()

Unnamed: 0,pval,beta,nTotal,nCases,oddsRatio,eaf,se,study.studyId,study.traitReported,study.pubAuthor
0,0.032147,0.043068,361141,5182.0,1.044009,0.409865,0.020101,NEALE2_20002_1398,"Pneumonia | non-cancer illness code, self-repo...",UKB Neale v2
1,0.00691,0.004799,350404,,,0.40984,0.001776,NEALE2_1289,Cooked vegetable intake,UKB Neale v2
2,0.024036,0.018557,117763,53389.0,1.01873,0.408367,0.008223,NEALE2_20446,Ever had prolonged feelings of sadness or depr...,UKB Neale v2
3,0.000109,0.028852,361141,41934.0,1.029273,0.409865,0.007457,NEALE2_20002_1111,"Asthma | non-cancer illness code, self-reported",UKB Neale v2
4,0.00927,0.259938,361141,207.0,1.29685,0.409865,0.099904,NEALE2_20003_1141180392,Cefalexin | treatment/medication code,UKB Neale v2


In [15]:
# Query phewas data for a given gene, then flatten the records found under
# ['data']['studiesAndLeadVariantsForGene'] into a dataframe (nested fields
# become indexVariant.* and study.* columns).
phewas_gene_query = phewas_per_gene_open_targets_query('ENSG00000188906')
phewas_gene_query_df = pd.json_normalize(phewas_gene_query['data']['studiesAndLeadVariantsForGene'])
phewas_gene_query_df.head()

Unnamed: 0,pval,beta,oddsRatio,direction,indexVariant.id,indexVariant.rsId,study.studyId,study.traitReported
0,2e-10,0.032,,+,12_40212861_T_C,rs1907631,GCST007131,Low density lipoprotein cholesterol levels
1,6e-15,,,,12_40227006_C_G,rs1491942,GCST001445_2,Parkinson's disease [EA]
2,9e-09,0.03,,+,12_40212861_T_C,rs1907631,GCST007134,Total cholesterol levels
3,5e-14,,1.155,+,12_40220632_C_T,rs76904798,GCST002544,Parkinson's disease
4,4.236247e-08,,0.7593,-,12_40320530_T_C,rs11564177,GCST005527,Psoriasis


In [None]:
# Get the hg38 chr_pos_a1_a2 id and the gnomAD allele frequencies for a given
# rsid, then flatten the variant records found under
# ['data']['search']['variants'] into a dataframe.
rsid_query = search_rsid_open_targets_query("rs4129267")
rsid_query_df = pd.json_normalize(rsid_query['data']['search']['variants'])
rsid_query_df.head()

Unnamed: 0,id,rsId,gnomadAFR,gnomadAMR,gnomadASJ,gnomadEAS,gnomadFIN,gnomadNFE,gnomadNFEEST,gnomadNFENWE,gnomadNFESEU,gnomadNFEONF,gnomadOTH
0,1_154453788_C_T,rs4129267,0.137483,0.509434,0.451724,0.379534,0.301957,0.375928,0.340299,0.389706,0.528302,0.388941,0.331801
