In [76]:
import requests
import json
import pandas as pd
import time

In [101]:
def request_gene_variants(gene_id, gnomad_version='gnomad_r2_1', reference_genome='GRCh37', timeout=120):
    """Query the gnomAD GraphQL API for a single gene and return the raw response.

    The query requests, for the given gene: its ``gencode_symbol`` and
    ``gene_id``, the ``gnomad_constraint`` fields (``oe_lof*`` / ``oe_mis*``),
    every variant in the selected dataset (position, alleles, rsids, exome and
    genome ``ac``/``an``, transcript consequence, LoF curation, flags) and the
    gene's ``clinvar_variants``.

    Parameters
    ----------
    gene_id : str
        Sent as the ``$geneId`` GraphQL variable.
    gnomad_version : str, optional
        Sent as the ``$dataSet`` GraphQL variable (a ``DatasetId``).
    reference_genome : str, optional
        Sent as the ``$refGenome`` GraphQL variable (a ``ReferenceGenomeId``).
    timeout : float or tuple or None, optional
        Forwarded to ``requests.post``. Without a timeout a stalled connection
        would block forever; pass ``None`` to restore the unbounded wait.

    Returns
    -------
    requests.Response
        The unparsed HTTP response. The caller is responsible for checking
        ``status_code`` and decoding the body (e.g. with ``.json()``).

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.post`` on connection errors or timeouts.
    """
    QUERY = """
    query ($geneId: String!, $refGenome: ReferenceGenomeId!, $dataSet:DatasetId!) {
      gene(gene_id: $geneId, reference_genome: $refGenome) {
        gencode_symbol
        gene_id
        gnomad_constraint {
            oe_lof
            oe_lof_lower
            oe_lof_upper
            oe_mis
            oe_mis_lower
            oe_mis_upper
        }
        variants(dataset: $dataSet) {
          variant_id
          chrom
          pos
          ref
          alt
          rsids
          exome {
            ac
            an
          }
          genome {
            ac
            an
          }
          transcript_consequence {
            hgvsp
            major_consequence
            is_canonical
            transcript_id
            polyphen_prediction
            sift_prediction
            lof
            lof_filter
            lof_flags
            }
          lof_curation {
            verdict
            flags
          }
          flags
        }
        clinvar_variants {
          hgvsp
          major_consequence
          review_status
          clinical_significance
        }
      }
    }
    """
    VARIABLES = {
        "geneId": gene_id,
        "refGenome": reference_genome,
        "dataSet": gnomad_version,
    }

    # `json=` serialises the payload and sets the
    # "Content-Type: application/json" header, replacing the manual
    # json.dumps(...) + headers combination.
    response = requests.post(
        "https://gnomad.broadinstitute.org/api",
        json={"query": QUERY, "variables": VARIABLES},
        timeout=timeout,
    )

    return response

In [106]:
# Load the table of GPCR target genes; the two columns used below hold the
# HGNC symbol and the GRCh37 Ensembl gene id of each target.
gpcr_targets = pd.read_csv('../data/Ensembl_gene_sequences/Ensembl_Grch37_gpcr_genome_locations.csv')

target_gene_symbols = gpcr_targets['HGNC symbol']
target_gene_ids = gpcr_targets['Ensembl id GRCh37']

# Retry policy: a failed request is retried after a pause, but only a bounded
# number of times so that a permanent failure (e.g. an id the API rejects)
# cannot spin this cell forever.
MAX_FETCH_ATTEMPTS = 10
RETRY_DELAY_SECONDS = 10

# One decoded JSON response per gene, in the same order as the input table.
gnomad_variants_r2_1_GRCh37 = []
for gene_symbol, gene_id in zip(target_gene_symbols, target_gene_ids):
    print('Fetching gnomad annotations for {}'.format(gene_id))
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            response = request_gene_variants(gene_id)
        except requests.RequestException as exc:
            # Transport-level failure (connection reset, timeout, ...):
            # handle it like a non-200 answer and retry.
            failure = repr(exc)
        else:
            if response.status_code == 200:
                break
            failure = 'HTTP status {}'.format(response.status_code)
        print('  attempt {}/{} for {} ({}) failed: {}'.format(
            attempt, MAX_FETCH_ATTEMPTS, gene_id, gene_symbol, failure))
        if attempt < MAX_FETCH_ATTEMPTS:
            # Pause before retrying so we do not hammer a rate-limited API.
            time.sleep(RETRY_DELAY_SECONDS)
    else:
        # Every attempt failed. Stop loudly instead of silently dropping the
        # gene, which would misalign the results with the input table. The
        # responses gathered so far remain in gnomad_variants_r2_1_GRCh37.
        raise RuntimeError(
            'Giving up on {} ({}) after {} attempts; last failure: {}'.format(
                gene_id, gene_symbol, MAX_FETCH_ATTEMPTS, failure))
    gnomad_variants_r2_1_GRCh37.append(response.json())

Fetching gnomad annotations for ENSG00000213088
Fetching gnomad annotations for ENSG00000144648
Fetching gnomad annotations for ENSG00000144476
Fetching gnomad annotations for ENSG00000129048
Fetching gnomad annotations for ENSG00000078549
Fetching gnomad annotations for ENSG00000197177
Fetching gnomad annotations for ENSG00000020181
Fetching gnomad annotations for ENSG00000152990
Fetching gnomad annotations for ENSG00000181790
Fetching gnomad annotations for ENSG00000121753
Fetching gnomad annotations for ENSG00000135298
Fetching gnomad annotations for ENSG00000111452
Fetching gnomad annotations for ENSG00000180264
Fetching gnomad annotations for ENSG00000174837
Fetching gnomad annotations for ENSG00000127507





Fetching gnomad annotations for ENSG00000131355
Fetching gnomad annotations for ENSG00000123146
Fetching gnomad annotations for ENSG00000153292
Fetching gnomad annotations for ENSG00000164393
Fetching gnomad annotations for ENSG00000173567
Fetching gnomad annotations for ENS

In [109]:
 # Write the list of decoded gnomAD responses to disk as one JSON document.
 raw_json_path = '../data/gnomAD_population_variants/gnomad_r2_1_GRCh37_raw_data.json'
 with open(raw_json_path, mode='w') as outfile:
     json.dump(gnomad_variants_r2_1_GRCh37, fp=outfile)