<a href="https://colab.research.google.com/github/comparativechrono/QGrE_Query_Gnomad_with_rsIDs_using_Ensembl/blob/main/QGrE_Query_Gnomad_with_rsIDs_using_Ensembl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To run this code you need an input file called rsID.txt containing your rsIDs, one per line. For each rsID the script queries the Ensembl REST API, extracts the gnomAD genomes (gnomADg) allele frequencies for every allele of that variant, and combines the results into a single table, which is printed and also saved as gnomad_frequencies.csv.


In [1]:
import requests
import pandas as pd

def get_gnomad_frequencies(rsid, timeout=30):
    """Fetch the gnomAD genomes population frequencies for one variant.

    Queries the Ensembl REST endpoint ``/variation/human/{rsid}?pops=1`` and
    keeps only the population entries whose ``population`` field starts with
    ``gnomADg``.

    Args:
        rsid: Variant identifier to look up (for example ``"rs334"``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang an entire batch run.

    Returns:
        A list of the matching population dicts as returned by Ensembl; the
        list is empty when the variant has no ``gnomADg`` entries.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems or a timeout.
    """
    server = "https://rest.ensembl.org"
    ext = f"/variation/human/{rsid}?pops=1"

    r = requests.get(
        server + ext,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )

    # raise_for_status() is a no-op for successful responses and raises for
    # every response where ``r.ok`` is false, so no fallback return is needed.
    r.raise_for_status()

    data = r.json()
    return [
        pop
        for pop in data.get('populations', [])
        if pop['population'].startswith('gnomADg')
    ]

def main(input_path='rsID.txt', output_path='gnomad_frequencies.csv'):
    """Build and save a table of gnomAD genomes frequencies for a list of rsIDs.

    Reads one rsID per line from ``input_path``, looks each one up with
    ``get_gnomad_frequencies``, writes the combined table to ``output_path``
    as CSV (without the index column) and returns it.

    Args:
        input_path: Text file with one rsID per line. Surrounding whitespace
            is stripped and blank lines are skipped, so a trailing empty
            line does not trigger a request for an empty identifier.
        output_path: Path of the CSV file to write.

    Returns:
        A DataFrame with the columns ``rsID``, ``Population``, ``Allele``,
        ``Frequency`` and ``Allele Count``. An rsID whose lookup returns
        nothing gets a single row whose ``Population`` is
        ``'No GnomAD data available'`` and whose remaining columns are empty.

    Raises:
        Any exception raised by ``get_gnomad_frequencies`` propagates
        unchanged; in that case no CSV file is written.
    """
    columns = ['rsID', 'Population', 'Allele', 'Frequency', 'Allele Count']

    with open(input_path, 'r') as file:
        rsids = [line.strip() for line in file if line.strip()]

    rows = []
    for rsid in rsids:
        frequencies = get_gnomad_frequencies(rsid)
        if not frequencies:
            rows.append({'rsID': rsid, 'Population': 'No GnomAD data available'})
            continue
        for freq in frequencies:
            rows.append({
                'rsID': rsid,
                'Population': freq['population'],
                'Allele': freq['allele'],
                'Frequency': freq['frequency'],
                'Allele Count': freq['allele_count'],
            })

    # Build the frame in one step with a fixed column order. This keeps the
    # header even when there are no rows and avoids concatenating onto an
    # empty placeholder frame, a pattern pandas has deprecated.
    df = pd.DataFrame(rows, columns=columns)

    # Save DataFrame to a CSV file
    df.to_csv(output_path, index=False)

    return df

# Run the main function: it reads the rsIDs, queries Ensembl for each one and
# writes the CSV file. The returned table is kept in the module-level name
# `df` so it stays available after this cell, then printed for a quick check.
df = main()
print(df)


     rsID   Population Allele  Frequency Allele Count
0   rs334  gnomADg:eas      T   1.000000         5198
1   rs334  gnomADg:afr      T   0.956600        39633
2   rs334  gnomADg:afr      A   0.043420         1799
3   rs334  gnomADg:mid      T   1.000000          316
4   rs334  gnomADg:ami      T   1.000000          912
5   rs334  gnomADg:oth      A   0.007177           15
6   rs334  gnomADg:oth      T   0.992800         2075
7   rs334  gnomADg:amr      A   0.005890           90
8   rs334  gnomADg:amr      T   0.994100        15190
9   rs334  gnomADg:asj      T   1.000000         3470
10  rs334  gnomADg:nfe      A   0.000162           11
11  rs334  gnomADg:nfe      T   0.999800        68027
12  rs334  gnomADg:ALL      A   0.012660         1926
13  rs334  gnomADg:ALL      T   0.987300       150250
14  rs334  gnomADg:sas      T   0.997700         4819
15  rs334  gnomADg:sas      A   0.002277           11
16  rs334  gnomADg:fin      T   1.000000        10610
