<a href="https://colab.research.google.com/github/comparativechrono/query_ensembl_for_variant_info/blob/main/Ensembl_API_request_variants.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import requests
import pandas as pd

def get_variant_info(rsid, assembly):
    if assembly == 'grch37':
        server = "https://grch37.rest.ensembl.org"
    else:
        server = "https://rest.ensembl.org"
    ext = f"/variation/human/{rsid}?"
    headers = {"Content-Type": "application/json"}

    url = server + ext
    print(f"Requesting URL: {url}")
    response = requests.get(url, headers=headers)
    if not response.ok:
        print(f"Error fetching {rsid} from {assembly}: {response.text}")
        response.raise_for_status()

    return response.json()

def extract_coordinates(mappings, assembly_name):
    for mapping in mappings:
        if mapping['assembly_name'] == assembly_name:
            return {
                'Chromosome': mapping['seq_region_name'],
                'Start': mapping['start'],
                'End': mapping['end']
            }
    return None

def get_coordinates(rsid_list):
    data = []
    for rsid in rsid_list:
        try:
            info_GRCh38 = get_variant_info(rsid, 'human')
            info_GRCh37 = get_variant_info(rsid, 'grch37')

            coords_GRCh38 = extract_coordinates(info_GRCh38.get('mappings', []), 'GRCh38')
            coords_GRCh37 = extract_coordinates(info_GRCh37.get('mappings', []), 'GRCh37')

            data.append({
                "rsID": rsid,
                "GRCh38_Chromosome": coords_GRCh38['Chromosome'] if coords_GRCh38 else None,
                "GRCh38_Start": coords_GRCh38['Start'] if coords_GRCh38 else None,
                "GRCh38_End": coords_GRCh38['End'] if coords_GRCh38 else None,
                "GRCh37_Chromosome": coords_GRCh37['Chromosome'] if coords_GRCh37 else None,
                "GRCh37_Start": coords_GRCh37['Start'] if coords_GRCh37 else None,
                "GRCh37_End": coords_GRCh37['End'] if coords_GRCh37 else None,
            })
        except Exception as e:
            data.append({
                "rsID": rsid,
                "GRCh38_Chromosome": None,
                "GRCh38_Start": None,
                "GRCh38_End": None,
                "GRCh37_Chromosome": None,
                "GRCh37_Start": None,
                "GRCh37_End": None,
                "Error": str(e)
            })

    return pd.DataFrame(data)

# List of rsIDs
rsid_list = ['rs56116432', 'rs456', 'rs789']  # Replace with your actual rsIDs

# Get coordinates
coordinates_df = get_coordinates(rsid_list)

# Display the results
print(coordinates_df)

# Save the DataFrame to a text file
output_file = "rsid_coordinates.txt"
coordinates_df.to_csv(output_file, sep='\t', index=False)
print(f"DataFrame saved to {output_file}")


Requesting URL: https://rest.ensembl.org/variation/human/rs56116432?
Requesting URL: https://grch37.rest.ensembl.org/variation/human/rs56116432?
Requesting URL: https://rest.ensembl.org/variation/human/rs456?
Requesting URL: https://grch37.rest.ensembl.org/variation/human/rs456?
Requesting URL: https://rest.ensembl.org/variation/human/rs789?
Requesting URL: https://grch37.rest.ensembl.org/variation/human/rs789?
         rsID GRCh38_Chromosome  GRCh38_Start  GRCh38_End GRCh37_Chromosome  \
0  rs56116432                 9     133256042   133256042        HG79_PATCH   
1       rs456                 7      24922800    24922800                 7   
2       rs789                 3      29355524    29355524                 3   

   GRCh37_Start  GRCh37_End  
0     136131576   136131576  
1      24962419    24962419  
2      29397015    29397015  
DataFrame saved to rsid_coordinates.txt
