In [31]:
import re
# API handling 
from easy_entrez import EntrezAPI
# Parsing information
from easy_entrez.parsing import xml_to_string

import xml.etree.ElementTree as ET

In [78]:
# Create the Entrez client, identified by a tool name and a contact e-mail.
entrez_api = EntrezAPI(
    'ret_toolkit',
    'fernando.ggfigueroa@icloud.com',
    return_type='json'
)

# dbSNP variant identifiers to look up. Later cells read `ret_snp.data[1]`,
# so keep rs662138 in second position (assumes records come back in request
# order, as the recorded output suggests -- TODO confirm).
rs_ids = ['rs6311', 'rs662138']

# Fetch the variant records from dbSNP. `max_results` is derived from the list
# so that adding an identifier above cannot silently truncate the response.
ret_snp = entrez_api.fetch(rs_ids, max_results=len(rs_ids), database='snp')
print(ret_snp)           # <EntrezResponse status=200 for FetchQuery ['rs6311', 'rs662138'] in snp>
print(ret_snp.data[0])   # <Element '{https://www.ncbi.nlm.nih.gov/SNP/docsum}DocumentSummary' at 0x...>

<EntrezResponse status=200 for FetchQuery ['rs6311', 'rs662138'] in snp>
<Element '{https://www.ncbi.nlm.nih.gov/SNP/docsum}DocumentSummary' at 0x0000022B786BEC00>


In [74]:
# Dump the second fetched record as indented XML to inspect which fields
# (MAF studies, genes, ...) are available before parsing it.
print(xml_to_string(ret_snp.data[1]))

<?xml version="1.0" ?>
<ns0:DocumentSummary xmlns:ns0="https://www.ncbi.nlm.nih.gov/SNP/docsum" uid="662138">
    <ns0:SNP_ID>662138</ns0:SNP_ID>
    <ns0:ALLELE_ORIGIN/>
    <ns0:GLOBAL_MAFS>
        <ns0:MAF>
            <ns0:STUDY>1000Genomes</ns0:STUDY>
            <ns0:FREQ>G=0.121487/608</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY>ALSPAC</ns0:STUDY>
            <ns0:FREQ>G=0.193046/744</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY>Estonian</ns0:STUDY>
            <ns0:FREQ>G=0.154464/692</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY>GENOME_DK</ns0:STUDY>
            <ns0:FREQ>G=0.225/9</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY>GnomAD</ns0:STUDY>
            <ns0:FREQ>G=0.138558/19420</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY>GoNL</ns0:STUDY>
            <ns0:FREQ>G=0.162325/162</ns0:FREQ>
        </ns0:MAF>
        <ns0:MAF>
            <ns0:STUDY

In [75]:
# `ret_snp.data[1]` is already an ElementTree Element (see the repr printed
# after the fetch), so use it directly instead of serialising it to a
# pretty-printed string and parsing that string back into a tree.
ret_snp_xml = ret_snp.data[1]

In [76]:
# Every tag in the record lives in the dbSNP docsum namespace; keep the
# Clark-notation prefix in one place instead of repeating it in each path.
DOCSUM_NS = '{https://www.ncbi.nlm.nih.gov/SNP/docsum}'
MISSING = 'N/A'  # printed when an expected child element is absent

# --- Minor-allele frequencies, one line per study -------------------------
maf_elements = ret_snp_xml.findall(f'.//{DOCSUM_NS}MAF')
for maf in maf_elements:
    # findtext() returns the default instead of raising when a child is
    # missing, so one incomplete <MAF> entry cannot abort the whole report.
    study = maf.findtext(f'{DOCSUM_NS}STUDY', default=MISSING)
    freq = maf.findtext(f'{DOCSUM_NS}FREQ', default=MISSING)
    print(f"Study: {study}, Frequency: {freq}")

# --- Genes associated with the variant ------------------------------------
genes_element = ret_snp_xml.find(f'.//{DOCSUM_NS}GENES')
if genes_element is None:
    print("No GENES element found in the XML.")
else:
    # A variant may list several genes: report every <GENE_E>, not only the
    # first one.
    gene_entries = genes_element.findall(f'{DOCSUM_NS}GENE_E')
    if not gene_entries:
        print("GENES element is present but lists no genes.")
    for gene_entry in gene_entries:
        gene_name = gene_entry.findtext(f'{DOCSUM_NS}NAME', default=MISSING)
        gene_id = gene_entry.findtext(f'{DOCSUM_NS}GENE_ID', default=MISSING)

        print(f"Gene Name: {gene_name}")
        print(f"Gene ID: {gene_id}")

Study: 1000Genomes, Frequency: G=0.121487/608
Study: ALSPAC, Frequency: G=0.193046/744
Study: Estonian, Frequency: G=0.154464/692
Study: GENOME_DK, Frequency: G=0.225/9
Study: GnomAD, Frequency: G=0.138558/19420
Study: GoNL, Frequency: G=0.162325/162
Study: HapMap, Frequency: G=0.117647/192
Study: KOREAN, Frequency: G=0.001027/3
Study: MGP, Frequency: G=0.136704/73
Study: NorthernSweden, Frequency: G=0.155/93
Study: PAGE_STUDY, Frequency: G=0.117242/9226
Study: PRJEB36033, Frequency: G=0.088889/8
Study: PRJEB37584, Frequency: G=0.002525/2
Study: PRJEB37766, Frequency: G=0.291363/958
Study: Qatari, Frequency: G=0.097222/21
Study: SGDP_PRJ, Frequency: C=0.47541/58
Study: Siberian, Frequency: C=0.444444/8
Study: TOPMED, Frequency: G=0.141025/37328
Study: TWINSUK, Frequency: G=0.171521/636
Study: ALFA, Frequency: G=0.160454/13541
Gene Name: SLC22A1
Gene ID: 6580
