# Querying for Specific SNPs

In [3]:
import requests
import pandas as pd

def query_gwas_catalog_associations(snp, timeout=30):
    """
    Query GWAS Catalog for associations of a specific SNP.

    Sends a single GET request to the ``singleNucleotidePolymorphisms/{snp}/associations``
    REST endpoint with the ``associationBySnp`` projection.

    Parameters:
        snp (str): The SNP ID (e.g., "rs10830962").
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict: The JSON response from the GWAS Catalog API, or None if the query fails
        (non-200 status, network error, or timeout).
    """
    base_url = f"https://www.ebi.ac.uk/gwas/rest/api/singleNucleotidePolymorphisms/{snp}/associations?projection=associationBySnp"
    try:
        # Without an explicit timeout, requests can wait forever on a stalled server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Honour the "None on failure" contract so one bad request does not abort a batch.
        print(f"Associations for SNP {snp} not found or error in request. ({exc})")
        return None

    if response.status_code != 200:
        # Report the status code so "not found" can be told apart from other failures.
        print(f"Associations for SNP {snp} not found or error in request. (HTTP {response.status_code})")
        return None
    return response.json()

def extract_gene_trait_associations(data):
    """
    Extract gene-trait associations from the GWAS Catalog API response.

    For every association under ``data['_embedded']['associations']`` one record is
    produced per (author-reported gene, EFO trait) pair. Associations without traits
    or without author-reported genes therefore contribute no records.

    Parameters:
        data (dict): The JSON response from the GWAS Catalog API. Anything that is not
            a dict, or that lacks '_embedded', yields an empty list.

    Returns:
        list of dict: Records with keys 'Gene Name', 'Ensembl Gene ID' and 'Trait'.
        Missing values are reported as 'N/A'.
    """
    associations = []
    if not isinstance(data, dict):
        return associations

    # ``or`` fallbacks cover keys that are present but null in the JSON,
    # which ``dict.get(key, default)`` alone would pass through as None.
    embedded = data.get('_embedded') or {}
    for association in embedded.get('associations') or []:
        traits = association.get('efoTraits') or []

        for locus in association.get('loci') or []:
            for gene in locus.get('authorReportedGenes') or []:
                # Gene-level fields do not depend on the trait, so resolve them once per gene.
                gene_name = gene.get('geneName', 'N/A')
                ensembl_ids = gene.get('ensemblGeneIds') or []
                # Only the first Ensembl ID is reported.
                ensembl_gene_id = ensembl_ids[0].get('ensemblGeneId', 'N/A') if ensembl_ids else 'N/A'

                for trait in traits:
                    associations.append({
                        'Gene Name': gene_name,
                        'Ensembl Gene ID': ensembl_gene_id,
                        'Trait': trait.get('trait', 'N/A')
                    })
    return associations

def query_multiple_snps(snps):
    """
    Look up each SNP in the GWAS Catalog and collect its gene-trait associations.

    Parameters:
        snps (list of str): SNP IDs to query (e.g., ["rs10830962", "rs1800795"]).

    Returns:
        dict: Maps every SNP ID to the list returned by
        extract_gene_trait_associations, or to an empty list when the query
        returned nothing truthy.
    """
    collected = {}
    for snp in snps:
        print(f"Querying SNP: {snp}...")
        payload = query_gwas_catalog_associations(snp)
        # A falsy payload (None or empty JSON) is recorded as "no associations".
        collected[snp] = extract_gene_trait_associations(payload) if payload else []
    return collected

def associations_to_dataframe(results):
    """
    Convert gene-trait associations for multiple SNPs into a pandas DataFrame.

    Each association becomes one row. A SNP with an empty association list is kept
    as a single placeholder row so it still shows up in the table.

    Parameters:
        results (dict): Maps SNP IDs to lists of association dicts carrying the keys
            'Gene Name', 'Ensembl Gene ID' and 'Trait'.

    Returns:
        pd.DataFrame: A DataFrame with columns: SNP, Gene Name, Ensembl Gene ID, Trait.
        The columns are present even when ``results`` is empty.
    """
    columns = ["SNP", "Gene Name", "Ensembl Gene ID", "Trait"]
    rows = []
    for snp, associations in results.items():
        if associations:
            rows.extend(
                {
                    "SNP": snp,
                    "Gene Name": association['Gene Name'],
                    "Ensembl Gene ID": association['Ensembl Gene ID'],
                    "Trait": association['Trait']
                }
                for association in associations
            )
        else:
            rows.append({
                "SNP": snp,
                "Gene Name": "N/A",
                "Ensembl Gene ID": "N/A",
                "Trait": "No associations found"
            })
    # Passing the columns explicitly keeps the documented schema for an empty input,
    # where pd.DataFrame([]) would otherwise have no columns at all.
    return pd.DataFrame(rows, columns=columns)

# Main execution
if __name__ == "__main__":
    # SNP IDs to look up in the GWAS Catalog
    snps = [
        "rs2624337", "rs73680451", "rs34129253", "rs10152564",
        "rs139154686", "rs554755106", "rs5748810", "rs143821435",
        "rs185742121", "rs142596565", "rs78298311", "rs1374251239",
        "rs1124666353",
    ]

    # One API request per SNP, then flatten everything into a single table
    results = query_multiple_snps(snps)
    associations_df = associations_to_dataframe(results)

    # Plain-text dump of the table
    print(associations_df)

In [4]:
# Main execution
if __name__ == "__main__":
    # SNP IDs to look up in the GWAS Catalog
    snps = [
        "rs2624337", "rs73680451", "rs34129253", "rs10152564",
        "rs139154686", "rs554755106", "rs5748810", "rs143821435",
        "rs185742121", "rs142596565", "rs78298311", "rs1374251239",
        "rs1124666353",
    ]

    # One API request per SNP, then flatten everything into a single table
    results = query_multiple_snps(snps)
    associations_df = associations_to_dataframe(results)

# Leave the frame as the cell's last expression so it is shown as the cell output
# instead of being printed as plain text.
associations_df

Querying SNP: rs2624337...
Associations for SNP rs2624337 not found or error in request.
Querying SNP: rs73680451...
Associations for SNP rs73680451 not found or error in request.
Querying SNP: rs34129253...
Associations for SNP rs34129253 not found or error in request.
Querying SNP: rs10152564...
Associations for SNP rs10152564 not found or error in request.
Querying SNP: rs139154686...
Associations for SNP rs139154686 not found or error in request.
Querying SNP: rs554755106...
Associations for SNP rs554755106 not found or error in request.
Querying SNP: rs5748810...
Associations for SNP rs5748810 not found or error in request.
Querying SNP: rs143821435...
Associations for SNP rs143821435 not found or error in request.
Querying SNP: rs185742121...
Associations for SNP rs185742121 not found or error in request.
Querying SNP: rs142596565...
Associations for SNP rs142596565 not found or error in request.
Querying SNP: rs78298311...
Associations for SNP rs78298311 not found or error in re

Unnamed: 0,SNP,Gene Name,Ensembl Gene ID,Trait
0,rs2624337,,,No associations found
1,rs73680451,,,No associations found
2,rs34129253,,,No associations found
3,rs10152564,,,No associations found
4,rs139154686,,,No associations found
5,rs554755106,,,No associations found
6,rs5748810,,,No associations found
7,rs143821435,,,No associations found
8,rs185742121,,,No associations found
9,rs142596565,,,No associations found


In [6]:
import requests
import pandas as pd

def query_gwas_catalog_by_gene(gene, timeout=30):
    """
    Query GWAS Catalog for SNPs associated with a specific gene.

    NOTE(review): a later run recorded in this notebook shows this exact URL
    returning HTTP 404 for "APOE", so the endpoint path itself may not exist —
    confirm against the GWAS Catalog REST API documentation.

    Parameters:
        gene (str): The gene name (e.g., "FTO").
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict: The JSON response from the GWAS Catalog API, or None if the query fails
        (non-200 status, network error, or timeout).
    """
    base_url = f"https://www.ebi.ac.uk/gwas/rest/api/genes/{gene}/associations?projection=associationByGene"
    try:
        # Without an explicit timeout, requests can wait forever on a stalled server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Honour the "None on failure" contract so one bad request does not abort a batch.
        print(f"Associations for gene {gene} not found or error in request. ({exc})")
        return None

    if response.status_code != 200:
        # Report the status code so "not found" can be told apart from other failures.
        print(f"Associations for gene {gene} not found or error in request. (HTTP {response.status_code})")
        return None
    return response.json()

def extract_snp_trait_associations(data):
    """
    Extract SNP-trait associations from the GWAS Catalog API response.

    For every association under ``data['_embedded']['associations']`` one record is
    produced per (author-reported gene, EFO trait) pair, tagged with the rsId read
    from the association's 'variant' entry.

    Parameters:
        data (dict): The JSON response from the GWAS Catalog API. Anything that is not
            a dict, or that lacks '_embedded', yields an empty list.

    Returns:
        list of dict: Records with keys 'Gene Name', 'SNP' and 'Trait'.
        Missing values are reported as 'N/A'.
    """
    associations = []
    if not isinstance(data, dict):
        return associations

    # ``or`` fallbacks cover keys that are present but null in the JSON,
    # which ``dict.get(key, default)`` alone would pass through as None.
    embedded = data.get('_embedded') or {}
    for association in embedded.get('associations') or []:
        traits = association.get('efoTraits') or []
        snp = (association.get('variant') or {}).get('rsId', 'N/A')

        for locus in association.get('loci') or []:
            for gene in locus.get('authorReportedGenes') or []:
                gene_name = gene.get('geneName', 'N/A')
                for trait in traits:
                    associations.append({
                        'Gene Name': gene_name,
                        'SNP': snp,
                        'Trait': trait.get('trait', 'N/A')
                    })
    return associations

def query_multiple_genes(genes):
    """
    Look up each gene in the GWAS Catalog and collect its SNP-trait associations.

    Parameters:
        genes (list of str): Gene names to query (e.g., ["FTO", "EEA1"]).

    Returns:
        dict: Maps every gene name to the list returned by
        extract_snp_trait_associations, or to an empty list when the query
        returned nothing truthy.
    """
    collected = {}
    for gene in genes:
        print(f"Querying Gene: {gene}...")
        payload = query_gwas_catalog_by_gene(gene)
        # A falsy payload (None or empty JSON) is recorded as "no associations".
        collected[gene] = extract_snp_trait_associations(payload) if payload else []
    return collected

def associations_to_dataframe(results):
    """
    Convert SNP-trait associations for multiple genes into a pandas DataFrame.

    Each association becomes one row. A gene with an empty association list is kept
    as a single placeholder row so it still shows up in the table.

    Parameters:
        results (dict): Maps gene names to lists of association dicts carrying the
            keys 'Gene Name', 'SNP' and 'Trait'.

    Returns:
        pd.DataFrame: A DataFrame with columns: Gene Name, SNP, Trait.
        The columns are present even when ``results`` is empty.
    """
    columns = ["Gene Name", "SNP", "Trait"]
    rows = []
    for gene, associations in results.items():
        if associations:
            rows.extend(
                {
                    "Gene Name": association['Gene Name'],
                    "SNP": association['SNP'],
                    "Trait": association['Trait']
                }
                for association in associations
            )
        else:
            rows.append({
                "Gene Name": gene,
                "SNP": "N/A",
                "Trait": "No associations found"
            })
    # Passing the columns explicitly keeps the documented schema for an empty input,
    # where pd.DataFrame([]) would otherwise have no columns at all.
    return pd.DataFrame(rows, columns=columns)

# Main execution
if __name__ == "__main__":
    # Gene symbols to look up in the GWAS Catalog
    genes = [
        "APOE",
        "APOC",
    ]

    # One API request per gene, then flatten everything into a single table
    results = query_multiple_genes(genes)
    associations_df = associations_to_dataframe(results)

    # Plain-text dump of the table
    print(associations_df)


Querying Gene: APOE...
Associations for gene APOE not found or error in request.
Querying Gene: APOC...
Associations for gene APOC not found or error in request.
  Gene Name  SNP                  Trait
0      APOE  N/A  No associations found
1      APOC  N/A  No associations found


In [7]:
import requests
import pandas as pd

def query_gwas_catalog_by_gene(gene, timeout=30):
    """
    Query GWAS Catalog for variants and associations of a specific gene.

    NOTE(review): a later run recorded in this notebook shows this exact URL
    returning HTTP 404 for "APOE", so the endpoint path itself may not exist —
    confirm against the GWAS Catalog REST API documentation.

    Parameters:
        gene (str): The gene name (e.g., "APOE").
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict: The JSON response from the GWAS Catalog API, or None if the query fails
        (non-200 status, network error, or timeout).
    """
    base_url = f"https://www.ebi.ac.uk/gwas/rest/api/genes/{gene}/associations?projection=associationByGene"
    try:
        # Without an explicit timeout, requests can wait forever on a stalled server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Honour the "None on failure" contract so one bad request does not abort a batch.
        print(f"No associations found or error in request for gene {gene}. ({exc})")
        return None

    if response.status_code != 200:
        # Report the status code so "not found" can be told apart from other failures.
        print(f"No associations found or error in request for gene {gene}. (HTTP {response.status_code})")
        return None
    return response.json()

def extract_snp_trait_associations_from_gene(data, gene_name):
    """
    Extract SNP-trait associations for a specific gene from the GWAS Catalog API response.

    One record is produced per EFO trait of every association under
    ``data['_embedded']['associations']``. Associations without traits contribute
    no records.

    Parameters:
        data (dict): The JSON response from the GWAS Catalog API. Anything that is not
            a dict, or that lacks '_embedded', yields an empty list.
        gene_name (str): Gene label copied verbatim into every record; it is not
            checked against the response.

    Returns:
        list of dict: Records with keys 'Gene Name', 'SNP' and 'Trait'.
        Missing values are reported as 'N/A'.
    """
    associations = []
    if not isinstance(data, dict):
        return associations

    # ``or`` fallbacks cover keys that are present but null in the JSON,
    # which ``dict.get(key, default)`` alone would pass through as None.
    embedded = data.get('_embedded') or {}
    for association in embedded.get('associations') or []:
        snp = (association.get('variant') or {}).get('rsId', 'N/A')
        for trait in association.get('efoTraits') or []:
            associations.append({
                'Gene Name': gene_name,
                'SNP': snp,
                'Trait': trait.get('trait', 'N/A')
            })
    return associations

def query_multiple_genes(genes):
    """
    Look up each gene in the GWAS Catalog and collect its SNP-trait associations.

    Parameters:
        genes (list of str): Gene names to query (e.g., ["APOE", "FTO"]).

    Returns:
        dict: Maps every gene name to the list returned by
        extract_snp_trait_associations_from_gene, or to an empty list when the
        query returned nothing truthy.
    """
    collected = {}
    for gene in genes:
        print(f"Querying Gene: {gene}...")
        payload = query_gwas_catalog_by_gene(gene)
        # A falsy payload (None or empty JSON) is recorded as "no associations".
        collected[gene] = extract_snp_trait_associations_from_gene(payload, gene) if payload else []
    return collected

def associations_to_dataframe(results):
    """
    Convert SNP-trait associations for multiple genes into a pandas DataFrame.

    Each association becomes one row. A gene with an empty association list is kept
    as a single placeholder row so it still shows up in the table.

    Parameters:
        results (dict): Maps gene names to lists of association dicts carrying the
            keys 'Gene Name', 'SNP' and 'Trait'.

    Returns:
        pd.DataFrame: A DataFrame with columns: Gene Name, SNP, Trait.
        The columns are present even when ``results`` is empty.
    """
    columns = ["Gene Name", "SNP", "Trait"]
    rows = []
    for gene, associations in results.items():
        if associations:
            rows.extend(
                {
                    "Gene Name": association['Gene Name'],
                    "SNP": association['SNP'],
                    "Trait": association['Trait']
                }
                for association in associations
            )
        else:
            rows.append({
                "Gene Name": gene,
                "SNP": "N/A",
                "Trait": "No associations found"
            })
    # Passing the columns explicitly keeps the documented schema for an empty input,
    # where pd.DataFrame([]) would otherwise have no columns at all.
    return pd.DataFrame(rows, columns=columns)

# Main execution
if __name__ == "__main__":
    # Gene symbols to look up; extend this list to query more genes
    genes = [
        "APOE",
    ]

    # One API request per gene, then flatten everything into a single table
    results = query_multiple_genes(genes)
    associations_df = associations_to_dataframe(results)

    # Plain-text dump of the table
    print(associations_df)


Querying Gene: APOE...
No associations found or error in request for gene APOE.
  Gene Name  SNP                  Trait
0      APOE  N/A  No associations found


In [8]:
# Inspect the raw per-gene dict built by query_multiple_genes before it was tabulated.
results

{'APOE': []}

In [10]:
import requests
import pandas as pd

def query_gwas_catalog_by_gene(gene, timeout=30):
    """
    Query GWAS Catalog for variants and associations of a specific gene.

    Debug variant: prints the URL, the HTTP status code and, on success, the
    parsed JSON payload.

    NOTE(review): the run recorded below this cell shows this URL returning
    HTTP 404 for "APOE", so the endpoint path itself may not exist — confirm
    against the GWAS Catalog REST API documentation.

    Parameters:
        gene (str): The gene name (e.g., "APOE").
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict: The JSON response from the GWAS Catalog API, or None if the query fails
        (non-200 status, network error, or timeout).
    """
    base_url = f"https://www.ebi.ac.uk/gwas/rest/api/genes/{gene}/associations?projection=associationByGene"
    print(f"Querying URL: {base_url}")  # Debug: Print the URL being queried
    try:
        # Without an explicit timeout, requests can wait forever on a stalled server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Honour the "None on failure" contract so one bad request does not abort a batch.
        print(f"No associations found or error in request for gene {gene}. ({exc})")
        return None

    # Print the raw response status for debugging
    print(f"Response Status Code: {response.status_code}")
    if response.status_code != 200:
        print(f"No associations found or error in request for gene {gene}.")
        return None

    # Decode the body once and reuse it for both the debug print and the return value.
    payload = response.json()
    print("Response JSON:", payload)  # Debug: Print the API response
    return payload

def extract_snp_trait_associations_from_gene(data, gene_name):
    """
    Extract SNP-trait associations for a specific gene from the GWAS Catalog API response.

    One record is produced per EFO trait of every association under
    ``data['_embedded']['associations']``. Associations without traits contribute
    no records.

    Parameters:
        data (dict): The JSON response from the GWAS Catalog API. Anything that is not
            a dict, or that lacks '_embedded', yields an empty list.
        gene_name (str): Gene label copied verbatim into every record; it is not
            checked against the response.

    Returns:
        list of dict: Records with keys 'Gene Name', 'SNP' and 'Trait'.
        Missing values are reported as 'N/A'.
    """
    associations = []
    if not isinstance(data, dict):
        return associations

    # ``or`` fallbacks cover keys that are present but null in the JSON,
    # which ``dict.get(key, default)`` alone would pass through as None.
    embedded = data.get('_embedded') or {}
    for association in embedded.get('associations') or []:
        snp = (association.get('variant') or {}).get('rsId', 'N/A')
        for trait in association.get('efoTraits') or []:
            associations.append({
                'Gene Name': gene_name,
                'SNP': snp,
                'Trait': trait.get('trait', 'N/A')
            })
    return associations

def query_multiple_genes(genes):
    """
    Look up each gene in the GWAS Catalog and collect its SNP-trait associations.

    Parameters:
        genes (list of str): Gene names to query (e.g., ["APOE", "FTO"]).

    Returns:
        dict: Maps every gene name to the list returned by
        extract_snp_trait_associations_from_gene, or to an empty list when the
        query returned nothing truthy.
    """
    collected = {}
    for gene in genes:
        print(f"Querying Gene: {gene}...")
        payload = query_gwas_catalog_by_gene(gene)
        # A falsy payload (None or empty JSON) is recorded as "no associations".
        collected[gene] = extract_snp_trait_associations_from_gene(payload, gene) if payload else []
    return collected

def associations_to_dataframe(results):
    """
    Convert SNP-trait associations for multiple genes into a pandas DataFrame.

    Each association becomes one row. A gene with an empty association list is kept
    as a single placeholder row so it still shows up in the table.

    Parameters:
        results (dict): Maps gene names to lists of association dicts carrying the
            keys 'Gene Name', 'SNP' and 'Trait'.

    Returns:
        pd.DataFrame: A DataFrame with columns: Gene Name, SNP, Trait.
        The columns are present even when ``results`` is empty.
    """
    columns = ["Gene Name", "SNP", "Trait"]
    rows = []
    for gene, associations in results.items():
        if associations:
            rows.extend(
                {
                    "Gene Name": association['Gene Name'],
                    "SNP": association['SNP'],
                    "Trait": association['Trait']
                }
                for association in associations
            )
        else:
            rows.append({
                "Gene Name": gene,
                "SNP": "N/A",
                "Trait": "No associations found"
            })
    # Passing the columns explicitly keeps the documented schema for an empty input,
    # where pd.DataFrame([]) would otherwise have no columns at all.
    return pd.DataFrame(rows, columns=columns)

# Main execution
if __name__ == "__main__":
    # Gene symbols to look up; extend this list to query more genes
    genes = [
        "APOE",
    ]

    # One API request per gene, then flatten everything into a single table
    results = query_multiple_genes(genes)
    associations_df = associations_to_dataframe(results)

    # Plain-text dump of the table
    print(associations_df)


Querying Gene: APOE...
Querying URL: https://www.ebi.ac.uk/gwas/rest/api/genes/APOE/associations?projection=associationByGene
Response Status Code: 404
No associations found or error in request for gene APOE.
  Gene Name  SNP                  Trait
0      APOE  N/A  No associations found


In [13]:
import requests
from bs4 import BeautifulSoup

# Search page to scrape
url = "https://www.ebi.ac.uk/gwas/search?query=APOE"

# Fetch the page
response = requests.get(url)

if response.status_code != 200:
    # Anything other than HTTP 200 is reported and nothing is parsed
    print(f"Failed to retrieve the page. Status code: {response.status_code}")
else:
    # Parse the returned HTML and collect every table row
    # (adjust the selector to the page structure if needed)
    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.find_all("tr")

    # Print the stripped text of each row's cells.
    # In the recorded run only 'ready' was printed, i.e. no <tr> elements were found.
    for row in rows:
        columns = row.find_all("td")
        data = [cell.text.strip() for cell in columns]
        print(data)
print('ready')

ready


In [15]:
# Probe the search API with a plain "query" parameter
# (the recorded run got HTTP 400 for this parameter set).
api_url = "https://www.ebi.ac.uk/gwas/api/search"  # Replace with the actual API endpoint
params = {"query": "APOE"}  # Adjust parameters as needed

response = requests.get(api_url, params=params)

if response.status_code != 200:
    print(f"Failed to fetch API data. Status code: {response.status_code}")
else:
    # Decode the JSON body and dump it
    data = response.json()
    print(data)


Failed to fetch API data. Status code: 400


In [16]:
import requests

# Search endpoint of the GWAS Catalog web application
url = "https://www.ebi.ac.uk/gwas/api/search"

# Query string matching "APOE" in text, title or synonyms, excluding study records
params = {
    "q": '(text:"APOE" OR title:"APOE" OR synonyms:"APOE") AND -resourcename:study',
    "generalTextQuery": "true"
}

# Issue the GET request
response = requests.get(url, params=params)

if response.status_code != 200:
    print(f"Error: {response.status_code}")
else:
    # Decode the JSON body and dump it; replace the print with further processing as needed
    data = response.json()
    print(data)


{'responseHeader': {'status': 0, 'QTime': 0, 'params': {'q': '(text:"APOE" OR title:"APOE" OR synonyms:"APOE") AND -resourcename:study', 'facet.field': 'resourcename', 'defType': 'edismax', 'qf': 'title^2.0 synonyms^20.0 parent^2.0 text^1.0', 'start': '0', 'rows': '1000', 'wt': 'json', 'facet': 'true'}}, 'response': {'numFound': 85, 'start': 0, 'docs': [{'resourcename': 'variant', 'id': 'variant:25796', 'title': 'APOE', 'rsID': 'APOE', 'current_rsID': 'APOE', 'merged_rsID': '', 'associationCount': 3, 'studyCount': 2, 'mappedGenes': ['intergenic'], 'consequence': 'None', 'description': 'NA:NA|NA|None|intergenic', '_version_': 1816241884060712969}, {'resourcename': 'variant', 'id': 'variant:41369768', 'title': 'APOE-E3', 'rsID': 'APOE-E3', 'current_rsID': 'APOE-E3', 'merged_rsID': '', 'associationCount': 1, 'studyCount': 1, 'mappedGenes': ['intergenic'], 'consequence': 'None', 'description': 'NA:NA|NA|None|intergenic', '_version_': 1816241887914229778}, {'resourcename': 'variant', 'id': 

In [17]:
import requests
import pandas as pd

def query_gwas_catalog_by_gene(gene, timeout=30):
    """
    Query GWAS Catalog for data related to a specific gene using the search API.

    The gene name is matched against the ``text``, ``title`` and ``synonyms``
    fields, and documents whose ``resourcename`` is ``study`` are excluded.

    Parameters:
        gene (str): The gene name (e.g., "APOE").
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        list of dict: The documents under ``response.docs`` of the JSON reply, or an
        empty list if the query fails (non-200 status, network error, or timeout).
    """
    url = "https://www.ebi.ac.uk/gwas/api/search"
    params = {
        "q": f'(text:"{gene}" OR title:"{gene}" OR synonyms:"{gene}") AND -resourcename:study',
        "generalTextQuery": "true"
    }

    try:
        # Without an explicit timeout, requests can wait forever on a stalled server.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Same "empty list on failure" contract as for a non-200 status.
        print(f"Error querying gene {gene}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error querying gene {gene}: {response.status_code}")
        return []

    data = response.json()
    # ``or`` fallbacks cover "response"/"docs" keys that are present but null.
    return (data.get("response") or {}).get("docs") or []

def extract_gene_associations(data):
    """
    Turn the search documents into flat records, keeping only variant entries.

    NOTE(review): in the output recorded in this notebook, 'Gene Name' (taken from
    ``title``) holds variant IDs such as "rs429358", and 'Trait' (taken from
    ``description``) holds strings like "19:44908684|19q13.32|Missense variant|APOE"
    rather than a trait — the column labels may not match the content; confirm.

    Parameters:
        data (list): Documents returned by the search API (a list of dicts).

    Returns:
        list of dict: One record per document whose ``resourcename`` is "variant",
        with keys 'Gene Name', 'SNP', 'Trait', 'Study Count' and 'Association Count'.
        Missing fields are reported as "N/A".
    """
    return [
        {
            "Gene Name": doc.get("title", "N/A"),
            "SNP": doc.get("rsID", "N/A"),
            "Trait": doc.get("description", "N/A"),
            "Study Count": doc.get("studyCount", "N/A"),
            "Association Count": doc.get("associationCount", "N/A")
        }
        for doc in data
        if doc.get("resourcename") == "variant"
    ]

def query_and_extract_for_genes(genes):
    """
    Query the GWAS Catalog for each gene and gather all extracted records in one table.

    Parameters:
        genes (list of str): List of gene names to query.

    Returns:
        pd.DataFrame: One row per record returned by extract_gene_associations,
        concatenated in the order the genes were given.
    """
    rows = []
    for gene in genes:
        print(f"Querying Gene: {gene}...")
        # Fetch the documents for this gene and append their extracted records.
        rows += extract_gene_associations(query_gwas_catalog_by_gene(gene))
    return pd.DataFrame(rows)

# Main execution
if __name__ == "__main__":
    # Gene symbols to look up; replace with your genes of interest
    genes = [
        "APOE",
    ]

    # Run the search for every gene and tabulate the variant records
    associations_df = query_and_extract_for_genes(genes)

    # Plain-text dump of the table
    print(associations_df)


Querying Gene: APOE...
      Gene Name          SNP  \
0          APOE         APOE   
1       APOE-E3      APOE-E3   
2       APOE-E4      APOE-E4   
3      rs429358     rs429358   
4        rs7412       rs7412   
5      rs769449     rs769449   
6      rs769450     rs769450   
7      rs769455     rs769455   
8   rs115299243  rs115299243   
9      rs440446     rs440446   
10     rs769452     rs769452   
11     rs769448     rs769448   
12  rs367866106  rs367866106   
13     rs439401     rs439401   
14     rs445925     rs445925   
15    rs1065853    rs1065853   
16     rs438811     rs438811   
17    rs7259620    rs7259620   
18   rs75627662   rs75627662   
19   rs10414043   rs10414043   
20    rs7256200    rs7256200   
21     rs483082     rs483082   
22     rs769446     rs769446   
23  rs376097536  rs376097536   
24    rs1081106    rs1081106   
25   rs59325138   rs59325138   
26  rs138235833  rs138235833   
27     rs449647     rs449647   
28    rs1081105    rs1081105   
29     rs390082  

In [18]:
# Show the DataFrame as the cell output rather than the wrapped plain-text print.
associations_df

Unnamed: 0,Gene Name,SNP,Trait,Study Count,Association Count
0,APOE,APOE,NA:NA|NA|None|intergenic,2,3
1,APOE-E3,APOE-E3,NA:NA|NA|None|intergenic,1,1
2,APOE-E4,APOE-E4,NA:NA|NA|None|intergenic,1,1
3,rs429358,rs429358,19:44908684|19q13.32|Missense variant|APOE,616,633
4,rs7412,rs7412,19:44908822|19q13.32|Missense variant|APOE,553,601
5,rs769449,rs769449,19:44906745|19q13.32|Non coding transcript exo...,101,112
6,rs769450,rs769450,19:44907187|19q13.32|Non coding transcript exo...,27,29
7,rs769455,rs769455,19:44908783|19q13.32|Missense variant|APOE,8,9
8,rs115299243,rs115299243,19:44907291|19q13.32|Non coding transcript exo...,3,4
9,rs440446,rs440446,19:44905910|19q13.32|Missense variant|APOE,16,16
