In [2]:
import pandas as pd
import requests
import time
from IPython.display import display

# == Load your gene list ==
# The table must provide a "Gene" column (values such as "21_AT1"). The original note
# also asks for an "rTS" column; it is not read by the mapping code in this cell.
top_genes_df = pd.read_csv("table_individual_rts.csv")
if "Gene" not in top_genes_df.columns:
    # Fail fast with a readable message instead of a KeyError inside the mapping loop.
    raise ValueError(
        'table_individual_rts.csv must contain a "Gene" column; '
        f"found columns: {list(top_genes_df.columns)}"
    )

# == Fetch official gene symbol and description from NCBI ==
def get_gene_info(ncbi_id):
    """Look up a gene's name and description through the NCBI ESummary endpoint.

    Args:
        ncbi_id: Identifier to query in the ``gene`` database. Strings and integers
            are both accepted; the value is converted to ``str`` (and stripped)
            because the JSON result object is keyed by string.

    Returns:
        tuple: ``(name, description)`` read from the summary record, each falling
        back to ``"N/A"`` when the field is absent. ``("N/A", "N/A")`` is returned
        when the id is blank, the request fails, the HTTP status is not 200, the
        body is not the expected JSON, or the record for the id is missing.
    """
    not_available = ("N/A", "N/A")

    gene_key = "" if ncbi_id is None else str(ncbi_id).strip()
    if not gene_key:
        # Nothing to look up; avoid spending a request on an empty id.
        return not_available

    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    params = {"db": "gene", "id": gene_key, "retmode": "json"}
    try:
        response = requests.get(url, params=params, timeout=5)
        if response.status_code != 200:
            return not_available
        summary = response.json().get("result", {}).get(gene_key)
        if not summary:
            return not_available
        return summary.get("name", "N/A"), summary.get("description", "N/A")
    except (requests.exceptions.RequestException, ValueError, AttributeError):
        # Best-effort lookup: network failures, undecodable JSON (ValueError) and
        # unexpectedly shaped payloads (AttributeError) all map to the placeholder.
        # Unrelated programming errors are deliberately not swallowed.
        return not_available

# == Optional: Check if gene exists in BiGG database ==
def search_bigg_gene(symbol):
    """Report whether the BiGG search page for ``symbol`` appears to list genes.

    The symbol is sent as the ``query`` URL parameter via ``params`` so that
    characters such as spaces, ``&`` or ``#`` are percent-encoded instead of
    corrupting the query string.

    Args:
        symbol: Search text, typically a gene symbol.

    Returns:
        str: ``"Found"`` when the response body contains the text ``"Genes"``,
        ``"Not Found"`` when it does not, ``"Connection Error"`` when the request
        raises a ``requests`` exception, and ``"Error"`` for any non-200 status.
    """
    url = "https://bigg.ucsd.edu/search"
    try:
        response = requests.get(url, params={"query": symbol}, timeout=5)
        if response.status_code != 200:
            return "Error"
        # NOTE(review): this is a plain substring match on the page text; confirm
        # that "Genes" does not also appear on result pages without gene hits.
        return "Found" if "Genes" in response.text else "Not Found"
    except requests.exceptions.RequestException:
        return "Connection Error"

# == Process gene mapping ==
# Resolve each entry of the "Gene" column to its NCBI name and description,
# collecting one record per input row.
mapped_results = []

for _, record in top_genes_df.iterrows():
    gene_id = record["Gene"]
    # Only the part before the first underscore is sent to NCBI ("21_AT1" -> "21").
    ncbi_id = gene_id.partition("_")[0]
    symbol, fullname = get_gene_info(ncbi_id)
    # To also record BiGG presence, compute
    #   search_bigg_gene(symbol) if symbol != "N/A" else "N/A"
    # at this point and add it to the record below.
    mapped_row = {"Gene ID": gene_id, "Symbol": symbol, "Full Name": fullname}
    mapped_results.append(mapped_row)
    time.sleep(1)  # Respect API limits

# == Create final DataFrame ==
# Passing the column list to the constructor fixes the column order and keeps the
# frame well-formed (three empty columns) when no genes were mapped. Selecting the
# columns from an empty, column-less frame afterwards would raise KeyError.
df_mapped = pd.DataFrame(mapped_results, columns=["Gene ID", "Symbol", "Full Name"])

# == Display and export ==
print("\n== Final Gene Mapping Table ==")
display(df_mapped)
df_mapped.to_csv("gene_mapping_output.csv", index=False)
print("Archive saved as gene_mapping_output.csv")



== Final Gene Mapping Table ==


Unnamed: 0,Gene ID,Symbol,Full Name
0,21_AT1,ABCA3,ATP binding cassette subfamily A member 3
1,132158_AT1,GLYCTK,glycerate kinase
2,5230_AT1,PGK1,phosphoglycerate kinase 1
3,8781_AT1,PSPHP1,phosphoserine phosphatase pseudogene 1
4,283209_AT1,PGM2L1,phosphoglucomutase 2 like 1


Archive saved as gene_mapping_output.csv


In [3]:
def get_pubmed_articles(gene_symbol, disease="Alzheimer", max_results=5):
    """Search PubMed for articles matching ``"<gene_symbol> AND <disease>"``.

    Two NCBI E-utilities calls are made: ESearch to obtain article ids, then
    ESummary to obtain their titles.

    Args:
        gene_symbol: Gene symbol used as the first search term. A blank or ``None``
            symbol yields an empty list without contacting NCBI.
        disease: Second search term, combined with the symbol using ``AND``.
        max_results: Value sent as ESearch ``retmax``.

    Returns:
        list[dict]: One ``{"title": ..., "url": ...}`` entry per id that has a
        summary record, in the order the ids were returned. The title falls back
        to ``"No title"``. Empty when the search returns no ids.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout, or a
            non-success HTTP status (via ``raise_for_status``).
        ValueError: When a response body is not valid JSON.
    """
    if gene_symbol is None or not str(gene_symbol).strip():
        # Without a symbol the query would degenerate to "<blank> AND <disease>".
        return []

    query = f"{gene_symbol} AND {disease}"
    # Same timeout as the other request helpers in this notebook, so a stalled
    # connection cannot hang the cell indefinitely.
    request_timeout = 5

    # Step 1: Search PubMed and get article IDs
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results
    }
    search_response = requests.get(search_url, params=search_params, timeout=request_timeout)
    # Surface HTTP failures explicitly instead of as an opaque JSON/Key error.
    search_response.raise_for_status()
    ids = search_response.json().get("esearchresult", {}).get("idlist", [])

    if not ids:
        return []

    # Step 2: Fetch summaries (titles)
    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    fetch_params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "json"
    }
    fetch_response = requests.get(fetch_url, params=fetch_params, timeout=request_timeout)
    fetch_response.raise_for_status()
    summaries = fetch_response.json().get("result", {})

    # Ids without a summary record are skipped rather than reported with no title.
    return [
        {
            "title": summaries[pid].get("title", "No title"),
            "url": f"https://pubmed.ncbi.nlm.nih.gov/{pid}/",
        }
        for pid in ids
        if pid in summaries
    ]

# Print the PubMed hits for every gene symbol that was successfully resolved.
for symbol in df_mapped["Symbol"]:
    # "N/A" is the placeholder get_gene_info returns for unresolved genes; searching
    # PubMed for it would list unrelated articles, so those rows are skipped.
    if symbol == "N/A":
        continue
    print(f"\n {symbol} — Articles related to Alzheimer's:")
    for art in get_pubmed_articles(symbol):
        print(f"- {art['title']}\n  {art['url']}")
    # Each gene costs two E-utilities requests; pause as the mapping loop does.
    time.sleep(1)  # Respect API limits


 ABCA3 — Articles related to Alzheimer's:
- A-Subclass ATP-Binding Cassette Proteins in Brain Lipid Homeostasis and Neurodegeneration.
  https://pubmed.ncbi.nlm.nih.gov/22403555/
- Role of ATP-binding cassette transporters in brain lipid transport and neurological disease.
  https://pubmed.ncbi.nlm.nih.gov/17973979/
- [ABC A-subclass transporters--key regulators of molecular lipid transport].
  https://pubmed.ncbi.nlm.nih.gov/17634870/
- ABC transporters, neural stem cells and neurogenesis--a different perspective.
  https://pubmed.ncbi.nlm.nih.gov/17088897/

 GLYCTK — Articles related to Alzheimer's:

 PGK1 — Articles related to Alzheimer's:
- Glycolysis-enhancing α1-adrenergic antagonists are neuroprotective in Alzheimer's disease.
  https://pubmed.ncbi.nlm.nih.gov/40236185/
- Bioinformatic analysis of hippocampal histopathology in Alzheimer's disease and the therapeutic effects of active components of traditional Chinese medicine.
  https://pubmed.ncbi.nlm.nih.gov/39221152/
- Decip