In [13]:
import pandas as pd
from clinvar_parser import enrich_clinvar_df

# Read the sampled ClinVar parquet file and hand it straight to the project's
# enrichment helper; the enriched frame is what the rest of the notebook uses.
clinvar_df = enrich_clinvar_df(pd.read_parquet("sampled_100.parquet"))

In [14]:
from clingen_handler import load_clingen_validity, get_clingen_classification
# ClinGen gene-disease summary export; the date in the file name pins the
# snapshot this notebook was run against.
clingen_summary_csv = "Clingen-Gene-Disease-Summary-2025-07-01.csv"
clingen_df = load_clingen_validity(clingen_summary_csv)

In [15]:
# Add a ClinGen_Validity column: the ClinGen classification looked up for each row's GENE value
# One get_clingen_classification call per row, always against the same
# clingen_df table loaded earlier.
gene_symbols = clinvar_df["GENE"]
clinvar_df["ClinGen_Validity"] = gene_symbols.apply(
    lambda symbol: get_clingen_classification(symbol, clingen_df)
)

In [19]:
import requests

def get_pubmed_ids_from_clinvar(variation_id):
    """Return the PubMed IDs that NCBI ELink reports for a ClinVar record.

    Sends one ELink request (``dbfrom=clinvar``, ``db=pubmed``) and collects
    the ``links`` of every ``linksetdbs`` entry whose ``dbto`` is ``"pubmed"``
    in the first linkset of the JSON response.

    Args:
        variation_id: ClinVar identifier sent as the ELink ``id`` parameter.

    Returns:
        list: The linked PubMed IDs as they appear in the response. An empty
        list is returned when the record has no PubMed links, and also when
        the request fails or the response is not valid JSON (best effort);
        in the failure case a warning is logged so that an outage or rate
        limit is not mistaken for "no publications".
    """
    import logging  # local import keeps this cell runnable on its own

    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"
    params = {
        "dbfrom": "clinvar",
        "db": "pubmed",
        "id": variation_id,
        "retmode": "json"
    }
    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Only network/HTTP/JSON failures are absorbed here. A bare `except:`
        # would also swallow KeyboardInterrupt, making a long loop over many
        # variants impossible to stop from the notebook.
        logging.getLogger(__name__).warning(
            "ClinVar -> PubMed lookup failed for %r: %s", variation_id, exc
        )
        return []

    # Parse defensively: a payload with an unexpected shape yields "no links"
    # rather than an exception, matching the best-effort contract.
    linksets = data.get("linksets", []) if isinstance(data, dict) else []
    if not isinstance(linksets, list) or not linksets:
        return []
    first_linkset = linksets[0]
    if not isinstance(first_linkset, dict):
        return []

    pmids = []
    for db in first_linkset.get("linksetdbs") or []:
        if isinstance(db, dict) and db.get("dbto") == "pubmed":
            pmids.extend(db.get("links") or [])
    return pmids

def build_pubmed_links(pmid_list):
    """Turn PubMed IDs into their pubmed.ncbi.nlm.nih.gov article URLs.

    Args:
        pmid_list: Iterable of PubMed IDs; each is formatted into the URL.

    Returns:
        list[str]: One URL per ID, in input order.
    """
    links = []
    for pmid in pmid_list:
        links.append(f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/")
    return links

In [24]:
# Work on the first 50 rows only, to keep the number of NCBI requests small.
# .iloc makes the positional intent explicit; plain df[0:50] row slicing
# depends on the index type for how the bounds are interpreted.
clin = clinvar_df.iloc[:50]

In [None]:
import pandas as pd

# Requires `clin` (the variant sample) to have been created by an earlier cell.
# For each variant, look up the PubMed IDs linked to its ClinVar record and
# report them; rows without an ID are skipped.
for i, row in clin.head(50).iterrows():
    variation_id = row["ID"]
    if pd.isna(variation_id):
        print(f"{i}. satır → ID boş, atlandı.")
        continue

    pubmed_ids = get_pubmed_ids_from_clinvar(variation_id)
    pubmed_links = build_pubmed_links(pubmed_ids)

    # Report the result for this row. Without this the loop only overwrites
    # pubmed_ids / pubmed_links on every iteration and shows nothing; the
    # format follows the output recorded for this cell.
    print(f"🔍 {i}. satır - Variation ID: {variation_id}")
    print(f"🔗 PubMed IDs: {pubmed_ids}")
    print(f"📚 PubMed Links: {pubmed_links if pubmed_links else 'Yok'}")
    print()

# NOTE(review): `merged_df` and `st` are not defined in any cell visible in
# this notebook (`st` is presumably Streamlit - confirm); as written this loop
# needs them to be provided elsewhere.
for i, row in merged_df.iterrows():
    st.write(f"🔍 {i+1}. varyant işleniyor: {row['CHROM']}:{row['POS']} {row['REF']}>{row['ALT']}")

    # Only query ClinVar when the row actually carries an ID.
    if pd.isna(row['ID']):
        pubmed_ids = []
    else:
        pubmed_ids = get_pubmed_ids_from_clinvar(row['ID'])
    pubmed_links = build_pubmed_links(pubmed_ids)



🔍 0. satır - Variation ID: 1555617
🔗 PubMed IDs: ['28492532']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/28492532/']

🔍 1. satır - Variation ID: 1455280
🔗 PubMed IDs: ['28492532']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/28492532/']

🔍 2. satır - Variation ID: 451711
🔗 PubMed IDs: ['28492532']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/28492532/']

🔍 3. satır - Variation ID: 2903273
🔗 PubMed IDs: []
📚 PubMed Links: Yok

🔍 4. satır - Variation ID: 1297461
🔗 PubMed IDs: ['25741868']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/25741868/']

🔍 5. satır - Variation ID: 2962262
🔗 PubMed IDs: ['31536184', '28492532']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/31536184/', 'https://pubmed.ncbi.nlm.nih.gov/28492532/']

🔍 6. satır - Variation ID: 2505191
🔗 PubMed IDs: ['25741868']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.gov/25741868/']

🔍 7. satır - Variation ID: 3594131
🔗 PubMed IDs: ['25741868', '20301643']
📚 PubMed Links: ['https://pubmed.ncbi.nlm.nih.

In [6]:
import requests
def get_pubmed_ids_from_clinvar(variation_id):
    """Return the PubMed IDs that NCBI ELink reports for a ClinVar record.

    NOTE(review): an earlier cell in this notebook defines a function with
    the same name; whichever cell runs last wins. Consider keeping only one.

    Args:
        variation_id: ClinVar identifier sent as the ELink ``id`` parameter.

    Returns:
        list: ``links`` of every ``linksetdbs`` entry whose ``dbto`` is
        ``"pubmed"`` in the first linkset; empty when there is no linkset or
        it has no ``linksetdbs`` key.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"
    params = {
        "dbfrom": "clinvar",
        "db": "pubmed",
        "id": variation_id,
        "retmode": "json"
    }
    # requests has no default timeout; without one a stalled connection would
    # block the kernel indefinitely.
    response = requests.get(url, params=params, timeout=10)
    # Fail loudly on HTTP errors (e.g. rate limiting) instead of parsing an
    # error body and silently reporting "no PubMed IDs".
    response.raise_for_status()
    data = response.json()
    linksets = data.get("linksets", [])
    if not linksets or "linksetdbs" not in linksets[0]:
        return []
    pmids = []
    for db in linksets[0]["linksetdbs"]:
        if db["dbto"] == "pubmed":
            pmids.extend(db["links"])
    return pmids


In [16]:
# Smoke test: fetch and show the PubMed IDs linked to one ClinVar record.
example_pmids = get_pubmed_ids_from_clinvar(370969)
print(example_pmids)

['28492532', '27391550', '26908613', '25741868', '21447391', '20816175', '19299310']


In [160]:
def get_pubmed_links(pmid_list):
    """Map each PubMed ID to the URL of its page on pubmed.ncbi.nlm.nih.gov.

    Args:
        pmid_list: Iterable of PubMed IDs; each is formatted into the URL.

    Returns:
        list[str]: The article URLs, in the same order as the input IDs.
    """
    def to_url(pmid):
        return f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"

    return list(map(to_url, pmid_list))

# Example usage: fetch the PubMed IDs linked to one ClinVar record, turn them
# into article URLs, and print the resulting list.
# Performs a live request to NCBI, so the printed list can change over time.
pmids = get_pubmed_ids_from_clinvar(3385321)
links = get_pubmed_links(pmids)
print(links)


['https://pubmed.ncbi.nlm.nih.gov/26666451/', 'https://pubmed.ncbi.nlm.nih.gov/25741868/', 'https://pubmed.ncbi.nlm.nih.gov/22234150/', 'https://pubmed.ncbi.nlm.nih.gov/20301590/']
