In [1]:
!pip -q install requests pandas
import requests, pandas as pd

In [3]:
def fetch_ensembl_variation(rsid: str) -> dict:
    url = f"https://rest.ensembl.org/variation/human/{rsid}?"
    headers = {"Content-Type": "application/json"}
    r = requests.get(url, headers=headers, timeout=20)
    r.raise_for_status()
    d = r.json()

    out = {
        "rsID": d.get("name"),
        "variant_type": d.get("var_class"),
        "most_severe_consequence": d.get("most_severe_consequence"),
        "clinical_significance_combined": ", ".join(d.get("clinical_significance", [])) if d.get("clinical_significance") else None
    }

    mappings = d.get("mappings", []) or []
    if mappings:
        m0 = mappings[0]  # first mapping
        out.update({
            "allele_string": m0.get("allele_string"),
            "chromosome": m0.get("seq_region_name"),
            "position": m0.get("start"),
            "genome_build": m0.get("assembly_name")
        })
    return out


def fetch_ensembl_vep(rsid: str) -> dict:
    url = f"https://rest.ensembl.org/vep/human/id/{rsid}?content-type=application/json"
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    payload = r.json()
    if not payload:
        return {}

    v = payload[0]  # first (and usually only) result for a single rsID

    gene_symbol, consequences, impact = None, [], None
    tcs = v.get("transcript_consequences", []) or []
    if tcs:
        for tc in tcs:
            if "gene_symbol" in tc:
                gene_symbol = tc.get("gene_symbol")
                consequences = tc.get("consequence_terms", []) or []
                impact = tc.get("impact")
                break
        if gene_symbol is None and tcs:
            consequences = tcs[0].get("consequence_terms", []) or []
            impact = tcs[0].get("impact")
    else:
        consequences = v.get("consequence_terms", []) or []
        impact = v.get("impact")

    return {"gene_symbol": gene_symbol, "consequence_terms": consequences, "impact": impact}


def fetch_myvariant(rsid: str) -> dict:
    url = f"https://myvariant.info/v1/variant/{rsid}"
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    d = r.json()

    af = None
    try:
        af_field = d["gnomad_exome"]["af"]
        af = af_field.get("af") if isinstance(af_field, dict) else af_field
    except Exception:
        pass

    clin_sig = None
    try:
        clin = d.get("clinvar", {})
        if isinstance(clin, dict):
            cs = clin.get("clinical_significance")
            if isinstance(cs, dict):
                clin_sig = cs.get("value")
            elif isinstance(cs, str):
                clin_sig = cs
    except Exception:
        pass

    return {"gnomad_AF": af, "clinvar_clinsig": clin_sig}


In [4]:
def triage_rule(af, consequence_terms, impact) -> str:
    rare = (af is not None) and (af < 0.01)
    protein_changing = False
    if consequence_terms:
        protein_changing = any(term in consequence_terms for term in [
            "missense_variant",
            "stop_gained",
            "frameshift_variant",
            "splice_donor_variant",
            "splice_acceptor_variant"
        ])
    impactful = (impact in ["HIGH", "MODERATE"])
    return "YES" if (rare and protein_changing and impactful) else "NO"


In [5]:
import pandas as pd

def annotate_variants(rsids) -> pd.DataFrame:
    rows = []
    for rs in rsids:
        try:
            base = fetch_ensembl_variation(rs)
            vep = fetch_ensembl_vep(rs)
            mv = fetch_myvariant(rs)

            rec = {**base, **vep, **mv}
            rec["consequence"] = ", ".join(rec.get("consequence_terms") or [])
            rec["priority"] = triage_rule(rec.get("gnomad_AF"),
                                          rec.get("consequence_terms"),
                                          rec.get("impact"))
            rows.append(rec)
        except Exception as e:
            rows.append({"rsID": rs, "error": str(e)})

    col_order = [
        "rsID", "gene_symbol", "variant_type", "allele_string",
        "chromosome", "position", "genome_build",
        "most_severe_consequence", "consequence", "impact",
        "gnomad_AF", "clinical_significance_combined", "clinvar_clinsig",
        "priority", "error"
    ]
    df = pd.DataFrame(rows)
    df = df[[c for c in col_order if c in df.columns]]
    return df


In [6]:
example_rsids = ["rs429358", "rs7412", "rs121913529"]  # APOE x2, CFTR ΔF508
df = annotate_variants(example_rsids)
df


Unnamed: 0,rsID,gene_symbol,variant_type,allele_string,chromosome,position,genome_build,most_severe_consequence,consequence,impact,gnomad_AF,clinical_significance_combined,clinvar_clinsig,priority
0,rs429358,APOE,SNP,T/C,19,44908684,GRCh38,missense_variant,missense_variant,MODERATE,0.138498,"uncertain significance, not provided, likely p...",,NO
1,rs7412,APOE,SNP,C/T,19,44908822,GRCh38,missense_variant,missense_variant,MODERATE,0.061504,"benign, likely benign, pathogenic, drug respon...",,NO
2,rs121913529,KRAS,SNP,C/A/G/T,12,25245350,GRCh38,missense_variant,missense_variant,MODERATE,,"not provided, likely pathogenic, pathogenic, a...",,NO


In [7]:
print("Enter rsIDs (one per line). Press Enter twice when done:")
user_lines = []
while True:
    try:
        line = input()
    except EOFError:
        break
    if not line.strip():
        break
    user_lines.append(line.strip())

if user_lines:
    df_user = annotate_variants(user_lines)
    display(df_user)
else:
    print("No rsIDs provided.")


Enter rsIDs (one per line). Press Enter twice when done:
rs7412



Unnamed: 0,rsID,gene_symbol,variant_type,allele_string,chromosome,position,genome_build,most_severe_consequence,consequence,impact,gnomad_AF,clinical_significance_combined,clinvar_clinsig,priority
0,rs7412,APOE,SNP,C/T,19,44908822,GRCh38,missense_variant,missense_variant,MODERATE,0.061504,"benign, likely benign, pathogenic, drug respon...",,NO


In [7]:
df.to_csv("variant_evidence_table_example.csv", index=False)
print("Saved: variant_evidence_table_example.csv")

try:
    df_user.to_csv("variant_evidence_table_user.csv", index=False)
    print("Saved: variant_evidence_table_user.csv")
except NameError:
    pass


Saved: variant_evidence_table_example.csv
