# RFMix-reader publication-quality plots

This notebook demonstrates how to use the official **rfmix-reader** visualization helpers to answer concrete ancestry-analysis questions:

- Do RFMix and FLARE recover simulated global ancestry in 2- and 3-population scenarios?
- Do RFMix global ancestry estimates match STRUCTURE (AANRI) cohort estimates?
- What do local ancestry ideograms look like at the ABCA7 locus (chr19)?


In [None]:
import importlib.util
import sys
from pathlib import Path

import pandas as pd

from rfmix_reader import (
    read_rfmix,
    read_flare,
    read_simu,
    plot_global_ancestry,
    plot_ancestry_by_chromosome,
    generate_tagore_bed,
    plot_local_ancestry_tagore,
)

# The plotting helpers live in a script whose file name ("06.publication...")
# is not a valid Python identifier, so it is loaded by path via importlib
# instead of a regular `import` statement.
module_path = Path("visualization/_h/06.publication_quality_plots.py")
if not module_path.is_file():
    # The path is relative, so it only resolves from the right working directory.
    raise FileNotFoundError(
        f"Cannot find {module_path} relative to {Path.cwd()}; "
        "start the notebook from the directory that contains 'visualization/'."
    )

spec = importlib.util.spec_from_file_location("publication_quality_plots", module_path)
if spec is None or spec.loader is None:
    raise ImportError(f"Could not build an import spec for {module_path}")

plots = importlib.util.module_from_spec(spec)
# Register the module before executing it (as in the importlib recipe for
# importing a source file directly) so code inside it that looks itself up in
# sys.modules, e.g. dataclasses or pickle, keeps working.
sys.modules[spec.name] = plots
try:
    spec.loader.exec_module(plots)
except BaseException:
    # Do not leave a half-initialised module behind if execution fails.
    sys.modules.pop(spec.name, None)
    raise

# Short aliases for the helpers used in the sections below.
make_simulation_global_plots = plots.make_simulation_global_plots
compare_rfmix_structure = plots.compare_rfmix_structure
plot_aanri_rfmix_overview = plots.plot_aanri_rfmix_overview
plot_abca7_local_ancestry = plots.plot_abca7_local_ancestry
maybe_plot_flare_abca7 = plots.maybe_plot_flare_abca7
tidy_global_ancestry = plots.tidy_global_ancestry
_load_structure_ancestry = plots._load_structure_ancestry


## Part 1: Simulations (RFMix vs FLARE vs truth)

The plots below answer: *Do RFMix and FLARE reproduce the simulated global ancestry distributions for each scenario?*


In [None]:
# Root folders for inputs and generated figures; later cells reuse both names.
base_dir = Path("input")
figures_dir = Path("figures")

# Two-population scenario: simulation inputs, RFMix/FLARE outputs, figure folder.
two_pop_kwargs = {
    "sim_dir": base_dir / "simulations/two_populations",
    "rfmix_dir": base_dir / "simulations/two_populations/_m/rfmix-out",
    "flare_dir": base_dir / "simulations/two_populations/_m/flare-out",
    "out_dir": figures_dir / "simulations/two_pop",
    "label": "two_pop",
}
make_simulation_global_plots(**two_pop_kwargs)

# Three-population scenario.
# NOTE(review): the RFMix folder here is "rfmix-files" while the two-population
# scenario uses "rfmix-out" -- confirm this matches the on-disk layout.
three_pop_kwargs = {
    "sim_dir": base_dir / "simulations/three_populations",
    "rfmix_dir": base_dir / "simulations/three_populations/_m/rfmix-files",
    "flare_dir": base_dir / "simulations/three_populations/_m/flare-out",
    "out_dir": figures_dir / "simulations/three_pop",
    "label": "three_pop",
}
make_simulation_global_plots(**three_pop_kwargs)


## Part 2: AANRI global ancestry (RFMix vs STRUCTURE)

These figures compare cohort-level ancestry distributions and individual-level agreement between methods.


In [None]:
# Read the AANRI RFMix run. The three returned objects are unpacked under the
# names that the local-ancestry section further down reuses.
aanri_rfmix_dir = base_dir / "aanri_data/rfmix-version/_m"
loci_aanri, g_anc_aanri, admix_aanri = read_rfmix(aanri_rfmix_dir)

# Both AANRI helpers below receive the same output folder.
aanri_figures_dir = figures_dir / "aanri"
plot_aanri_rfmix_overview(g_anc_aanri, aanri_figures_dir)

# Compare the RFMix global ancestry against the STRUCTURE estimates.
structure_df = _load_structure_ancestry()
merged = compare_rfmix_structure(g_anc_aanri, structure_df, aanri_figures_dir)

# Last expression of the cell: display the first rows of the comparison result.
merged.head()


## Part 3: Local ancestry ideograms at ABCA7 (chr19)

Pick three representative individuals by global African ancestry (the lowest, the median, and the highest sample) and generate TAGORE-style ideograms.


In [None]:
# Reshape the RFMix global ancestry so samples can be ranked by African ancestry.
g_anc_df = tidy_global_ancestry(g_anc_aanri)

# The African ancestry column is located by a case-insensitive "afr" substring
# match; fail with an explicit message instead of a bare IndexError when no
# column matches.
afr_cols = [col for col in g_anc_df.columns if "afr" in col.lower()]
if not afr_cols:
    raise ValueError(
        "No African ancestry column found (expected a column name containing "
        f"'afr'); available columns: {list(g_anc_df.columns)}"
    )
afr_col = afr_cols[0]

if g_anc_df.empty:
    raise ValueError(
        "Global ancestry table is empty; cannot pick representative samples."
    )

# Sort once and reuse the ordering. Selecting the "sample_id" column before
# indexing by position avoids building a mixed-dtype row, which could upcast a
# numeric sample id to float.
samples_by_afr = g_anc_df.sort_values(afr_col)["sample_id"]

# Representative individuals: lowest, median-position, and highest African
# ancestry (the extremes of the ranking, not the quartiles).
sample_ids = {
    "low": samples_by_afr.iloc[0],
    "medium": samples_by_afr.iloc[len(samples_by_afr) // 2],
    "high": samples_by_afr.iloc[-1],
}

plot_abca7_local_ancestry(
    loci_aanri,
    g_anc_aanri,
    admix_aanri,
    figures_dir / "aanri",
    sample_ids,
    prefix_label="rfmix",
)

# Optional: compare with FLARE if outputs are available
maybe_plot_flare_abca7(
    base_dir / "aanri_data/flare-version/_m",
    figures_dir / "aanri",
    sample_ids,
)
