In [10]:
import pathlib
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patch
import numpy as np

%cd -q "/home/ebertp/work/code/cubi/project-run-hgsvc-hybrid-assemblies/notebooks"
_PROJECT_CONFIG_NB = str(pathlib.Path("00_project_config.ipynb").resolve(strict=True))
_PLOT_CONFIG_NB = str(pathlib.Path("05_plot_config.ipynb").resolve(strict=True))
_ASSM_STATS_NB = str(pathlib.Path("10_assm_stats.ipynb").resolve(strict=True))

%run $_PROJECT_CONFIG_NB
%run $_PLOT_CONFIG_NB
%run $_ASSM_STATS_NB

_MYNAME="plot-scaffold-aun"
_MYSTAMP=get_nb_stamp(_MYNAME)

_MY_OUT_PATH = PLOT_OUT_MAIN_FIG1.joinpath("panels")

show_assm = "hifiasm"

if show_assm == "verkko":
    table_file = pl.Path("/home/ebertp/work/projects/hgsvc/2024_busco/busco-summary.hgsvc3-verkko.tsv")
    sample_suffix = "vrk-ps-sseq"
if show_assm == "hifiasm":
    table_file = pl.Path("/home/ebertp/work/projects/hgsvc/2024_busco/busco-summary.hifiasm-hgsvc3.tsv")
    sample_suffix = "hsm-ps-sseq"

df = pd.read_csv(table_file, sep="\t", header=0)
df = df.loc[df["odb_name"] == "primates_odb10", :].copy()


def make_busco_barchart(ax):
    """Draw the BUSCO single-copy bar chart for all samples onto ``ax``.

    Reads the module-level ``df``, ``show_assm`` and ``sample_suffix`` and
    iterates over ``HGSVC_FEMALES + HGSVC_MALES``. For every sample two bars
    are drawn (hap1 in "skyblue", hap2 in "dodgerblue") showing the
    ``singleton_pct`` value of the respective assembly unit. For "verkko",
    the value of the unassigned unit is stacked in orange on top of the
    haplotype bar with the lower value. The median and mean over all
    haplotype bars are printed.

    Args:
        ax: matplotlib Axes to draw on.

    Returns:
        The same Axes object that was passed in.

    Raises:
        IndexError: if a sample has no row for a requested assembly unit.
    """
    # Settings that differ between the two supported assemblers; all other
    # decoration calls are shared. An unknown assembler gets no decoration.
    layout_by_assembler = {
        "verkko": {
            "ylim": (90, 101),
            "yticks": [90, 92, 94, 96, 98, 100],
            "yticklabels": [0, 92, 94, 96, 98, 100],
            "break_y": 90.5,
            "female_x": 50,
            "male_x": 85,
            "text_kwargs": {},
            "divider_ymin": 0.85,
            "legend_entries": [
                ("H1", "skyblue"), ("H2", "dodgerblue"), ("Un", "orange")
            ],
            "legend_anchor": (0.7, 0.84),
        },
        "hifiasm": {
            "ylim": (73, 101),
            "yticks": [73, 75, 80, 85, 90, 95, 100],
            "yticklabels": [0, 75, 80, 85, 90, 95, 100],
            "break_y": 74,
            "female_x": 43,
            "male_x": 90,
            "text_kwargs": {"fontdict": {"fontsize": 8}},
            "divider_ymin": 0.9,
            "legend_entries": [("H1", "skyblue"), ("H2", "dodgerblue")],
            "legend_anchor": (0.72, 0.93),
        },
    }

    def _singleton_pct(sample_rows, unit):
        # First "singleton_pct" value recorded for the given assembly unit.
        unit_rows = sample_rows["asm_unit"] == unit
        return sample_rows.loc[unit_rows, "singleton_pct"].values[0]

    with_unassigned = show_assm == "verkko"

    hap_values = []
    unassigned_values = []
    sample_labels = []

    for sample in HGSVC_FEMALES + HGSVC_MALES:
        sample_rows = df.loc[df["sample"] == f"{sample}.{sample_suffix}", :]

        hap1_pct = _singleton_pct(sample_rows, "asm-hap1")
        hap2_pct = _singleton_pct(sample_rows, "asm-hap2")
        hap_values.extend((hap1_pct, hap2_pct))

        if with_unassigned:
            unassigned_pct = _singleton_pct(sample_rows, "asm-unassigned")
            # The unassigned share goes on top of the lower haplotype bar.
            if hap1_pct > hap2_pct:
                unassigned_values.extend((0, unassigned_pct))
            else:
                unassigned_values.extend((unassigned_pct, 0))

        sample_labels.append(sample.split(".")[0])

    # One (hap1, hap2) color pair per sample.
    bar_colors = ["skyblue", "dodgerblue"] * len(sample_labels)

    hap_array = np.array(hap_values, dtype=float)
    print(show_assm, np.median(hap_array))
    print(show_assm, np.mean(hap_array))

    # Two bars per sample at 1, 2, 3, ...; the tick label sits between each pair.
    bar_positions = np.arange(1, HGSVC_TOTAL*2+1, 1, dtype=float)
    label_positions = np.arange(1.5, HGSVC_TOTAL*2+0.5, 2, dtype=float)

    shared_bar_style = {"width": 0.7, "align": "center"}
    ax.bar(bar_positions, hap_values, color=bar_colors, **shared_bar_style)
    if with_unassigned:
        ax.bar(
            bar_positions,
            unassigned_values,
            bottom=hap_values,
            color="orange",
            **shared_bar_style
        )

    ax.set_xticks(label_positions)
    ax.set_xticklabels(sample_labels, rotation=90)

    ax.set_xlabel("Sample")
    ax.set_ylabel("BUSCO single-copy genes (%)")

    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    layout = layout_by_assembler.get(show_assm)
    if layout is None:
        return ax

    ax.set_ylim(*layout["ylim"])
    ax.set_xlim(0.25, 131.5)

    # The lowest tick is labelled 0 although the axis starts higher; the white
    # band drawn across the plot right above it presumably marks the axis break.
    ax.set_yticks(layout["yticks"])
    ax.set_yticklabels(layout["yticklabels"])

    axis_break = patch.Rectangle((-1, layout["break_y"]), 132, 0.5, zorder=3)
    axis_break.set_facecolor("white")
    axis_break.set_edgecolor("lightgrey")
    ax.add_artist(axis_break)

    # Double-headed arrow with "female" / "male" labels and a dashed divider.
    ax.annotate(
        text="",
        xy=(50, 101),
        xytext=(90, 101),
        arrowprops=dict(arrowstyle="<->"),
        zorder=3
    )
    ax.text(layout["female_x"], 100.5, s="female", **layout["text_kwargs"])
    ax.text(layout["male_x"], 100.5, s="male", **layout["text_kwargs"])
    ax.axvline(70.5, layout["divider_ymin"], 1, ls="dashed", lw=2, color="gray")

    legend_handles = build_patch_legend(layout["legend_entries"])
    ax.legend(handles=legend_handles, ncol=2, bbox_to_anchor=layout["legend_anchor"])
    return ax


def create_busco_panel():
    """Render the BUSCO bar chart and save it as a stand-alone figure panel.

    A 12x6 inch figure is created, filled by ``make_busco_barchart`` and
    passed to ``save_figure`` once per extension in ``DEFAULT_PLOT_EXT``
    under the name ``fig1_panel_busco.<ext>`` inside ``_MY_OUT_PATH``.

    Returns:
        The ``make_busco_barchart`` function itself (not the figure), so
        that the same chart can be drawn again onto another Axes.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    make_busco_barchart(ax)
    for ext in DEFAULT_PLOT_EXT:
        out_path = _MY_OUT_PATH.joinpath(f"fig1_panel_busco.{ext}")
        save_figure(out_path, fig)
    # Close exactly the figure created above; a bare plt.close() would close
    # whatever pyplot currently regards as the active figure.
    plt.close(fig)
    return make_busco_barchart


# Running this line renders and saves the panel. create_busco_panel() returns
# the drawing function, so get_busco_panel is a callable that takes an Axes.
get_busco_panel = create_busco_panel()

hifiasm 98.98
hifiasm 96.43438461538462
