# In [4]:
import pandas as pd
import pathlib as pl
import numpy as np
import collections as col

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Working directory holding the *.npy coverage arrays and the region spec table.
wd = pl.Path("/home/ebertp/work/projects/hgsvc/2023_verkko_sseq_qc")

# complete:     (sample, unitig) -> number of *.npy files seen for that pair
# file_collect: (sample, unitig) -> {array type -> path of the *.npy file}
complete = col.Counter()
file_collect = col.defaultdict(dict)
for npy_path in wd.glob("*.npy"):
    # File names must consist of exactly five dot-separated fields; the first,
    # second and fourth are used, the third and fifth are ignored.
    name_fields = npy_path.name.split(".")
    sample_id, tig_id, _, array_type, _ = name_fields
    pair_key = (sample_id, tig_id)
    complete.update([pair_key])
    file_collect[pair_key][array_type] = npy_path

# Tab-separated table with a header row; one row per region to plot.
region_spec = pd.read_csv(
    wd / "issue_regions.spec.tsv",
    sep="\t",
    header=0,
)

# Pairs of region_spec column names (start, end); one plot page per pair.
plot_ranges = [
    (f"{prefix}_start", f"{prefix}_end")
    for prefix in ("roi", "focus")
]

# Write one PDF per row of region_spec (named after sample and unitig), with
# one page per (start, end) column pair listed in plot_ranges. Each page shows
# the plain and the smoothed coverage track over that window.
for row in region_spec.itertuples():
    sample = row.sample
    tig = row.unitig

    # Require the two tracks by name rather than by file count: two files of
    # other/duplicate types would otherwise pass and fail with a KeyError.
    # .get() avoids inserting empty entries into the defaultdict.
    cov_files = file_collect.get((sample, tig), {})
    if "plain" not in cov_files or "smooth" not in cov_files:
        continue

    # Check for an existing output before touching the (potentially large)
    # arrays, so re-runs do not pay the load cost for finished samples.
    out_file = wd.joinpath("plots", f"{sample}.{tig}.hificov.pdf")
    if out_file.is_file():
        continue

    plain_cov = np.load(cov_files["plain"])
    smooth_cov = np.load(cov_files["smooth"])

    out_file.parent.mkdir(exist_ok=True, parents=True)

    bp_start = row.bp_start
    bp_end = row.bp_end

    with PdfPages(out_file) as pdf:
        for plot_start, plot_end in plot_ranges:

            region_start = getattr(row, plot_start)
            region_end = getattr(row, plot_end)
            cov_range = plain_cov[region_start:region_end]
            smooth_range = smooth_cov[region_start:region_end]

            # x positions of the window, clipped exactly like the slices
            # above; avoids allocating an index array for the whole unitig.
            win_start, win_stop, _ = slice(region_start, region_end).indices(
                plain_cov.size
            )
            x_vals = np.arange(win_start, win_stop, dtype=int)

            # Ticks at every multiple of 200,000; labels are the tick
            # positions divided by 1e6, rounded to one decimal.
            x_ticks = x_vals[np.mod(x_vals, 2e5) == 0]
            x_ticklabels = (x_ticks / 1e6).round(1)

            fig, ax = plt.subplots(figsize=(20, 4))

            # Colour-code the page by which window it shows.
            signal_color = "green" if "roi" in plot_start else "blue"

            ax.plot(
                x_vals,
                cov_range,
                c=signal_color,
                ls="dotted",
                alpha=0.5,
                label="Coverage"
            )

            ax.plot(
                x_vals,
                smooth_range,
                c="darkgrey",
                ls="solid",
                alpha=1,
                label="(Smoothed)"
            )

            # Mark bp_start / bp_end with vertical lines of fixed height
            # (y = 0 to 25), independent of the plotted coverage values.
            ax.vlines([bp_start, bp_end], 0, 25, color="black")

            ax.set_xticks(x_ticks)
            ax.set_xticklabels(x_ticklabels, fontsize=12)

            from_bp = round(region_start/1e6, 2)
            to_bp = round(region_end/1e6, 2)
            ax.set_xlabel(f"{sample} / {tig}: {from_bp} Mbp to {to_bp} Mbp (HPC)", fontsize=14)
            ax.set_ylabel("HiFi read coverage", fontsize=14)
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.legend(loc="best")

            # Save and close this specific figure instead of relying on
            # pyplot's implicit "current figure" state.
            pdf.savefig(fig, bbox_inches="tight")
            plt.close(fig)
