In [6]:
import pandas as pd
import plotly.express as px
from pathlib import Path # type: ignore
import plotly.io as pio
import sys
if (module_path:=str(Path(".").absolute().resolve().parent)) not in sys.path:
    sys.path.insert(0, module_path)
from sample_info import mixture_renames, ont_dir, plotting_dir, num_amps, mixtures2drop as mixes2drop, plate_dict
heatmap_dir = plotting_dir / "coverage_depth_heatmaps"
outdir = heatmap_dir / "decon_heatmap_out"

In [2]:
def get_coverage_data():
    for plate, (background, scheme) in plate_dict.items():
        scheme = scheme.title()
        for mixture, new_name in mixture_renames.items():
            if mixture in mixes2drop:
                # print("dropping",mixture)
                continue
            df = pd.read_csv(ont_dir / f"MixedControl-{plate}-fastqs/output/coverage_stats/{mixture}_samtools_coverage.tsv", sep="\t")
            df["batch"] = f"{background}: {scheme}"
            df["mixture"] = new_name
            df = df.rename(columns={"numreads":"read_counts"})
            df["normalized_read_counts"] = df["read_counts"].apply(lambda x: x/num_amps[scheme])
            yield df
def get_coverage_df():
    df = pd.concat(d for d in get_coverage_data())
    df = df[["batch","mixture","read_counts","coverage","normalized_read_counts"]]
    return df
df = get_coverage_df()
# df.to_csv(outdir / "coverage_info.csv")
df

Unnamed: 0,batch,mixture,read_counts,coverage,normalized_read_counts
0,WB: Artic,0adgio1o2o3o4o5,769584,99.4649,7773.575758
0,WB: Artic,0adgio1,728720,98.9065,7360.808081
0,WB: Artic,o2o3o4o5,754486,98.5486,7621.070707
0,WB: Artic,0agio1o2,797075,98.8262,8051.262626
0,WB: Artic,0o5o3o4,1028126,99.6154,10385.111111
...,...,...,...,...,...
0,PWRB: Varskip,i-2,642371,94.3517,8680.689189
0,PWRB: Varskip,d-2,741515,94.4186,10020.472973
0,PWRB: Varskip,o1-2,720699,93.4655,9739.175676
0,PWRB: Varskip,o2-3,695223,93.6428,9394.905405


In [3]:
def sort_by_name(cols):
    sample_cols = []
    nfw_cols = []
    for col in cols:
        if col.startswith("NFW"):
            nfw_cols.append(col)
        else:
            sample_cols.append(col)
    return sorted(sample_cols) + nfw_cols
def getHeatmap(df,field,title=None,labels=None,title_y=0.78):
    fig_df = df[["batch","mixture",field]].pivot(index="batch",columns="mixture",values=field)
    fig_df = fig_df[sort_by_name(fig_df.columns)]
    fig = px.imshow(fig_df, title=title, labels=labels)
    fig.update_layout(title_y=title_y)
    return fig
# def getHeatmap(df,field):
#     return df[["batch","mixture",field]].pivot(index="batch",columns="mixture",values=field)

In [9]:
fig1 = getHeatmap(df,"coverage", title="Percent coverage heatmap", labels={"y":"Batch","x":"Mixture"})
pio.write_image(fig1, outdir / "percent_coverage_heatmap.jpg", width=1400, height=500, scale=2)
fig1

In [10]:
fig2 = getHeatmap(df,"normalized_read_counts", title="Normalized read counts heatmap", labels={"y":"Batch","x":"Mixture"})
pio.write_image(fig2, outdir / "normalized_read_counts_heatmap.jpg", width=1400, height=500, scale=2)
fig2