# Coverage depth heatmaps

In [1]:
# run this with conda env: conda/env-plot
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
# from freyja_plot import FreyjaPlotter
# import subprocess
# import vcf
import plotly.express as px
import researchpy as rp
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
import numpy as np

In [3]:
heatmap_dir = Path(os.curdir).resolve()
benchmark_dir = heatmap_dir.parents[1]
ont_dir = benchmark_dir / "ont"
outdir = heatmap_dir / "depth_heatmap_out"
outdir.mkdir(exist_ok=True)
# quality_csv = outdir / "quality_stats.csv"
mixture_renames = {'Mixture01': '0ADGIO1O2O3O4O5', 'Mixture02': '0ADGIO1', 'Mixture03': 'O2O3O4O5','Mixture04': '0AGIO1O2', 'Mixture05': '0O5O3O4', 'Mixture06': 'ADGIO1O2O3','Mixture07': 'AGIO3O4O5', 'Mixture08': 'O1O2O3O4O5', 'Mixture09': '0','Mixture10': 'O1O2', 'Mixture11': 'O3', 'Mixture12': 'O5','Mixture13': 'O4', 'Mixture14': '0-2', 'Mixture15': 'A', 'Mixture16': 'G', 'Mixture17': 'I', 'Mixture18': 'D', 'Mixture19': 'O1', 'Mixture20': 'O2', 'Mixture21': '0-3','Mixture22': 'O3-2', 'Mixture23': 'O3-3', 'Mixture24': 'O5-2', 'Mixture25': 'O5-3', 'Mixture26': 'O4-2', 'Mixture27': 'O4-3', 'Mixture28': 'O2-2', 'Mixture29': 'O2O3O4O5-2', 'Mixture30': 'O2O3O4O5-3', 'Mixture31': '0ADGIO1-2', 'Mixture32': '0AIO1O2O3O4O5', 'Mixture33': '0-4', 'Mixture34': 'A-2','Mixture35': 'G-2', 'Mixture36': 'I-2', 'Mixture37': 'D-2', 'Mixture38': 'O1-2', 'Mixture39': 'O2-3', 'Mixture40': 'O3-4','Mixture41': 'O5-4', 'Mixture42': 'O4-4'}
mixture_renames = {m:n.lower() for m,n in mixture_renames.items()}
plate_dict = {
    "05-05-23-A41": ("Control","artic"), "05-16-23-A41": ("Neg Spike-in","artic"), "06-26-23-A41": ("Pos Spike-in","artic"),
    "05-05-23-V2": ("Control","varskip"), "06-16-23-V2": ("Neg Spike-in","varskip"), "07-12-23-V2A": ("Pos Spike-in","varskip"),
}


In [4]:
def rename_mixtures(df):
    df["mixture"] = df["mixture"].apply(lambda x: mixture_renames.get(x,x))
    return df

def get_quality_df(quality_csv):
    df = pd.read_csv(quality_csv)
    df["mean_depth"] = df["mean_depth"].astype(float)
    df = rename_mixtures(df)
    return df

def get_read_counts_df_by_plate(plate):
    read_counts_csv = ont_dir / f"MixedControl-{plate}-fastqs/output/read_counts.tsv"
    df = pd.read_csv(read_counts_csv, sep="\t")
    df["read_counts"] = df["read_counts"].astype(float)
    df = df.rename(columns={"sample":"mixture"})
    df = rename_mixtures(df)
    return df
def get_read_counts_df(plates):
    return pd.concat((get_read_counts_df_by_plate(plate) for plate in plates))

In [5]:
def sort_by_name(cols):
    sample_cols = []
    nfw_cols = []
    for col in cols:
        if col.startswith("NFW"):
            nfw_cols.append(col)
        else:
            sample_cols.append(col)
    return sorted(sample_cols) + nfw_cols
def getHeatmap(df,field,title=None,labels=None,title_y=0.7):
    fig_df = df[["batch","mixture",field]].pivot(index="batch",columns="mixture",values=field)
    fig_df = fig_df[sort_by_name(fig_df.columns)]
    fig = px.imshow(fig_df, title=title, labels=labels)
    fig.update_layout(title_y=title_y)
    return fig

In [6]:
def gen_mean_coverage_df(bed_directory, name):
    for bed in bed_directory.glob("*.bed.gz"):
        plate = bed.parents[2].name.replace("MixedControl-","").replace("-fastqs","")
        batch = plate_dict[plate]
        df = pd.read_csv(bed, sep="\t", header=None, compression="gzip", names=["chrom","start","end",name,"mean_coverage"])
        df["plate"] = plate
        # df["batch"], df["primers"] = batch
        df["batch"] = ": ".join(batch)
        df["mixture"] = bed.stem.split(".",1)[0]
        df["mean_coverage"] = df["mean_coverage"].astype(float)
        yield df
def get_mean_coverage_df(bed_directory, name):
    return pd.concat(gen_mean_coverage_df(bed_directory, name))

def get_normalized_df(region_type, read_counts_df):
    df = pd.concat((get_mean_coverage_df(d, region_type) for d in ont_dir.glob(f"*/output/mosdepth-{region_type}s")))
    df = rename_mixtures(df)
    df = df.merge(read_counts_df, on=["plate","mixture"])
    df["normalized_mean_coverage"] = df["mean_coverage"] / df["read_counts"] * 1000000
    return df

In [7]:
read_counts_df = get_read_counts_df(plate_dict.keys())
read_counts_df

Unnamed: 0,mixture,read_counts,plate
0,0adgio1o2o3o4o5,769584.0,05-05-23-A41
1,0adgio1,728720.0,05-05-23-A41
2,o2o3o4o5,754486.0,05-05-23-A41
3,0agio1o2,797075.0,05-05-23-A41
4,0o5o3o4,1028126.0,05-05-23-A41
...,...,...,...
39,o3-4,622205.0,07-12-23-V2A
40,o5-4,679895.0,07-12-23-V2A
41,o4-4,806279.0,07-12-23-V2A
42,NFWA,215.0,07-12-23-V2A


In [7]:
gene_df = get_normalized_df("gene", read_counts_df)
gene_df

Unnamed: 0,chrom,start,end,gene,mean_coverage,plate,batch,mixture,read_counts,normalized_mean_coverage
0,MN908947.3,1,265,5'UTR,11450.99,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,14879.454355
1,MN908947.3,1,29903,whole genome,10277.24,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,13354.279715
2,MN908947.3,266,13468,ORF1a,9907.51,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,12873.851327
3,MN908947.3,13468,21555,ORF1b,11301.66,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,14685.414458
4,MN908947.3,21563,25384,S,8537.23,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,11093.304954
...,...,...,...,...,...,...,...,...,...,...
3691,MN908947.3,27394,27759,ORF7a,6.11,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,924.217214
3692,MN908947.3,27894,28259,ORF8,6.20,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,937.830888
3693,MN908947.3,28274,29533,N,11.52,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,1742.550295
3694,MN908947.3,29558,29674,ORF10,10.23,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,1547.420965


In [28]:
genes_of_interest = ["S","whole genome"]
# genes_of_interest = gene_df["gene"].unique()
for gene in genes_of_interest:
    getHeatmap(gene_df[gene_df["gene"] == gene], "mean_coverage", title=f"Mean Depth of Coverage: {gene.title() + (' Gene' if 'whole' not in gene else '')}", labels={"y":"Batch","x":"Mixture"}, title_y=.75).show()

# do this for whole genome and S
# order NFW at the end

In [32]:
box_df = gene_df[~gene_df["gene"].isin(["whole genome"])]
px.box(
    box_df, x="gene", y="mean_coverage", color="batch", hover_data=["mixture"],
    title="Mean Depth Distribution by Gene",
    ).update_layout(yaxis_title="Mean depth",
).show()
px.box(
    box_df, x="gene", y="normalized_mean_coverage", color="batch", hover_data=["mixture"],
    title="Normalized Mean Depth Distribution by Gene",
    ).update_layout(yaxis_title="Mean depth per million reads",
).show()

In [36]:
px.box(
    box_df, y="mean_coverage", color="gene", hover_data=["mixture","batch"],
    points="all",
    title="Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth",
).show()
px.box(
    box_df, y="normalized_mean_coverage", color="gene", hover_data=["mixture","batch"],
    points="all",
    title="Normalized Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth per million reads",
).show()

In [35]:
px.box(
    box_df, y="mean_coverage", x="gene", hover_data=["mixture","gene"],
    points="all",
    title="Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth",
).show()
px.box(
    box_df, y="normalized_mean_coverage", x="gene", hover_data=["mixture","gene"],
    points="all",
    title="Normalized Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth per million reads",
).show()

In [37]:
px.box(
    box_df, y="mean_coverage", color="batch", hover_data=["mixture"],
    points="all",
    title="Mean Depth Distribution by Gene",
    ).update_layout(yaxis_title="Mean depth",
).show()
px.box(
    box_df, y="normalized_mean_coverage", color="batch", hover_data=["mixture"],
    points="all",
    title="Normalized Mean Depth Distribution by Gene",
    ).update_layout(yaxis_title="Mean depth per million reads",
).show()

In [38]:
px.box(
    box_df, y="mean_coverage", x="batch", hover_data=["mixture","batch"],
    points="all",
    title="Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth",
).show()
px.box(
    box_df, y="normalized_mean_coverage", x="batch", hover_data=["mixture","batch"],
    points="all",
    title="Normalized Mean Depth Distribution by Gene",    
    ).update_layout(yaxis_title="Mean depth per million reads",
).show()

In [None]:
gene_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3696 entries, 0 to 3695
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   chrom                     3696 non-null   object 
 1   start                     3696 non-null   int64  
 2   end                       3696 non-null   int64  
 3   gene                      3696 non-null   object 
 4   mean_coverage             3696 non-null   float64
 5   plate                     3696 non-null   object 
 6   batch                     3696 non-null   object 
 7   mixture                   3696 non-null   object 
 8   read_counts               3696 non-null   float64
 9   normalized_mean_coverage  3696 non-null   float64
dtypes: float64(3), int64(2), object(5)
memory usage: 288.9+ KB


In [40]:
anova = rp.anova("mean_coverage ~ C(gene) + C(batch) + C(gene):C(batch)", data=box_df)
desc, table = anova.results()
print(desc)
table



 Note: Effect size values for factors are partial. 


                          0
Number of obs =   3432.0000
Root MSE =        3677.9028
R-squared =          0.7125
Adj R-squared =      0.7059


Unnamed: 0,Source,Sum of Squares,Degrees of Freedom,Mean Squares,F value,p-value,Eta squared,Epsilon squared,Omega squared
0,Model,112436079436.6069,77.0,1460208823.852,107.948,0.0,0.7125,0.7059,0.7058
1,,,,,,,,,
2,gene,65819565644.9034,12.0,5484963803.742,405.4836,0.0,0.592,0.2936,0.5858
3,batch,25957384221.341705,5.0,5191476844.2683,383.7871,0.0,0.3639,0.1409,0.358
4,gene:batch,20659129570.3349,60.0,344318826.1722,25.4542,0.0,0.3129,0.1112,0.2995
5,,,,,,,,,
6,Residual,45369455072.315,3354.0,13526969.312,,,,,
7,Total,157805534508.89487,3431.0,45994035.1235,,,,,


In [41]:
anova.regression_table()

Unnamed: 0,mean_coverage,Coef.,Std. Err.,t,p-value,95% Conf. Interval
0,Intercept,922.3489,554.4647,1.6635,0.0963,"[-164.7743, 2009.472]"
1,gene,,,,,
2,3'UTR,(reference),,,,
3,5'UTR,5308.3952,784.1315,6.7698,0.0,"[3770.9709, 6845.8196]"
4,E,11316.5784,784.1315,14.432,0.0,"[9779.1541, 12854.0028]"
5,M,11835.4577,784.1315,15.0937,0.0,"[10298.0334, 13372.8821]"
6,N,12829.377,784.1315,16.3613,0.0,"[11291.9527, 14366.8014]"
7,ORF10,275.685,784.1315,0.3516,0.7252,"[-1261.7393, 1813.1093]"
8,ORF1a,9217.6384,784.1315,11.7552,0.0,"[7680.2141, 10755.0628]"
9,ORF1b,10914.4107,784.1315,13.9191,0.0,"[9376.9863, 12451.835]"


In [42]:
# same as researchpy anova
model = ols("mean_coverage ~ C(gene) + C(batch) + C(gene):C(batch)", data=gene_df).fit()
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(gene),65843630000.0,13.0,388.075798,0.0
C(batch),27055080000.0,5.0,414.595847,0.0
C(gene):C(batch),20859170000.0,65.0,24.58837,3.47531e-236
Residual,47141310000.0,3612.0,,


In [43]:
# test that above results are valid (by checking assumptions)
# Non-significant results indicate normality
stats.shapiro(model.resid)

ShapiroResult(statistic=0.8347792705243073, pvalue=3.2814181885089186e-52)

In [44]:
groups =  gene_df["batch"].unique()
print(groups)
grouped_data = [gene_df.loc[gene_df["batch"] == gene, "mean_coverage"].tolist() for gene in gene_df["batch"].unique()]
# grouped_data

['Control: artic' 'Control: varskip' 'Neg Spike-in: artic'
 'Neg Spike-in: varskip' 'Pos Spike-in: artic' 'Pos Spike-in: varskip']


In [45]:
mc = stats.multicomp.tukeyhsd(grouped_data)
res = mc.tukeyhsd()
print(res.summary())

AttributeError: module 'scipy.stats' has no attribute 'multicomp'

In [None]:
# tukey test for x="gene"
# from scipy.stats import tukey_hsd


res = stats.tukey_hsd(*grouped_data)
print(res)
# conf = res.confidence_interval(confidence_level=.99)
# conf

Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)  -7838.985     0.000 -9450.723 -6227.247
 (0 - 2)   -449.140     0.969 -2060.878  1162.597
 (0 - 3)  -8005.602     0.000 -9617.340 -6393.864
 (0 - 4)   -888.333     0.617 -2500.070   723.405
 (0 - 5)  -9357.743     0.000-10969.480 -7746.005
 (1 - 0)   7838.985     0.000  6227.247  9450.723
 (1 - 2)   7389.845     0.000  5778.107  9001.583
 (1 - 3)   -166.617     1.000 -1778.355  1445.121
 (1 - 4)   6950.653     0.000  5338.915  8562.390
 (1 - 5)  -1518.757     0.078 -3130.495    92.980
 (2 - 0)    449.140     0.969 -1162.597  2060.878
 (2 - 1)  -7389.845     0.000 -9001.583 -5778.107
 (2 - 3)  -7556.462     0.000 -9168.200 -5944.724
 (2 - 4)   -439.192     0.971 -2050.930  1172.546
 (2 - 5)  -8908.602     0.000-10520.340 -7296.864
 (3 - 0)   8005.602     0.000  6393.864  9617.340
 (3 - 1)    166.617     1.000 -1445.121  1778.355
 (3 - 2)   7556.462     0.000  5

In [None]:
conf = res.confidence_interval(confidence_level=.99)
conf

ConfidenceInterval(low=array([[ -1902.97331583,  -9741.95849408,  -2352.11364603,
         -9908.57562585,  -2791.30588428, -11260.71585094],
       [  5936.01186242,  -1902.97331583,   5486.87153222,
         -2069.5904476 ,   5047.67929397,  -3421.73067269],
       [ -1453.83298563,  -9292.81816388,  -1902.97331583,
         -9459.43529565,  -2342.16555408, -10811.57552075],
       [  6102.62899419,  -1736.35618406,   5653.48866399,
         -1902.97331583,   5214.29642574,  -3255.11354092],
       [ -1014.64074738,  -8853.62592563,  -1463.78107758,
         -9020.2430574 ,  -1902.97331583, -10372.38328249],
       [  7454.76921928,   -384.21595897,   7005.62888909,
          -550.83309074,   6566.43665083,  -1902.97331583]]), high=array([[ 1902.97331583, -5936.01186242,  1453.83298563, -6102.62899419,
         1014.64074738, -7454.76921928],
       [ 9741.95849408,  1902.97331583,  9292.81816388,  1736.35618406,
         8853.62592563,   384.21595897],
       [ 2352.11364603, -5486.

In [None]:
conf = res.confidence_interval(confidence_level=.99)
for ((i, j), l) in np.ndenumerate(conf.low):
    # filter out self comparisons
    if i != j:
        h = conf.high[i,j]
        print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")

(0 - 1) -9741.958 -5936.012
(0 - 2) -2352.114 1453.833
(0 - 3) -9908.576 -6102.629
(0 - 4) -2791.306 1014.641
(0 - 5) -11260.716 -7454.769
(1 - 0) 5936.012 9741.958
(1 - 2) 5486.872 9292.818
(1 - 3) -2069.590 1736.356
(1 - 4) 5047.679 8853.626
(1 - 5) -3421.731 384.216
(2 - 0) -1453.833 2352.114
(2 - 1) -9292.818 -5486.872
(2 - 3) -9459.435 -5653.489
(2 - 4) -2342.166 1463.781
(2 - 5) -10811.576 -7005.629
(3 - 0) 6102.629 9908.576
(3 - 1) -1736.356 2069.590
(3 - 2) 5653.489 9459.435
(3 - 4) 5214.296 9020.243
(3 - 5) -3255.114 550.833
(4 - 0) -1014.641 2791.306
(4 - 1) -8853.626 -5047.679
(4 - 2) -1463.781 2342.166
(4 - 3) -9020.243 -5214.296
(4 - 5) -10372.383 -6566.437
(5 - 0) 7454.769 11260.716
(5 - 1) -384.216 3421.731
(5 - 2) 7005.629 10811.576
(5 - 3) -550.833 3255.114
(5 - 4) 6566.437 10372.383


In [None]:
# # test for homogeneity of variance
# stats.levene(grouped_data)

In [None]:
rp.summary_cont(gene_df["read_counts"])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,read_counts,3432.0,632426.1326,222912.9704,3805.0617,624965.7169,639886.5482


In [None]:
rp.summary_cont(gene_df["mean_coverage"].groupby(gene_df["gene"]))





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3'UTR,264,2035.0911,2584.4095,159.0594,1721.8992,2348.2831
5'UTR,264,3759.5708,5344.5964,328.9372,3111.8853,4407.2563
E,264,10860.9824,4721.4481,290.585,10288.8132,11433.1516
M,264,14675.8644,6972.8612,429.1499,13830.8574,15520.8713
N,264,15695.2571,5925.2204,364.6721,14977.2086,16413.3056
ORF10,264,5785.2706,6214.2505,382.4607,5032.196,6538.3453
ORF1a,264,9452.6795,3244.8793,199.7085,9059.4485,9845.9106
ORF1b,264,10374.4262,3428.0638,210.9827,9958.9959,10789.8565
ORF3a,264,10591.8746,4339.5955,267.0836,10065.9803,11117.769
ORF6,264,16274.8966,6567.8769,404.2249,15478.9677,17070.8255


In [None]:
rp.summary_cont(gene_df["mean_coverage"].groupby(gene_df["batch"]))





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Control: artic,572,9614.5105,5678.4975,237.4299,9148.1679,10080.853
Control: varskip,572,11665.3307,5568.2043,232.8183,11208.0459,12122.6155
Neg Spike-in: artic,572,5508.9308,3580.0454,149.6892,5214.9221,5802.9395
Neg Spike-in: varskip,572,9714.995,5159.3532,215.7234,9291.2868,10138.7032
Pos Spike-in: artic,572,11020.5416,8015.9678,335.1645,10362.236,11678.8472
Pos Spike-in: varskip,572,14688.4691,7997.8634,334.4075,14031.6503,15345.2879


In [None]:
gene_df.describe()

Unnamed: 0,start,end,mean_coverage,read_counts,normalized_mean_coverage
count,3432.0,3432.0,3432.0,3432.0,3432.0
mean,21804.307692,24082.307692,10368.796291,632426.1,16464.958043
std,10097.50336,8061.977357,6781.890232,222913.0,10362.006983
min,1.0,265.0,0.0,12.0,0.0
25%,21563.0,25384.0,5773.3475,486266.8,11206.596601
50%,26523.0,27191.0,10590.95,652862.5,16574.370926
75%,27894.0,28259.0,14271.88,768660.0,22201.094386
max,29675.0,29903.0,44438.66,1409051.0,138333.333333


In [8]:
amplicon_df = get_normalized_df("amplicon", read_counts_df)
amplicon_df

Unnamed: 0,chrom,start,end,amplicon,mean_coverage,plate,batch,mixture,read_counts,normalized_mean_coverage
0,MN908947.3,25,431,SARS-CoV-2_1,15126.15,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,19654.969438
1,MN908947.3,324,727,SARS-CoV-2_2,15197.27,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,19747.383002
2,MN908947.3,644,1044,SARS-CoV-2_3,14201.22,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,18453.112331
3,MN908947.3,944,1362,SARS-CoV-2_4,12142.12,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,15777.510967
4,MN908947.3,1245,1650,SARS-CoV-2_5,9317.56,05-05-23-A41,Control: artic,0adgio1o2o3o4o5,769584.0,12107.268342
...,...,...,...,...,...,...,...,...,...,...
29827,MN908947.3,28190,28598,SARS-CoV-2_95,3.95,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,597.489033
29828,MN908947.3,28512,28914,SARS-CoV-2_96,10.92,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,1651.792467
29829,MN908947.3,28827,29227,SARS-CoV-2_97,16.17,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,2445.923461
29830,MN908947.3,29136,29534,SARS-CoV-2_98,14.10,07-12-23-V2A,Pos Spike-in: varskip,NFWC,6611.0,2132.808955


In [12]:
px.box(
    amplicon_df, y="mean_coverage", x="amplicon", hover_data=["mixture","amplicon"],
    points="all",
    title="Mean Depth Distribution by Amplicon",    
    ).update_layout(yaxis_title="Mean depth",
).show()
# px.box(
#     amplicon_df, y="normalized_mean_coverage", x="amplicon", hover_data=["mixture","amplicon"],
#     points="all",
#     title="Normalized Mean Depth Distribution by Amplicon",    
#     ).update_layout(yaxis_title="Mean depth per million reads",
# ).show()

In [None]:
# for amplicon in amplicon_df["amplicon"].unique():
#     getHeatmap(amplicon_df[amplicon_df["amplicon"] == amplicon], "normalized_mean_coverage", title=f"Mean Depth (per 1M reads) Distribution for {amplicon}", labels={"y":"Batch","x":"Mixture"}, title_y=.75).show()