In [None]:
# Copyright 2020 Gentaro Shigita (gentaro.shigita@tum.de)
# https://github.com/edgardomortiz/Captus

# This file is part of Captus. Captus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version. Captus is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details. You should have received a copy of the GNU General Public License along with Captus. If
# not, see <http://www.gnu.org/licenses/>.

# Captus-assembly: Clean (QC Report)

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker

In [None]:
# Global matplotlib appearance shared by every figure in this report
plt.rcParams.update({
    "font.family": "Arial",       # Font
    "legend.fancybox": False,     # Legend box with square corners
    "legend.edgecolor": "black",  # Legend box margin color
    "figure.dpi": 300,            # Figure resolution
})

# Name of the colormap used by the heatmap cells below
cmap = "Spectral_r"

---
## Stats on Reads

In [None]:
# Per-sample counts of reads and bases at the input and after each cleaning step
df = pd.read_table("03_qc_extras/reads_bases.tsv")

# RGBA hex colors (alpha 0x4C) for the table bars, in the order:
# input, round 1, round 2, cleaning
colors = ["#E69F004C", "#009E734C", "#0072B24C", "#CC79A74C"]

steps = ("round1", "round2", "cleaning")

# Fraction of the input that passed each step (formatted as a percentage below)
for step in steps:
    for unit in ("reads", "bases"):
        df[f"{unit}_passed_{step}_%"] = df[f"{unit}_passed_{step}"] / df[f"{unit}_input"]

# Column layout of the two summary tables: every count is followed by its fraction
read_columns = ["sample", "reads_input"]
base_columns = ["sample", "bases_input"]
for step in steps:
    read_columns += [f"reads_passed_{step}", f"reads_passed_{step}_%"]
    base_columns += [f"bases_passed_{step}", f"bases_passed_{step}_%"]

# Both tables are sorted so that the sample with the largest cleaned output comes first
df_read = (
    df.reindex(columns=read_columns)
    .sort_values("reads_passed_cleaning", ascending=False)
)
df_base = (
    df.reindex(columns=base_columns)
    .sort_values("bases_passed_cleaning", ascending=False)
)

# Display formats: thousands separators for counts, two-decimal percentages for fractions
format_dict = {}
for unit in ("reads", "bases"):
    format_dict[f"{unit}_input"] = "{0:,.0f}"
    for step in steps:
        format_dict[f"{unit}_passed_{step}"] = "{0:,.0f}"
        format_dict[f"{unit}_passed_{step}_%"] = "{:.2%}"
# Styled summary of read counts: bold sample names, formatted numbers and an
# in-cell bar per column, colored by cleaning step.
reads_table = (
    df_read.style
    .set_properties(subset=["sample"], **{"font-weight": "bold"})
    .format(format_dict)
    .bar(subset=["reads_input"], color=colors[0])
    .bar(subset=["reads_passed_round1", "reads_passed_round1_%"], color=colors[1])
    .bar(subset=["reads_passed_round2", "reads_passed_round2_%"], color=colors[2])
    .bar(subset=["reads_passed_cleaning", "reads_passed_cleaning_%"], color=colors[3])
)

# `Styler.hide_index()` was removed in pandas 2.0; `Styler.hide(axis="index")`
# replaces it (available since pandas 1.4). Fall back to the old method on
# older installations. This stays the last expression so the table is rendered.
reads_table.hide(axis="index") if hasattr(reads_table, "hide") else reads_table.hide_index()

---
## Stats on Bases

In [None]:
# Styled summary of base counts: bold sample names, formatted numbers and an
# in-cell bar per column, colored by cleaning step.
bases_table = (
    df_base.style
    .set_properties(subset=['sample'], **{'font-weight': 'bold'})
    .format(format_dict)
    .bar(subset=['bases_input'], color=colors[0])
    .bar(subset=['bases_passed_round1', 'bases_passed_round1_%'], color=colors[1])
    .bar(subset=['bases_passed_round2', 'bases_passed_round2_%'], color=colors[2])
    .bar(subset=['bases_passed_cleaning', 'bases_passed_cleaning_%'], color=colors[3])
)

# `Styler.hide_index()` was removed in pandas 2.0; `Styler.hide(axis="index")`
# replaces it (available since pandas 1.4). Fall back to the old method on
# older installations. This stays the last expression so the table is rendered.
bases_table.hide(axis="index") if hasattr(bases_table, "hide") else bases_table.hide_index()

---
## Per Base Quality

In [None]:
# Per-base quality profiles before and after cleaning: one row of panels per
# sample, Read 1 on the left and Read 2 on the right.
df = pd.read_table("03_qc_extras/per_base_seq_qual.tsv")
sample_list = df["sample_name"].unique()
n_samples = len(sample_list)

# Color of each stage, in drawing order. The legend is generated from the same
# pairs so that it cannot disagree with the plotted curves.
stage_colors = (("before", "red"), ("after", "blue"))


def plot_quality_profile(axis, reads):
    """Draw the quality curves of one sample/read onto ``axis``.

    For every stage in ``stage_colors`` this draws the ``mean`` column as a
    solid line, the band between ``upper_quartile`` and ``lower_quartile`` as a
    translucent fill, and ``percentile_10`` / ``percentile_90`` as dotted lines,
    all against the ``base`` column. Three horizontal background bands
    (0-20, 20-28, 28-41) are added afterwards.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        Panel to draw on.
    reads : pandas.DataFrame
        Rows of the quality table for a single sample and a single read.
    """
    per_stage = [(reads[reads["stage"] == stage], color) for stage, color in stage_colors]

    for subset, color in per_stage:
        axis.plot("base", "mean", data=subset, color=color)
    for subset, color in per_stage:
        axis.fill_between("base", "upper_quartile", "lower_quartile",
                          data=subset, color=color, alpha=0.2)
    for column in ("percentile_10", "percentile_90"):
        for subset, color in per_stage:
            axis.plot("base", column, data=subset, color=color, linestyle="dotted")

    # Background colors
    axis.axhspan(0, 20, color="#CC79A7", alpha=0.2)
    axis.axhspan(20, 28, color="#F0E442", alpha=0.2)
    axis.axhspan(28, 41, color="#009E73", alpha=0.2)


# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="row", sharey="all",
                       figsize=(15, n_samples * 2.4),
                       tight_layout=True, squeeze=False)

for row, sample in enumerate(sample_list):
    data = df[df["sample_name"] == sample]
    reads_r1 = data[data["read"] == "R1"]
    reads_r2 = data[data["read"] == "R2"]

    plot_quality_profile(ax[row, 0], reads_r1)

    # Samples without Read 2 rows get a blank right-hand panel
    if len(reads_r2[reads_r2["stage"] == "before"]) == 0:
        ax[row, 1].axis("off")
    else:
        plot_quality_profile(ax[row, 1], reads_r2)
        ax[row, 1].grid()

    # Settings (the x axis is shared within the row, so they are set once)
    ax[row, 0].xaxis.set_major_locator(ticker.MultipleLocator(10))
    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].set_ylabel("PHRED Score")
    ax[row, 0].set_xlim(1, data["base"].max())
    ax[row, 0].grid()

# Column headers on the first row, x labels on the last row
ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Position in Read (bp)")
ax[-1, 1].set_xlabel("Position in Read (bp)")
# The y axis is shared by all panels, so one call sets the range everywhere
ax[-1, 0].set_ylim(0, 41)

# Legend: proxy artists in the same colors as the curves (red = before,
# blue = after), ordered mean / inter-quartile range / percentiles.
legend_elements = []
for stage, color in stage_colors:
    legend_elements.append(
        mpl.lines.Line2D([0], [0], color=color,
                         label=f"Mean ({stage.capitalize()})"))
for stage, color in stage_colors:
    legend_elements.append(
        mpl.patches.Patch(facecolor=color, alpha=0.2, edgecolor=color,
                          label=f"Inter-quartile range ({stage.capitalize()})"))
for stage, color in stage_colors:
    legend_elements.append(
        mpl.lines.Line2D([0], [0], color=color, linestyle="dotted",
                         label=f"10%, 90% Percentile ({stage.capitalize()})"))
legend = fig.legend(handles=legend_elements, bbox_to_anchor=(0.51, 1),
                    title="Stage", loc="lower center", ncol=3)

# The legend sits above the figure, so it is passed explicitly to the tight bounding box
plt.savefig("03_qc_extras/per_base_seq_qual.pdf",
            bbox_extra_artists=(legend,), bbox_inches="tight")

---
## Read Length Distribution

In [None]:
# Read length distribution after cleaning: one row of panels per sample,
# Read 1 on the left and Read 2 on the right, counts on a log scale.
df = pd.read_table("03_qc_extras/seq_len_dist.tsv")
sample_list = df["sample_name"].unique()
n_samples = len(sample_list)


def shade_length_classes(axis, read_length):
    """Shade three vertical bands on ``axis``: 0-50 %, 50-80 % and 80-100 % of ``read_length``."""
    axis.axvspan(0, read_length * 0.5, color="#CC79A7", alpha=0.2)
    axis.axvspan(read_length * 0.5, read_length * 0.8, color="#F0E442", alpha=0.2)
    axis.axvspan(read_length * 0.8, read_length, color="#009E73", alpha=0.2)


# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="row", sharey="row",
                       figsize=(15, n_samples * 2.4),
                       tight_layout=True, squeeze=False)

for row, sample in enumerate(sample_list):
    data = df[df["sample_name"] == sample]
    # Longest length recorded for this sample (any read, any stage)
    read_length = data["length"].max()
    after_r1 = data[(data["read"] == "R1") & (data["stage"] == "after")]
    after_r2 = data[(data["read"] == "R2") & (data["stage"] == "after")]

    ax[row, 0].plot("length", "count", data=after_r1, color="blue")
    shade_length_classes(ax[row, 0], read_length)
    ax[row, 0].grid()

    if len(after_r2) == 0:
        # No Read 2 for this sample: leave the panel completely blank. The
        # bands and grid are only added in the other branch because patches
        # are still rendered on an axes that has been switched off.
        ax[row, 1].axis("off")
    else:
        ax[row, 1].plot("length", "count", data=after_r2, color="blue")
        shade_length_classes(ax[row, 1], read_length)
        ax[row, 1].grid()

    # Axes are shared within the row, so scale and limits are set on the left panel
    ax[row, 0].set_yscale("log")
    ax[row, 0].xaxis.set_major_locator(ticker.MultipleLocator(10))
    ax[row, 0].set_xlim(data["length"].min(), data["length"].max())
    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].set_ylabel("Count")

# Column headers on the first row, x labels on the last row
ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Read Length (bp)")
ax[-1, 1].set_xlabel("Read Length (bp)")

plt.savefig("03_qc_extras/seq_len_dist.pdf")

---
## Per Read Quality

In [None]:
# Distribution of per-read mean quality, before vs. after cleaning, drawn as a
# two-row heatmap per sample (Read 1 on the left, Read 2 on the right).
df = pd.read_table("03_qc_extras/per_seq_qual_scores.tsv")
sample_list = df["sample_name"].unique()
# One row per (sample, read, stage), one column per quality value
data = df.pivot(index=["sample_name", "read", "stage"],
                columns="quality", values="count")
n_samples = len(sample_list)
fig_height = n_samples * 1.1  # inches

# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="row", sharey="all",
                       figsize=(15, fig_height),
                       constrained_layout=True, squeeze=False)


def quality_frequencies(sample, read):
    """Return the counts of one sample/read with every row scaled to sum to 1.

    Quality columns that are empty for this sample/read are dropped and the
    remaining gaps are treated as zero counts before scaling.
    """
    counts = data.loc[sample, read, :].dropna(how="all", axis=1).fillna(0)
    return counts.apply(lambda stage_counts: stage_counts / sum(stage_counts), axis=1)


heatmap_style = dict(aspect="auto", interpolation="nearest", origin="lower",
                     cmap=cmap, vmin=0, vmax=1)

for row, sample in enumerate(sample_list):
    max_qual = df[df["sample_name"] == sample]["quality"].max()
    extent = [1, max_qual, -0.5, 1.5]
    # Keep the last image: the colorbar below is built from it
    a = ax[row, 0].imshow(quality_frequencies(sample, "R1"),
                          extent=extent, **heatmap_style)

    if len(data.loc[sample, "R2", :]) == 0:
        ax[row, 1].axis("off")
    else:
        ax[row, 1].imshow(quality_frequencies(sample, "R2"),
                          extent=extent, **heatmap_style)

    ax[row, 0].grid(which="minor", color="black")
    ax[row, 1].grid(which="minor", color="black")
    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].tick_params(axis="y", which="minor", length=0)
    ax[row, 1].tick_params(axis="y", which="minor", length=0)

# The y axis is shared by all panels, so the ticks are configured once
ylabs = ["After", "Before"]
ax[-1, 0].set_yticks(np.arange(len(ylabs)))
ax[-1, 0].set_yticklabels(ylabs)
# Minor ticks
ax[-1, 0].set_yticks(np.arange(-.5, 1.5, 1), minor=True)

ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Mean PHRED Score")
ax[-1, 1].set_xlabel("Mean PHRED Score")

# Colorbar 0.5 in above the top edge and 0.15 in tall. `add_axes` takes
# figure fractions, hence the division by the figure height. The height is
# used rather than the last loop index, which is 0 for a single sample.
cax = fig.add_axes([0.41, 1 + 0.5 / fig_height, 0.2, 0.15 / fig_height])
cbar = plt.colorbar(a, cax=cax, orientation="horizontal")
cbar.set_label("Frequency")

# The colorbar lies outside the figure area, so include it in the tight bounding box
plt.savefig("03_qc_extras/per_seq_qual_scores.pdf",
            bbox_extra_artists=(cax,), bbox_inches="tight")

---
## Per Base Nucleotide Content

In [None]:
# Per-base nucleotide content before and after cleaning, drawn as an
# eight-row heatmap per sample (Read 1 on the left, Read 2 on the right).
df = pd.read_table("03_qc_extras/per_base_seq_content.tsv")
sample_list = df["sample_name"].unique()
# Tick labels from y = 0 upwards. The matrix rows are before A/T/G/C followed
# by after A/T/G/C and are drawn top-down (origin="upper"), so the first
# matrix row ends up at the highest tick.
nucleotides = ["C", "G", "T", "After - A", "C", "G", "T", "Before - A"]
n_samples = len(sample_list)
fig_height = n_samples * 1.7  # inches

# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="row", sharey="all",
                       figsize=(15, fig_height),
                       constrained_layout=True, squeeze=False)


def content_matrix(reads):
    """Return the 8 x positions matrix of one sample/read.

    The A, T, G, C columns of the "before" rows are placed side by side with
    those of the "after" rows (aligned on ``base``) and the result is
    transposed, so that every nucleotide/stage combination becomes one row.
    """
    per_stage = [
        reads[reads["stage"] == stage]
        .reindex(columns=["base", "A", "T", "G", "C"])
        .set_index("base")
        for stage in ("before", "after")
    ]
    return pd.concat(per_stage, axis=1).T


for row, sample in enumerate(sample_list):
    reads_r1 = df[(df["sample_name"] == sample) & (df["read"] == "R1")]
    reads_r2 = df[(df["sample_name"] == sample) & (df["read"] == "R2")]
    # The Read 1 length sets the horizontal extent of both panels in the row
    read_length = reads_r1["base"].max()
    heatmap_style = dict(aspect="auto", interpolation="nearest",
                         origin="upper", vmin=0, vmax=100,
                         extent=[1, read_length, -0.5, 7.5], cmap=cmap)

    # Keep the last image: the colorbar below is built from it
    a = ax[row, 0].imshow(content_matrix(reads_r1), **heatmap_style)

    if len(reads_r2) == 0:
        ax[row, 1].axis("off")
    else:
        ax[row, 1].imshow(content_matrix(reads_r2), **heatmap_style)

    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].xaxis.set_major_locator(ticker.MultipleLocator(10))
    ax[row, 0].grid(which="minor", color="black")
    ax[row, 1].grid(which="minor", color="black")
    ax[row, 0].tick_params(axis="y", which="minor", length=0)
    ax[row, 1].tick_params(axis="y", which="minor", length=0)

# The y axis is shared by all panels, so the ticks are configured once
ax[0, 0].set_yticks(np.arange(len(nucleotides)))
ax[0, 0].set_yticklabels(nucleotides)
ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Position in Read (bp)")
ax[-1, 1].set_xlabel("Position in Read (bp)")

# Minor ticks (at -0.5 and 3.5; the minor grid line at 3.5 separates the two stages)
ax[0, 0].set_yticks(np.arange(-.5, 7.5, 4), minor=True)

# Colorbar 0.5 in above the top edge and 0.15 in tall. `add_axes` takes
# figure fractions, hence the division by the figure height. The height is
# used rather than the last loop index, which is 0 for a single sample.
cax = fig.add_axes([0.414, 1 + 0.5 / fig_height, 0.2, 0.15 / fig_height])
cbar = plt.colorbar(a, cax=cax, orientation="horizontal")
cbar.set_label("Nucleotide Content (%)")

# The colorbar lies outside the figure area, so include it in the tight bounding box
plt.savefig("03_qc_extras/per_base_seq_content.pdf",
            bbox_extra_artists=(cax,), bbox_inches="tight")

---
## Per Read GC Content

In [None]:
# Per-read GC content distribution before and after cleaning, drawn as a
# two-row heatmap per sample (Read 1 on the left, Read 2 on the right).
df = pd.read_table("03_qc_extras/per_seq_gc_content.tsv")
sample_list = df["sample_name"].unique()
# One row per (sample, read, stage), one column per GC content value; missing
# combinations count as zero and every row is scaled to sum to 1.
data = (
    df.pivot(index=["sample_name", "read", "stage"], columns="gc_content",
             values="count")
    .fillna(0)
    .apply(lambda counts: counts / sum(counts), axis=1)
)
n_samples = len(sample_list)
fig_height = n_samples * 0.95  # inches

# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="all", sharey="all",
                       figsize=(15, fig_height),
                       constrained_layout=True, squeeze=False)

# Frequencies above 0.1 share the top color (see extend="max" on the colorbar)
heatmap_style = dict(aspect="auto", interpolation="nearest",
                     origin="lower", cmap=cmap, vmin=0, vmax=0.1)

for row, sample in enumerate(sample_list):
    # Keep the last image: the colorbar below is built from it
    a = ax[row, 0].imshow(data.loc[sample, "R1", :], **heatmap_style)

    if len(data.loc[sample, "R2", :]) == 0:
        ax[row, 1].axis("off")
    else:
        ax[row, 1].imshow(data.loc[sample, "R2", :], **heatmap_style)

    ax[row, 0].grid(which="minor", color="black")
    ax[row, 1].grid(which="minor", color="black")
    ax[row, 0].tick_params(axis="y", which="minor", length=0)
    ax[row, 1].tick_params(axis="y", which="minor", length=0)
    ax[row, 0].set_title(sample, loc="left", fontweight="bold")

ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("GC Content (%)")
ax[-1, 1].set_xlabel("GC Content (%)")
# The y axis is shared by all panels, so the ticks are configured once
ylabs = ["After", "Before"]
ax[0, 0].set_yticks(np.arange(len(ylabs)))
ax[0, 0].set_yticklabels(ylabs)

# Minor ticks
ax[0, 0].set_yticks(np.arange(-.5, 1.5, 1), minor=True)

# Colorbar 0.5 in above the top edge and 0.15 in tall. `add_axes` takes
# figure fractions, hence the division by the figure height. The height is
# used rather than the last loop index, which is 0 for a single sample.
cax = fig.add_axes([0.41, 1 + 0.5 / fig_height, 0.2, 0.15 / fig_height])
cbar = plt.colorbar(a, cax=cax, orientation="horizontal", extend="max")
cbar.set_label("Frequency")

# The colorbar lies outside the figure area, so include it in the tight bounding box
plt.savefig("03_qc_extras/per_seq_gc_content.pdf",
            bbox_extra_artists=(cax,), bbox_inches="tight")

---
## Sequence Duplication Level

In [None]:
# Sequence duplication levels before and after cleaning, drawn as stacked
# horizontal bars per sample (Read 1 on the left, Read 2 on the right).
df = pd.read_table("03_qc_extras/seq_dup_levels.tsv")
sample_list = df["sample_name"].unique()
dup_lev_list = df["duplication_level"].unique()
# 16-step version of the report colormap, indexed by duplication level.
# It has its own name so that it does not overwrite the `colors` list that
# the summary-table cells index into when they are re-run.
dup_palette = plt.get_cmap(cmap, 16)
n_samples = len(sample_list)

# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="all", sharey="all",
                       figsize=(15, n_samples),
                       tight_layout=True, squeeze=False)


def draw_duplication_bars(axis, reads):
    """Stack one bar segment per duplication level onto ``axis``.

    ``reads`` holds the rows of one sample and one read. For each level in
    ``dup_lev_list`` the ``percentage_of_total`` values are drawn against
    ``stage``, starting where the previous level ended.
    """
    offset = 0
    for j, level in enumerate(dup_lev_list):
        level_rows = reads[reads["duplication_level"] == level]
        axis.barh("stage", "percentage_of_total", data=level_rows,
                  height=1, left=offset, color=dup_palette(j))
        # Running total per stage: the left edge of the next segment
        offset = offset + level_rows["percentage_of_total"].values


for row, sample in enumerate(sample_list):
    data_R1 = df[(df["sample_name"] == sample) & (df["read"] == "R1")]
    data_R2 = df[(df["sample_name"] == sample) & (df["read"] == "R2")]

    draw_duplication_bars(ax[row, 0], data_R1)

    if len(data_R2) == 0:
        ax[row, 1].axis("off")
    else:
        draw_duplication_bars(ax[row, 1], data_R2)

    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].set_xlim(0, 100)
    # The limits are reset before inverting so the shared y axis ends up
    # inverted exactly once, whatever the number of rows
    ax[row, 0].set_ylim(-0.5, 1.5)
    ax[row, 0].invert_yaxis()
    ax[row, 0].grid(which="minor", color="black")
    ax[row, 1].grid(which="minor", color="black")
    ax[row, 0].tick_params(axis="y", which="minor", length=0)
    ax[row, 1].tick_params(axis="y", which="minor", length=0)

ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Percentage (%)")
ax[-1, 1].set_xlabel("Percentage (%)")
# The y axis is shared by all panels, so the ticks are configured once
ax[0, 0].set_yticks(np.arange(2))
ax[0, 0].set_yticklabels(["Before", "After"])

# Minor ticks
ax[0, 0].set_yticks(np.arange(-.5, 1.5, 1), minor=True)

# Legend: one entry per duplication level, taken from the bars already drawn
legend = fig.legend(dup_lev_list, bbox_to_anchor=(0.51, 1), loc="lower center",
                    title="Duplication Level", ncol=8)

# The legend sits above the figure, so it is passed explicitly to the tight bounding box
plt.savefig("03_qc_extras/seq_dup_levels.pdf",
            bbox_extra_artists=(legend,), bbox_inches="tight")

---
## Adaptor Content

In [None]:
# Adaptor content along the read, before and after cleaning, drawn as a
# two-row heatmap per sample (Read 1 on the left, Read 2 on the right).
df = pd.read_table("03_qc_extras/adaptor_content.tsv")
# Row-wise total of every column from the fifth onwards
# (presumably one column per adaptor type -- confirm against the TSV layout)
df["adapter_content"] = df.iloc[:, 4:].sum(axis="columns")
sample_list = df["sample_name"].unique()
# One row per (sample, read, stage), one column per position
data = df.pivot(index=["sample_name", "read", "stage"],
                columns="position", values="adapter_content")
n_samples = len(sample_list)
fig_height = n_samples * 1.1  # inches

# squeeze=False keeps `ax` two-dimensional even when there is a single sample
fig, ax = plt.subplots(nrows=n_samples, ncols=2,
                       sharex="row", sharey="all",
                       figsize=(15, fig_height),
                       constrained_layout=True, squeeze=False)

for row, sample in enumerate(sample_list):
    read_length = df[df["sample_name"] == sample]["position"].max()
    # Values above 10 share the top color (see extend="max" on the colorbar)
    heatmap_style = dict(aspect="auto", interpolation="nearest", origin="lower",
                         vmin=0, vmax=10, extent=[1, read_length, -0.5, 1.5],
                         cmap=cmap)

    # Keep the last image: the colorbar below is built from it.
    # Positions with no value for this sample/read are dropped.
    a = ax[row, 0].imshow(data.loc[sample, "R1", :].dropna(how="all", axis=1),
                          **heatmap_style)

    if len(data.loc[sample, "R2", :]) == 0:
        ax[row, 1].axis("off")
    else:
        ax[row, 1].imshow(data.loc[sample, "R2", :].dropna(how="all", axis=1),
                          **heatmap_style)

    ax[row, 0].grid(which="minor", color="black")
    ax[row, 1].grid(which="minor", color="black")
    ax[row, 0].tick_params(axis="y", which="minor", length=0)
    ax[row, 1].tick_params(axis="y", which="minor", length=0)
    ax[row, 0].set_title(sample, loc="left", fontweight="bold")
    ax[row, 0].xaxis.set_major_locator(ticker.MultipleLocator(10))

# The y axis is shared by all panels, so the ticks are configured once
ylabs = ["After", "Before"]
ax[0, 0].set_yticks(np.arange(len(ylabs)))
ax[0, 0].set_yticklabels(ylabs)
ax[0, 0].set_title("Read 1", fontsize=10)
ax[0, 1].set_title("Read 2", fontsize=10)
ax[-1, 0].set_xlabel("Position in Read (bp)")
ax[-1, 1].set_xlabel("Position in Read (bp)")

# Minor ticks
ax[0, 0].set_yticks(np.arange(-.5, 1.5, 1), minor=True)

# Colorbar 0.5 in above the top edge and 0.15 in tall. `add_axes` takes
# figure fractions, hence the division by the figure height. The height is
# used rather than the last loop index, which is 0 for a single sample.
cax = fig.add_axes([0.41, 1 + 0.5 / fig_height, 0.2, 0.15 / fig_height])
cbar = plt.colorbar(a, cax=cax, orientation="horizontal", extend="max")
cbar.set_label("Cumulative Adaptor Content (%)")

# The colorbar lies outside the figure area, so include it in the tight bounding box
plt.savefig("03_qc_extras/adaptor_content.pdf",
            bbox_extra_artists=(cax,), bbox_inches="tight")