In [1]:
import subprocess
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import ticker

In [3]:
def read_depth_result(file):
    """Load the depth column from a headerless, tab-separated depth file.

    Only the third column (position 2) of ``file`` is parsed; it is returned
    as a single-column DataFrame whose column is named ``depth``.
    """
    return pd.read_csv(
        file,
        sep='\t',
        header=None,      # the file carries no header row
        usecols=[2],      # keep just the third column
        names=['depth'],
    )

def plot_each_position_depth(X, y, title, ax, low_cov_threshold=5):
    """Plot depth per position on ``ax`` and summarise it in the legend.

    Parameters
    ----------
    X : array-like
        Values for the x axis (one per depth value).
    y : array-like
        Depth values. Must support elementwise ``<`` comparison, e.g. a
        numpy array or a pandas Series.
    title : str
        Title placed above the axes.
    ax : matplotlib Axes
        Axes to draw on; only ``plot``, ``legend``, ``axhline`` and
        ``set_title`` are called.
    low_cov_threshold : number, default 5
        Positions whose depth is strictly below this value are counted as
        low coverage and reported in the legend.

    Returns
    -------
    None
        The function only draws on ``ax``.
    """
    mean = np.mean(y)
    std = np.std(y)
    low_cov = np.sum(y < low_cov_threshold)

    # The legend entry doubles as a small statistics box: mean, sigma and the
    # number of low-coverage positions, one per line.
    label = (
        f'mean depth :{mean:.2f}\n'
        + r'$\sigma$ :%.2f' % std
        + f'\ndepth < {low_cov_threshold} :{low_cov}'
    )
    ax.plot(X, y, label=label)
    ax.legend(frameon=True, shadow=True, fontsize=16, loc='upper left')
    # Dashed red reference line at the mean depth.
    ax.axhline(mean, color='r', ls='--', lw=2)
    ax.set_title(title, fontsize=18)

In [4]:
# Root directory that holds one sub-directory per sequencing run.
basepath = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo')

# Sub-directory name -> title used for that run's subplot.
# Insertion order is kept, so it also fixes the top-to-bottom subplot order.
dirnames = dict([
    ('Illumina', 'Illumina'),
    ('Denovo', 'Nanopore normal'),
    ('PCR', 'Nanopore PCR'),
    ('PCR-2', 'Nanopore PCR-2'),
    ('PCR-3', 'Nanopore PCR-3'),
    ('PCR-4', 'Nanopore PCR-4'),
])

# Sample identifiers; each one is the stem of a '<sample>.txt' depth file.
sample_names = [
    'R19-2905', 'R20-0026', 'R20-0030', 'R20-0088',
    'R20-0127', 'R20-0131', 'R20-0140', 'R20-0145',
    'R20-0148', 'R20-0150', 'R20-0158', 'R20-0160',
]

In [5]:
# For every sample, stack one depth plot per sequencing run that has a depth
# file for it, and save the stack as PCR_Stats/Coverage/<sample>.png.
for sample_name in sample_names:
    # Collect the depth files that actually exist, keyed by subplot title.
    files = {}
    for dirname, title in dirnames.items():
        file = basepath/dirname/'Depth'/(sample_name + '.txt')
        if file.exists():
            files[title] = file
    num = len(files)
    if num == 0:
        # plt.subplots cannot create a zero-row grid; skip instead of aborting
        # the remaining samples.
        print(f'{sample_name}: no depth files found, skipping')
        continue
    # squeeze=False keeps `axes` a 2-D array even when there is a single run,
    # so the indexing below works for any num >= 1.
    fig, axes = plt.subplots(num, 1, figsize=(24, 5*num), squeeze=False)
    for idx, (title, file) in enumerate(files.items()):
        data = read_depth_result(file)
        ax = axes[idx, 0]
        plot_each_position_depth(data.index, data['depth'], title, ax)
        # Replace the legend with a text-only box: zero-length handles that
        # are additionally hidden, on an opaque frame.
        legend = ax.legend(loc='upper left', fontsize=14, handlelength=0, handletextpad=0, fancybox=True, frameon=True, framealpha=1)
        # Newer Matplotlib exposes the handles as `legend_handles`; older
        # releases only have `legendHandles`.
        handles = getattr(legend, 'legend_handles', None)
        if handles is None:
            handles = legend.legendHandles
        for item in handles:
            item.set_visible(False)
    # Save first, then close this specific figure so figures do not pile up
    # in memory across samples.
    fig.savefig(basepath/'PCR_Stats'/'Coverage'/(sample_name + '.png'), bbox_inches='tight', facecolor='w')
    plt.close(fig)

In [None]:
import seaborn as sns

In [None]:
# Read the PCR statistics table and show it as the cell output.
df = pd.read_csv(
    filepath_or_buffer='/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/PCR_Stats.csv',
)
df

In [None]:
# Remove the columns that should not take part in the correlation analysis.
# errors='ignore' makes the cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
df = df.drop(columns=['date', 'sample', 'barcode_number'], errors='ignore')

In [None]:
# Pairwise correlation between the columns of df (Pearson, the pandas default).
corr = df.corr(method='pearson')

In [None]:
# Draw the correlation matrix as an annotated heatmap on its own 8x6 figure.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr,
    ax=ax,
    annot=True,        # write each coefficient inside its cell
    fmt='.2f',         # two decimals for the annotations
    cmap='coolwarm',
    vmin=-1,           # lower end of the colour scale
)

In [None]:
# Write the heatmap figure to disk with a white background and tight bounding box.
fig.savefig(
    '/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/PCR_Stats/correlation.png',
    dpi=150,
    facecolor='w',
    bbox_inches='tight',
)