## import sys
from uuid import uuid4
from pathlib import Path
from tempfile import TemporaryFile
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
import numpy as np
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [2]:
# `sys` has to be imported here: the file-level `import sys` line is commented
# out, so without this the path tweak below fails with NameError.
import sys

# Make the helper modules in ../src (relative to the working directory)
# importable; skip the append when this cell is re-run.
if '../src' not in sys.path:
    sys.path.append('../src')

from utils import run_cmd

In [None]:
def calculate_each_position_depth(asm, raw, out, preset='map-ont', threads=8):
    """Map reads onto an assembly and write the depth at every position.

    Builds a single shell pipeline and hands it to ``run_cmd``: ``zcat -f``
    streams the reads, ``minimap2`` aligns them to ``asm``, ``samtools sort``
    sorts the alignments and ``samtools depth -a`` writes the table to ``out``.

    Args:
        asm: Path of the assembly FASTA used as the mapping reference.
        raw: Path or glob pattern of the read files. It is inserted unquoted
            so that wildcards such as ``*.fastq`` can still be expanded
            (presumably by the shell that ``run_cmd`` launches).
        out: Path of the depth table to write.
        preset: minimap2 ``-x`` preset; one of 'map-ont', 'ava-ont', 'asm5'
            or 'sr'.
        threads: Thread count passed to minimap2 and to both samtools steps.

    Raises:
        ValueError: If ``preset`` is not one of the supported presets.
    """
    import shlex

    supported = ('map-ont', 'ava-ont', 'asm5', 'sr')
    if preset not in supported:
        raise ValueError(
            f"Preset {preset!r} is not supported; "
            f"choose one of: {', '.join(supported)}."
        )

    # Quote the single-file arguments so paths containing spaces or shell
    # metacharacters survive; `raw` is deliberately left as-is (see docstring).
    asm_arg = shlex.quote(str(asm))
    out_arg = shlex.quote(str(out))

    run_cmd(f"zcat -f {raw} | "
            f"minimap2 -t {threads} -K 8G -ax {preset} {asm_arg} - | "
            f"samtools sort -@ {threads} --reference {asm_arg} -O BAM - | "
            f"samtools depth -a -@ {threads} - -o {out_arg}")

# Illumina WGS

In [18]:
# Locations for the Illumina WGS depth run.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# raw_dir: read files, matched below with the glob <sample>*
raw_dir = Path('/media/Central_Lab_Storage/MinION/20200925/fastq/illumina')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Illumina/Depth')

In [19]:
# Sample identifiers; each one is used as the stem of the assembly file,
# the prefix of the read files and the stem of the output depth table.
sample_names = [
    'R19-2905',
    'R20-0026',
    'R20-0030',
    'R20-0088',
    'R20-0127',
    'R20-0131',
    'R20-0140',
    'R20-0145',
    'R20-0148',
    'R20-0150',
    'R20-0158',
    'R20-0160',
]

In [20]:
# Run the depth pipeline for every Illumina sample in parallel worker processes.
with ProcessPoolExecutor(6) as executor:
    futures = {}
    for sample_name in sample_names:
        asm = asm_dir/(sample_name + '.fa')
        # Glob matching every read file of the sample; it is passed through
        # to the command line unexpanded.
        raw = raw_dir/(sample_name + '*')
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out, 'sr')

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGS

In [21]:
# Locations for the Nanopore WGS depth run.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# raw_dir: parent of the per-barcode directories holding the *.fastq reads
raw_dir = Path('/media/ONT/20200925/Barcode')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Denovo/Depth')

In [22]:
# Sample name -> barcode sub-directory (under raw_dir) holding that sample's reads.
dir_map = {
    'R19-2905': 'barcode01',
    'R20-0026': 'barcode02',
    'R20-0030': 'barcode03',
    'R20-0088': 'barcode04',
    'R20-0127': 'barcode05',
    'R20-0131': 'barcode06',
    'R20-0140': 'barcode07',
    'R20-0145': 'barcode08',
    'R20-0148': 'barcode09',
    'R20-0150': 'barcode10',
    'R20-0158': 'barcode11',
    'R20-0160': 'barcode12',
}

In [23]:
# Run the depth pipeline for every Nanopore WGS sample in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(8) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        asm = asm_dir/(sample_name + '.fa')
        # Glob over all FASTQ files of the sample's barcode directory.
        raw = raw_dir/dirname/'*.fastq'
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGS 2

In [4]:
# Locations for the second Nanopore WGS depth run (output goes to NanoporeWGS2).
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# raw_dir: parent of the per-barcode directories holding the *.fastq reads
raw_dir = Path('/media/ONT/20211209_Lis/guppy_hac')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/NanoporeWGS2/Depth')

In [5]:
# Sample name -> barcode sub-directory (under raw_dir) holding that sample's reads.
dir_map = {
    'R20-0030': 'barcode07',
    'R20-0140': 'barcode08',
    'R20-0145': 'barcode09',
    'R20-0148': 'barcode10',
}

In [6]:
# Run the depth pipeline for every sample of this run in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(8) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        asm = asm_dir/(sample_name + '.fa')
        # Glob over all FASTQ files of the sample's barcode directory.
        raw = raw_dir/dirname/'*.fastq'
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGA

In [24]:
# Locations for the Nanopore WGA depth run; read locations come from dir_map.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Amplification/Depth')

In [25]:
# Sample name -> absolute barcode directory holding that sample's *.fastq reads.
dir_map = {
    'R19-2905': '/media/ONT/20210902_Lis/guppy/barcode01',
    'R20-0026': '/media/ONT/20210818_Lis/guppy/barcode01',
    'R20-0030': '/media/ONT/20210902_Lis/guppy/barcode02',
    'R20-0088': '/media/ONT/20210902_Lis/guppy/barcode03',
    'R20-0127': '/media/ONT/20210818_Lis/guppy/barcode02',
    'R20-0131': '/media/ONT/20210902_Lis/guppy/barcode04',
    'R20-0140': '/media/ONT/20210902_Lis/guppy/barcode05',
    'R20-0145': '/media/ONT/20210902_Lis/guppy/barcode06',
    'R20-0148': '/media/ONT/20210818_Lis/guppy/barcode03',
    'R20-0150': '/media/ONT/20210818_Lis/guppy/barcode04',
    'R20-0158': '/media/ONT/20210902_Lis/guppy/barcode07',
    'R20-0160': '/media/ONT/20210902_Lis/guppy/barcode08',
}

In [26]:
# Run the depth pipeline for every Nanopore WGA sample in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(6) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        asm = asm_dir/(sample_name + '.fa')
        # Glob over all FASTQ files of the sample's barcode directory.
        raw = Path(dirname, '*.fastq')
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGA 2

In [27]:
# Locations for the Nanopore WGA 2 depth run; read locations come from dir_map.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Amplification-2/Depth')

In [28]:
# Sample name -> absolute barcode directory holding that sample's *.fastq reads.
dir_map = {
    'R19-2905': '/media/ONT/20210909_Lis/Guppy/barcode09',
    'R20-0030': '/media/ONT/20210909_Lis/Guppy/barcode10',
    'R20-0088': '/media/ONT/20210909_Lis/Guppy/barcode11',
    'R20-0131': '/media/ONT/20210909_Lis/Guppy/barcode12',
}

In [29]:
# Run the depth pipeline for every Nanopore WGA 2 sample in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(8) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        asm = asm_dir/(sample_name + '.fa')
        # Glob over all FASTQ files of the sample's barcode directory.
        raw = Path(dirname, '*.fastq')
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGA 3

In [30]:
# Locations for the Nanopore WGA 3 depth run; read locations come from dir_map.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Amplification-3/Depth')

In [31]:
# Sample name -> absolute barcode directory holding that sample's *.fastq reads.
dir_map = {
    'R20-0088': '/media/ONT/20210916_Lis/Guppy/barcode09',
    'R20-0131': '/media/ONT/20210916_Lis/Guppy/barcode10',
}

In [32]:
# Run the depth pipeline for every Nanopore WGA 3 sample in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(8) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        asm = asm_dir/(sample_name + '.fa')
        # Glob over all FASTQ files of the sample's barcode directory.
        raw = Path(dirname, '*.fastq')
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")

# Nanopore WGA 5

In [24]:
# Single-sample run for R20-0088, executed in this process with 32 threads;
# `preset` is left at its default.
calculate_each_position_depth(
    asm='/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs/R20-0088.fa',
    raw='/media/ONT/20211028_WGA/guppy_hac/barcode06/*',
    out='/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Amplification-5/Depth/R20-0088.txt',
    threads=32,
)

# Nanopore WGA 4

In [23]:
# NOTE(review): this single-sample run (R20-0088, Amplification-4) is disabled;
# presumably its output already exists - confirm before re-enabling or deleting.
# calculate_each_position_depth(
#     '/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs/R20-0088.fa',
#     '/media/ONT/20211007_WGA/guppy_hac/barcode12/*',
#     '/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Amplification-4/Depth/R20-0088.txt',
#     threads=24,
# )

# Nanopore WGA 6

In [14]:
# Locations for the Nanopore WGA 6 depth run; read locations come from dir_map.
# asm_dir: assemblies, looked up below as <sample>.fa
asm_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/Unicycler/Contigs')
# out_dir: depth tables, written below as <sample>.txt
out_dir = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/NanoporeWGA6/Depth')

In [20]:
# Sample name -> read source. The loop below treats an entry that is a
# directory as a folder of *.fastq files and any other entry (here the
# reads.fastq.gz paths) as a single reads file.
dir_map = {
    'R19-2905': '/media/ONT/20211223_Lis_WGA/guppy_hac/barcode09',
    'R20-0026': '/media/ONT/20211223_Lis_WGA/guppy_hac/barcode10',
    'R20-0030': '/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wga/20211220_Lis_WGA_barcode05_R20-0030/reads.fastq.gz',
    'R20-0088': '/media/ONT/20211228_Lis_WGA/guppy_hac/barcode01',
    'R20-0127': '/media/ONT/20211228_Lis_WGA/guppy_hac/barcode02',
    'R20-0131': '/media/ONT/20211227_Lis_WGA/guppy_hac/barcode11',
    'R20-0140': '/media/ONT/20211220_Lis_WGA_2/guppy_hac/barcode06',
    'R20-0145': '/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wga/20211220_Lis_WGA_barcode07_R20-0145/reads.fastq.gz',
    'R20-0148': '/media/ONT/20211220_Lis_WGA_2/guppy_hac/barcode08',
    'R20-0150': '/media/ONT/20211227_Lis_WGA/guppy_hac/barcode12',
    'R20-0158': '/media/ONT/20211229_Lis_WGA/guppy_hac/barcode03',
    'R20-0160': '/media/ONT/20211229_Lis_WGA/guppy_hac/barcode04',
}

In [21]:
# Run the depth pipeline for every Nanopore WGA 6 sample in parallel worker
# processes (default 'map-ont' preset).
with ProcessPoolExecutor(8) as executor:
    futures = {}
    for sample_name, dirname in dir_map.items():
        dirname = Path(dirname)
        asm = asm_dir/(sample_name + '.fa')
        # A directory contributes all of its *.fastq files; any other path
        # is used directly as the reads file.
        raw = dirname/'*.fastq' if dirname.is_dir() else dirname
        out = out_dir/(sample_name + '.txt')
        futures[sample_name] = executor.submit(
            calculate_each_position_depth, asm, raw, out)

# Leaving the `with` block waits for all jobs. Exceptions raised inside a
# worker stay hidden in its Future unless inspected, so surface them here.
failed = {name: future.exception() for name, future in futures.items()
          if future.exception() is not None}
if failed:
    raise RuntimeError(f"Depth calculation failed for: {failed!r}")