In [1]:
import os
import sys
import shutil
import subprocess
from pathlib import Path
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor

In [2]:
sys.path.append('../src')
from utils import run_cmd

In [3]:
# Homopolish entry-point script; `fn` runs it as `python {program} polish ...`.
program = '/media/GenomicResearch/Tools/homopolish/homopolish.py'
# Passed through `fn` to homopolish as its `-m` argument.
polish_model = '/media/GenomicResearch/Tools/homopolish/R9.4.pkl'
# Passed through `fn` to homopolish as its `-s` argument.
mash_sketch = '/media/GenomicResearch/Tools/homopolish/bacteria.msh'
# Passed through `fn` to `medaka_consensus` as its `-m` argument.
medaka_model = 'r941_min_hac_g507'

In [4]:
def fn(reads, outdir, medaka_model, polish_model, mash_sketch, threads=8, nofilter=False, minreadlen=500, minquality=7, opts=''):
    """Assemble one read set with flye, then polish with racon, medaka and homopolish.

    On success ``outdir`` contains ``contigs.fasta`` (the homopolish consensus),
    ``flye_info.txt`` and ``flye-unpolished.gfa``. The ``flye`` and
    ``homopolish`` working directories and the staged ``READS.*`` file are
    removed before returning.

    Args:
        reads: Input FASTQ path (``str`` or ``Path``).
        outdir: Output directory; created if missing.
        medaka_model: Value given to ``medaka_consensus -m``.
        polish_model: Value given to homopolish ``-m``.
        mash_sketch: Value given to homopolish ``-s``.
        threads: Thread count handed to every tool.
        nofilter: When false, reads are filtered with ``nanoq`` and
            recompressed into ``outdir``. When true, ``reads`` is symlinked
            into ``outdir`` instead and must end in ``.gz`` or ``.fastq``.
        minreadlen: ``nanoq -l`` value (only used when filtering).
        minquality: ``nanoq -q`` value (only used when filtering).
        opts: Extra text appended verbatim (unquoted) to the flye command line.

    Returns:
        None. Returns early, without producing ``contigs.fasta``, when
        ``nofilter`` is set and ``reads`` has an unsupported extension, or when
        flye does not produce ``assembly.fasta``.

    Relies on the module-level ``run_cmd`` helper and ``program`` path. The
    commands use pipes and redirection, so ``run_cmd`` is assumed to execute
    them through a shell -- TODO confirm.
    """
    import shlex

    def quoted(path):
        # Paths may be ``Path`` objects and may contain spaces or shell metacharacters.
        return shlex.quote(str(path))

    flye_dirname = os.path.join(outdir, 'flye')
    flye_asm = os.path.join(flye_dirname, 'assembly.fasta')
    racon_dirname = os.path.join(flye_dirname, 'polish', 'racon')
    racon_asm = os.path.join(racon_dirname, 'consensus.fasta')
    paf_filename = os.path.join(racon_dirname, 'aligments.paf')
    medaka_dirname = os.path.join(flye_dirname, 'polish', 'medaka')
    medaka_asm = os.path.join(medaka_dirname, 'consensus.fasta')
    for dirname in (racon_dirname, medaka_dirname):
        os.makedirs(dirname, exist_ok=True)

    # Stage the reads inside outdir: either a filtered copy or a symlink.
    if not nofilter:
        raw = os.path.join(outdir, 'READS.fastq.gz')
        run_cmd(
            f"nanoq -i {quoted(reads)} -l {minreadlen} -q {minquality} "
            f"| pigz -1 -p {threads} > {quoted(raw)}"
        )
    else:
        suffix = os.path.splitext(reads)[-1]
        if suffix == '.gz':
            raw = os.path.join(outdir, 'READS.fastq.gz')
        elif suffix == '.fastq':
            raw = os.path.join(outdir, 'READS.fastq')
        else:
            # Unsupported extension: nothing is assembled.
            return
        # A link left behind by an interrupted run would make os.symlink fail.
        if os.path.lexists(raw):
            os.remove(raw)
        # The link target is resolved relative to the link's own directory, so a
        # relative `reads` path must be made absolute or the link dangles.
        os.symlink(os.path.abspath(reads), raw)

    try:
        run_cmd(
            f"conda run -n dragonflye flye --nano-raw {quoted(raw)} "
            f"-o {quoted(flye_dirname)} -t {threads} {opts}"
        )
        if not os.path.exists(flye_asm):
            return
        shutil.copyfile(
            os.path.join(flye_dirname, 'assembly_info.txt'),
            os.path.join(outdir, 'flye_info.txt')
        )
        shutil.copyfile(
            os.path.join(flye_dirname, 'assembly_graph.gfa'),
            os.path.join(outdir, 'flye-unpolished.gfa')
        )
        run_cmd(
            f"minimap2 -t {threads} -x map-ont {quoted(flye_asm)} {quoted(raw)} "
            f"> {quoted(paf_filename)}"
        )
        run_cmd(
            f"racon -t {threads} {quoted(raw)} {quoted(paf_filename)} {quoted(flye_asm)} "
            f"> {quoted(racon_asm)}"
        )
        run_cmd(
            f"conda run -n dragonflye medaka_consensus -i {quoted(raw)} -d {quoted(racon_asm)} "
            f"-o {quoted(medaka_dirname)} -m {quoted(medaka_model)} -t {threads}"
        )
    finally:
        # The staged reads are only needed up to medaka; drop them even when a
        # step fails so they do not pile up in outdir.
        if os.path.lexists(raw):
            os.remove(raw)

    homopolish_dirname = os.path.join(outdir, 'homopolish')
    homopolish_asm = os.path.join(homopolish_dirname, 'consensus_homopolished.fasta')
    run_cmd(
        f"conda run -n homopolish python {quoted(program)} polish -a {quoted(medaka_asm)} "
        f"-s {quoted(mash_sketch)} -m {quoted(polish_model)} "
        f"-o {quoted(homopolish_dirname)} -t {threads}"
    )
    shutil.copyfile(
        homopolish_asm,
        os.path.join(outdir, 'contigs.fasta')
    )
    # Intermediate assemblies are no longer needed once contigs.fasta exists.
    for dirname in (flye_dirname, homopolish_dirname):
        shutil.rmtree(dirname)

In [30]:
# Assemble every read file found in `dirpath` in parallel, one `fn` call per file.
dirpath = Path('/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wgs-2/20211209_Lis_barcode10_R20-0148/fastq')
outpath = Path('/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wgs-2/20211209_Lis_barcode10_R20-0148/denovo/')

# NOTE(review): 20 workers x 12 threads each may oversubscribe the host -- confirm core count.
with Pool(20) as p:
    for i in dirpath.iterdir():
        outdir = outpath/(i.stem.replace('.fastq', ''))
        p.apply_async(
            fn,
            (i, outdir, medaka_model, polish_model, mash_sketch),
            {'nofilter': False, 'threads': 12},
            # Without a callback, an exception raised inside a worker is
            # discarded because the AsyncResult is never read.
            error_callback=lambda exc, name=i.name: print(
                f"fn failed for {name}: {exc!r}", file=sys.stderr
            ),
        )
    p.close()
    p.join()

In [None]:
# Serial run of `fn` on whatever `i` and `outdir` currently hold in the kernel.
# NOTE(review): these names are not set in this cell; they are left over from an
# earlier loop, so this fails on a fresh kernel.
fn(
    reads=i,
    outdir=outdir,
    medaka_model=medaka_model,
    polish_model=polish_model,
    mash_sketch=mash_sketch,
    nofilter=False,
)

In [None]:
# Assemble a single read set without the nanoq filtering step, using 32 threads.
fq = '/media/Central_Lab_Storage/MinION_2022/20220113_Sal/fastq/barcode10-100x.fastq.gz'
out = '/media/Central_Lab_Storage/MinION_2022/20220113_Sal/denovo/barcode10-100x'
fn(fq, out, medaka_model, polish_model, mash_sketch, threads=32, nofilter=True)

In [12]:
# Assemble every read file found in `dirpath` in parallel, one `fn` call per file.
dirpath = Path('/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wgs/20211209_Lis_barcode08/fastq')
outpath = Path('/media/GenomicResearch/Issue/20210909_estimate_nanopore_depth/wgs/20211209_Lis_barcode08/denovo')

# Keyword arguments forwarded to every `fn` call (was a stray, unused `{''}` set).
kwds = {'threads': 8}

with Pool(5) as p:
    try:
        for file in dirpath.iterdir():
            outdir = outpath/file.stem.replace('.fastq', '')
            p.apply_async(
                fn,
                args=(file, outdir, medaka_model, polish_model, mash_sketch),
                kwds=kwds,
                # Without a callback, an exception raised inside a worker is
                # discarded because the AsyncResult is never read.
                error_callback=lambda exc, name=file.name: print(
                    f"fn failed for {name}: {exc!r}", file=sys.stderr
                ),
            )
        p.close()
        p.join()
    except KeyboardInterrupt:
        # Stop outstanding workers immediately when the cell is interrupted.
        p.terminate()

In [9]:
# Lookup from barcode directory name to the label used for the exported file.
namamap = dict((
    ('barcode01', 'R19-2905'),
    ('barcode02', 'R20-0026'),
    ('barcode03', 'R20-0030'),
    ('barcode04', 'R20-0088'),
    ('barcode05', 'R20-0127'),
    ('barcode06', 'R20-0131'),
    ('barcode07', 'R20-0140'),
    ('barcode08', 'R20-0145'),
    ('barcode09', 'R20-0148'),
    ('barcode10', 'R20-0150'),
    ('barcode11', 'R20-0158'),
    ('barcode12', 'R20-0160'),
))

In [10]:
# Copy each barcode's contigs.fasta into `outpath`, renamed to "<label>.fa".
dirpath = Path('/media/Central_Lab_Storage/MinION/20200925/denovo')
outpath = Path('/media/GenomicResearch/Issue/20201221_hybrid_and_denovo/NanoporeWGS/Contigs')

for barcode, label in namamap.items():
    shutil.copyfile(dirpath / barcode / 'contigs.fasta', outpath / f'{label}.fa')