In [1]:
import os

In [4]:
# Per-sample working directory for the hybrid assembly; created up front so
# every later step can write into it.
outdir = '/media/NGS/Nanopore_1/20210520_campy/hybrid/R19-1007'
os.makedirs(outdir, exist_ok=True)

# Raw Nanopore FASTQ (barcode06) used as input to the read-filtering step.
nano_raw = '/media/NGS/Nanopore_1/20210520_campy/fastq/nanopore/barcode06.fastq'

# Clean reads

In [5]:
# Filter the raw Nanopore reads with nanoq and write the surviving reads to
# <outdir>/clean.fastq, which is the input for the assembly and medaka steps.
# NOTE(review): -q 7 / -l 50 look like the minimum read quality and minimum
# read length cut-offs, and -f the input file in this nanoq release — confirm
# against `nanoq --help` for the installed version.
clean_nano_raw = os.path.join(outdir, 'clean.fastq')
!nanoq -f {nano_raw} -o {clean_nano_raw} -q 7 -l 50

56846 356709202 13715 73902 50 6275 3052 11.0 11.1


# Flye assemblies

In [6]:
# Assemble the filtered Nanopore reads with Flye, executed inside the `flye`
# conda environment; all Flye output goes to <outdir>/flye.
# NOTE(review): --plasmids is reported as deprecated in newer Flye releases —
# confirm before upgrading the environment.
flye_dir = os.path.join(outdir, 'flye')
!conda run -n flye flye --nano-raw {clean_nano_raw} -o {flye_dir} -t 64 --plasmids

[2021-06-25 16:53:10] INFO: Starting Flye 2.8.3-b1695
[2021-06-25 16:53:10] INFO: >>>STAGE: configure
[2021-06-25 16:53:10] INFO: Configuring run
[2021-06-25 16:53:12] INFO: Total read length: 356709202
[2021-06-25 16:53:12] INFO: Reads N50/N90: 13715 / 3222
[2021-06-25 16:53:12] INFO: Minimum overlap set to 3000
[2021-06-25 16:53:12] INFO: >>>STAGE: assembly
[2021-06-25 16:53:12] INFO: Assembling disjointigs
[2021-06-25 16:53:12] INFO: Reading sequences
[2021-06-25 16:53:20] INFO: Counting k-mers:
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
[2021-06-25 16:54:41] INFO: Filling index table (1/2)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
[2021-06-25 16:54:52] INFO: Filling index table (2/2)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
[2021-06-25 16:55:08] INFO: Extending reads
[2021-06-25 16:58:03] INFO: Overlap-based coverage: 159
[2021-06-25 16:58:03] INFO: Median overlap divergence: 0.0993884
0% 90% 100% 
[2021-06-25 16:58:44] INFO: Assembled 3 disjointigs
[2021-06-25 16:58:44] I

#### Medaka polish

In [7]:
# Polish the Flye draft (<flye_dir>/assembly.fasta) with medaka_consensus,
# using the filtered Nanopore reads as basecalls; output goes to <outdir>/medaka.
# NOTE(review): no model (-m) is passed, so medaka presumably falls back to its
# default model — confirm it matches the basecaller used for this run.
flye_asm = os.path.join(flye_dir, 'assembly.fasta')
medaka_dir = os.path.join(outdir, 'medaka')

!conda run -n medaka medaka_consensus -i {clean_nano_raw} -d {flye_asm} -o {medaka_dir} -t 64 

Checking program versions
This is medaka 1.3.4
Program    Version    Required   Pass     
bcftools   1.12       1.11       True     
bgzip      1.12       1.11       True     
minimap2   2.17       2.11       True     
samtools   1.12       1.11       True     
tabix      1.12       1.11       True     
Aligning basecalls to draft
Removing previous index file /media/NGS/Nanopore_1/20210520_campy/hybrid/R19-1007/flye/assembly.fasta.mmi
Removing previous index file /media/NGS/Nanopore_1/20210520_campy/hybrid/R19-1007/flye/assembly.fasta.fai
Running medaka consensus
Polished assembly written to /media/NGS/Nanopore_1/20210520_campy/hybrid/R19-1007/medaka/consensus.fasta, have a nice day.

Constructing minimap index.
[M::mm_idx_gen::0.088*0.96] collected minimizers
[M::mm_idx_gen::0.125*1.28] sorted minimizers
[M::main::0.158*1.22] loaded/built the index for 3 target sequence(s)
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 3
[M::mm_idx_stat::0.164*1.22] distinct minimizers: 32

In [2]:
import shlex
import shutil
import sys

from Bio import SeqIO

sys.path.append('../src')
from utils import run_cmd

def pilon_polish(fq_1, fq_2, asm, outdir, prefix, round_num, threads=4):
    """Run one round of Pilon short-read polishing on an assembly.

    Steps: index ``asm`` with ``bwa index`` and ``samtools faidx``; align the
    read pair with ``bwa mem`` piped through ``samclip`` and ``samtools sort``;
    index the resulting BAM; run ``pilon --fix bases --changes``.

    Args:
        fq_1: Path of the first read file handed to ``bwa mem``.
        fq_2: Path of the second read file handed to ``bwa mem``.
        asm: Path of the FASTA assembly to polish.
        outdir: Directory for the sorted BAM and the Pilon outputs; created
            if it does not exist.
        prefix: Value passed to Pilon's ``--output``; the changes file is read
            from ``<outdir>/<prefix>.changes``.
        round_num: Round number, used only in the printed progress message.
        threads: Thread count passed to bwa, samtools sort and pilon.

    Returns:
        int: Number of lines in the Pilon ``.changes`` file.
    """
    os.makedirs(outdir, exist_ok=True)
    paired_bam_file = os.path.join(outdir, 'alignments.sort.bam')
    pilon_changes_file = os.path.join(outdir, str(prefix) + '.changes')

    # Quote everything caller-supplied before it is spliced into a command
    # string, so paths containing spaces or shell metacharacters stay a single
    # argument. Plain paths are returned unchanged by shlex.quote.
    # NOTE(review): assumes run_cmd hands the string to a shell (the commands
    # below already rely on `&&` and `|`) — confirm in ../src/utils.
    q_asm = shlex.quote(str(asm))
    q_fai = shlex.quote(f"{asm}.fai")
    q_fq_1 = shlex.quote(str(fq_1))
    q_fq_2 = shlex.quote(str(fq_2))
    q_bam = shlex.quote(paired_bam_file)
    q_outdir = shlex.quote(str(outdir))
    q_prefix = shlex.quote(str(prefix))

    run_cmd(f"bwa index {q_asm} && samtools faidx {q_asm}")

    # The .fai written by `samtools faidx` above is what samclip reads via --ref.
    align_cmd = (
        f"bwa mem -v 3 -x intractg -t {threads} {q_asm} {q_fq_1} {q_fq_2} | "
        f"samclip --ref {q_fai} | "
        f"samtools sort --threads {threads} -m 500m --reference {q_asm} -T /tmp/ -o {q_bam}"
    )
    run_cmd(align_cmd)
    run_cmd(f"samtools index {q_bam}")

    pilon_cmd = (
        f"pilon --genome {q_asm} --frags {q_bam} --minmq 60 --minqual 3 --fix bases --output {q_prefix} "
        f"--outdir {q_outdir} --threads {threads} --changes --mindepth 0.25"
    )
    run_cmd(pilon_cmd)

    # Every line of the changes file is counted as one change.
    with open(pilon_changes_file) as pilon_change:
        change_count = sum(1 for _ in pilon_change)
    print(f'Pilon polish round {round_num}\nTotal number of changes: {change_count}')
    return change_count

def rename_contig_id(fasta):
    """Load a FASTA file and renumber its records 1, 2, 3, ... in file order.

    Each record's ``id`` and ``name`` are replaced by its 1-based position as
    a string, and its ``description`` is blanked.

    Args:
        fasta: Path (or handle) accepted by ``SeqIO.parse`` in 'fasta' format.

    Returns:
        list: The modified records, in their original order.
    """
    renamed = []
    position = 0
    for rec in SeqIO.parse(fasta, 'fasta'):
        position += 1
        label = f"{position}"
        rec.id = label
        rec.name = label
        rec.description = ''
        renamed.append(rec)
    return renamed


def clear_dir(dirpath):
    """Leave ``dirpath`` as an existing, empty directory.

    An existing directory is removed with everything inside it and then
    recreated. A path that does not exist yet is simply created, instead of
    raising ``FileNotFoundError`` from ``shutil.rmtree``.

    Args:
        dirpath: Directory to empty (or create).
    """
    if os.path.isdir(dirpath):
        shutil.rmtree(dirpath)
    os.makedirs(dirpath, exist_ok=True)

# Short-read polishing

In [14]:
# Inputs for Pilon polishing: the Illumina read pair and the medaka consensus
# of the sample named below.
# NOTE(review): this cell points at sample R20-0160 under run 20200925, while
# the Nanopore cells earlier in this notebook work on R19-1007 under
# 20210520_campy — confirm the switch of sample/run is intended.
sample_name = 'R20-0160'
fq_1 = f'/media/NGS/Nanopore_1/20200925/fastq/illumina/{sample_name}_R1.fastq.gz'
fq_2 = f'/media/NGS/Nanopore_1/20200925/fastq/illumina/{sample_name}_R2.fastq.gz'
asm = f'/media/NGS/Nanopore_1/20200925/hybrid/{sample_name}/medaka/consensus.fasta'
# Alternative: polish the medaka consensus produced by the medaka cell of this notebook.
# asm = os.path.join(medaka_dir, 'consensus.fasta')

In [15]:
# Iterative Pilon polishing: repeat until a round reports zero changes or the
# round limit is hit, then write the final contigs to <outdir>/assembly.fasta.
outdir = f'/media/NGS/Nanopore_1/20200925/hybrid/{sample_name}'
pilon_dir = os.path.join(outdir, 'pilon')
assembly = os.path.join(outdir, 'assembly.fasta')
max_rounds = 10

for round_num in range(1, max_rounds + 1):
    prefix = f'{round_num}_pilon'
    change_count = pilon_polish(fq_1, fq_2, asm, pilon_dir, prefix, round_num, threads=64)
    pilon_fasta_file = os.path.join(pilon_dir, prefix + '.fasta')
    # Records are loaded into a list here, so they survive clear_dir() below.
    pilon_fasta_records = rename_contig_id(pilon_fasta_file)

    converged = not change_count
    if converged or round_num == max_rounds:
        if not converged:
            # Previously the loop just ended here and no final assembly was written.
            print(f'Pilon still reported {change_count} changes after {max_rounds} rounds; '
                  'writing the last polished assembly anyway.')
        SeqIO.write(pilon_fasta_records, assembly, 'fasta')
        break

    # Not converged yet: wipe this round's intermediates and make the renamed
    # contigs the input assembly of the next round.
    clear_dir(pilon_dir)
    asm = os.path.join(pilon_dir, prefix + '.fa')
    SeqIO.write(pilon_fasta_records, asm, 'fasta')

Pilon polish round 1
Total number of changes: 110
Pilon polish round 2
Total number of changes: 0
