In [1]:
import os
import sys
import json
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from multiprocessing import Pool
import pandas as pd

In [2]:
sys.path.append('../../src')
from utils import run_cmd
from application import MlstCommandline, \
                        PlasmidFinderCommandline, \
                        ResfinderCommandline, \
                        AmrfinderCommandline, \
                        VirulencefinderCommandline, \
                        SerotypefinderCommandline

In [11]:
def run_mlst(infile, outdir, database, species):
    """Run the MLST command-line wrapper for one input.

    Args:
        infile: Input passed to the wrapper as ``infile``.
        outdir: Output directory; created if it does not exist.
        database: Forwarded to the wrapper as ``database``.
        species: Forwarded to the wrapper as ``species``.

    A temporary directory is created under ``/tmp/`` and handed to the
    wrapper as ``tmp``; it is removed when the ``with`` block exits.
    """
    os.makedirs(outdir, exist_ok=True)
    with TemporaryDirectory(dir='/tmp/') as scratch:
        # NOTE(review): the `extented_output` spelling is kept as-is; it is the
        # keyword used for every finder wrapper in this file.
        options = dict(
            infile=infile,
            outdir=outdir,
            database=database,
            species=species,
            tmp=scratch,
            extented_output=True,
        )
        MlstCommandline(**options)()

def run_plasmidfinder(infile, outdir, database):
    """Run the PlasmidFinder command-line wrapper for one input.

    Args:
        infile: Input passed to the wrapper as ``infile``.
        outdir: Output directory; created if it does not exist.
        database: Forwarded to the wrapper as ``database``.

    A temporary directory is created under ``/dev/shm/`` and handed to the
    wrapper as ``tmp``; it is removed when the ``with`` block exits.
    """
    os.makedirs(outdir, exist_ok=True)
    # NOTE(review): this wrapper uses /dev/shm/ while the other finders use
    # /tmp/ -- presumably intentional; confirm.
    with TemporaryDirectory(dir='/dev/shm/') as scratch:
        options = dict(
            infile=infile,
            outdir=outdir,
            database=database,
            tmp=scratch,
            extented_output=True,
        )
        PlasmidFinderCommandline(**options)()

def run_virulencefinder(infile, outdir, db_path, database):
    """Run the VirulenceFinder command-line wrapper for one input.

    Args:
        infile: Input passed to the wrapper as ``infile``.
        outdir: Output directory; created if it does not exist.
        db_path: Forwarded to the wrapper as ``db_path``.
        database: Forwarded to the wrapper as ``database``.

    A temporary directory is created under ``/tmp/`` and handed to the
    wrapper as ``tmp``; it is removed when the ``with`` block exits.
    """
    os.makedirs(outdir, exist_ok=True)
    with TemporaryDirectory(dir='/tmp/') as scratch:
        options = dict(
            infile=infile,
            outdir=outdir,
            db_path=db_path,
            database=database,
            tmp=scratch,
            extented_output=True,
        )
        VirulencefinderCommandline(**options)()

def run_serotypefinder(infile, outdir, db_path):
    """Run the SerotypeFinder command-line wrapper for one input.

    Args:
        infile: Input passed to the wrapper as ``infile``.
        outdir: Output directory; created if it does not exist.
        db_path: Forwarded to the wrapper as ``db_path``.

    A temporary directory is created under ``/tmp/`` and handed to the
    wrapper as ``tmp``; it is removed when the ``with`` block exits.
    """
    os.makedirs(outdir, exist_ok=True)
    with TemporaryDirectory(dir='/tmp/') as scratch:
        options = dict(
            infile=infile,
            outdir=outdir,
            db_path=db_path,
            tmp=scratch,
            extented_output=True,
        )
        SerotypefinderCommandline(**options)()

def run_resfinder(infile, outdir, db_res, db_point, species):
    """Run the ResFinder command-line wrapper (with ``point=True``) for one input.

    Args:
        infile: Input passed to the wrapper as ``infasta``.
        outdir: Output directory; created if it does not exist.
        db_res: Forwarded to the wrapper as ``db_res``.
        db_point: Forwarded to the wrapper as ``db_point``.
        species: Forwarded to the wrapper as ``species``.

    After the run, the ``pointfinder_blast`` sub-directory of ``outdir`` is
    deleted if it exists.
    """
    os.makedirs(outdir, exist_ok=True)
    cline = ResfinderCommandline(
        infasta=infile, outdir=outdir, db_res=db_res, db_point=db_point, species=species, point=True
    )
    cline()

    # Only remove the directory when it is actually there: an unconditional
    # rmtree raises FileNotFoundError and would make an otherwise finished run
    # look like a failure.
    blast_dir = os.path.join(outdir, 'pointfinder_blast')
    if os.path.isdir(blast_dir):
        shutil.rmtree(blast_dir)

def run_amrfinder(infile, outfile, database, **kwargs):
    """Run the AMRFinder command-line wrapper for one nucleotide FASTA.

    Args:
        infile: Input passed to the wrapper as ``nuc_fasta``.
        outfile: Report path passed to the wrapper as ``output_file``; its
            parent directory is created if missing.
        database: Forwarded to the wrapper as ``database``.
        **kwargs: Extra options forwarded unchanged to the wrapper. ``cmd``
            may be supplied here to override the default executable path.
    """
    # Previously `cmd` was fixed in the call below, so passing cmd=... through
    # kwargs raised TypeError (duplicate keyword). Pop it so callers on other
    # machines can point at their own executable; the default is unchanged.
    cmd = kwargs.pop('cmd', '/home/chen1i6c04/miniconda3/envs/amrfinder/bin/amrfinder')

    # The other run_* helpers create their output location; do the same here.
    # dirname() is '' for a bare filename, which makedirs() rejects.
    parent = os.path.dirname(outfile)
    if parent:
        os.makedirs(parent, exist_ok=True)

    cline = AmrfinderCommandline(cmd=cmd,
        nuc_fasta=infile, output_file=outfile, database=database, **kwargs
    )
    cline()

In [4]:
# Database locations read by pipeline() and handed to the run_* helpers.
# NOTE(review): these are absolute, machine-specific paths; they must be
# adjusted before running this notebook on another host.
# Passed to run_mlst as `database`.
mlst_db = '/media/NGS/Data_Analysis/CGE/mlst_db'
# Passed to run_plasmidfinder as `database`.
plasmidfinder_db = '/media/NGS/Data_Analysis/CGE/plasmidfinder_db/'
# Passed to run_virulencefinder as `db_path`.
virulencefinder_db = '/media/NGS/Data_Analysis/CGE/virulencefinder_db/'
# Passed to run_serotypefinder as `db_path`.
serotypefinder_db = '/media/NGS/Data_Analysis/CGE/serotypefinder_db/'
# Passed to run_amrfinder as `database`.
amrfinder_db = '/home/chen1i6c04/Tools/amr_db/latest/'

In [12]:
def pipeline(infile, outdir, threads=4):
    """Run every typing / resistance tool on one input, in a fixed order.

    Order: MLST, AMRFinder, PlasmidFinder, VirulenceFinder, SerotypeFinder,
    ResFinder. Each tool writes to its own sub-directory (or file) of
    ``outdir``.

    Args:
        infile: Input handed to each run_* helper.
        outdir: Base output location for this input.
        threads: Forwarded to run_amrfinder as ``threads``; the other helpers
            do not receive it.
    """
    def under(name):
        # Location of one tool's output below the per-input directory.
        return os.path.join(outdir, name)

    run_mlst(infile, under('mlst'), mlst_db, 'ecoli')

    # AMRFinder writes a report plus a nucleotide FASTA directly in outdir.
    run_amrfinder(
        infile,
        under('amrfinder.txt'),
        amrfinder_db,
        threads=threads,
        organism='Escherichia',
        nucleotide_output=under('amrfinder.fna'),
    )

    run_plasmidfinder(infile, under('plasmidfinder'), plasmidfinder_db)
    run_virulencefinder(infile, under('virulencefinder'), virulencefinder_db, 'virulence_ecoli')
    run_serotypefinder(infile, under('serotypefinder'), serotypefinder_db)

    # NOTE(review): unlike the other databases, the ResFinder / PointFinder
    # locations are written inline here rather than read from module-level names.
    run_resfinder(
        infile,
        under('resfinder'),
        '/media/NGS/Data_Analysis/CGE/resfinder_db/',
        '/media/NGS/Data_Analysis/CGE/pointfinder_db/',
        'escherichia_coli',
    )

In [7]:
# Directory whose entries are each fed to pipeline() by the batch cell.
dirpath = Path('/media/NGS/MiSeqAnalysis/Shigella/Contigs')
# Base output directory; the batch cell writes each input's results to
# outpath/<input stem>.
outpath = Path('/media/NGS/MiSeqAnalysis/Shigella/Analysis')

In [14]:
# Run pipeline() over every entry of `dirpath`, four inputs at a time.
# Each task passes threads=12, which pipeline() forwards to AMRFinder.
# The loop names `i` and `outdir` are kept because a later cell reads them.
with Pool(4) as p:
    submitted = []
    for i in dirpath.iterdir():
        outdir = outpath/i.stem
        submitted.append((i, p.apply_async(pipeline, (i, outdir, 12))))
    p.close()
    p.join()

# apply_async() keeps a worker's exception inside its AsyncResult; unless
# get() is called the failure is never surfaced. Check every task so that
# inputs with missing results are reported instead of silently skipped.
failed = []
for source, task in submitted:
    try:
        task.get()
    except Exception as err:
        failed.append(source)
        print('{}: {}: {}'.format(source, type(err).__name__, err), file=sys.stderr)
if failed:
    raise RuntimeError(
        'pipeline failed for {} of {} inputs'.format(len(failed), len(submitted))
    )

In [None]:
# NOTE(review): `i` and `outdir` are whatever the batch loop left behind
# (its last iteration); on a fresh kernel this cell fails unless that loop
# has been run first.
pipeline(infile=i, outdir=outdir, threads=16)

In [13]:
# Run the full pipeline for a single sample with 32 threads.
pipeline(
    infile='/media/NGS/MiSeqAnalysis/Shigella/Contigs/R16.0013.fa',
    outdir='/media/NGS/MiSeqAnalysis/Shigella/Analysis/R16.0013',
    threads=32,
)