In [1]:
import os
import sys
import json
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from multiprocessing import Pool
import pandas as pd

In [2]:
# Make the project's `src` directory importable. The path is relative, so it is
# resolved against the working directory the kernel was started in.
sys.path.append('../../src')
# Command-line wrappers and the rMLST helper provided by the project's
# `application` module.
from application import MlstCommandline, \
                        PlasmidFinderCommandline, \
                        ResfinderCommandline, \
                        AmrfinderCommandline, \
                        VirulencefinderCommandline, \
                        SerotypefinderCommandline, \
                        ribosomal_mlst

In [3]:
def run_mlst(infile, outdir, database, species):
    """Run the MLST wrapper on one input file.

    ``outdir`` is created if it does not exist yet. A scratch directory is
    created under ``/tmp/`` for the duration of the run and removed afterwards.

    Args:
        infile: Input passed to ``MlstCommandline`` as ``infile``.
        outdir: Result directory passed to the wrapper as ``outdir``.
        database: Passed to the wrapper as ``database``.
        species: Passed to the wrapper as ``species``.
    """
    os.makedirs(outdir, exist_ok=True)
    with TemporaryDirectory(dir='/tmp/') as scratch_dir:
        options = dict(
            infile=infile,
            outdir=outdir,
            database=database,
            species=species,
            tmp=scratch_dir,
            # Keyword spelled exactly as the original call site has it;
            # presumably it matches the wrapper's parameter name.
            extented_output=True,
        )
        MlstCommandline(**options)()

def run_plasmidfinder(infile, outdir, database):
    """Run the PlasmidFinder wrapper on one input file.

    Ensures ``outdir`` exists, then invokes ``PlasmidFinderCommandline`` with a
    throw-away working directory created under ``/dev/shm/``; that directory is
    deleted when the run finishes.

    Args:
        infile: Input passed to the wrapper as ``infile``.
        outdir: Result directory passed to the wrapper as ``outdir``.
        database: Passed to the wrapper as ``database``.
    """
    os.makedirs(outdir, exist_ok=True)
    workspace = TemporaryDirectory(dir='/dev/shm/')
    with workspace as shm_dir:
        runner = PlasmidFinderCommandline(
            infile=infile,
            outdir=outdir,
            database=database,
            tmp=shm_dir,
            extented_output=True,  # spelling as used at the original call site
        )
        runner()

def run_resfinder(infile, outdir, db_res, db_point, species):
    """Run the ResFinder wrapper with both acquired-gene and point-mutation modes.

    ``outdir`` is created if needed. After the wrapper returns, the
    ``pointfinder_blast`` directory inside ``outdir`` is removed when present.

    Args:
        infile: Input passed to ``ResfinderCommandline`` as ``infasta``.
        outdir: Result directory passed to the wrapper as ``outdir``.
        db_res: Passed to the wrapper as ``db_res``.
        db_point: Passed to the wrapper as ``db_point``.
        species: Passed to the wrapper as ``species``.
    """
    os.makedirs(outdir, exist_ok=True)
    cline = ResfinderCommandline(
        infasta=infile, outdir=outdir, db_res=db_res, db_point=db_point, species=species, point=True, acquired=True,
    )
    cline()
    # Only clean up when the directory was actually produced: an unconditional
    # rmtree raises FileNotFoundError and would turn a successful run into a
    # failure (and skip whatever the caller runs next).
    blast_dir = os.path.join(outdir, 'pointfinder_blast')
    if os.path.isdir(blast_dir):
        shutil.rmtree(blast_dir)

def run_amrfinder(infile, outfile, database, **kwargs):
    """Run the AMRFinder wrapper on one nucleotide FASTA.

    Args:
        infile: Input passed to ``AmrfinderCommandline`` as ``nuc_fasta``.
        outfile: Report path passed to the wrapper as ``output_file``. Its
            parent directory is created if it does not exist.
        database: Passed to the wrapper as ``database``.
        **kwargs: Forwarded unchanged to the wrapper. ``cmd`` may be given to
            override the default executable path; previously passing ``cmd``
            raised ``TypeError`` because the keyword was supplied twice.
    """
    # Pop so that a caller-supplied `cmd` replaces the default instead of
    # colliding with it.
    cmd = kwargs.pop('cmd', '/home/chen1i6c04/miniconda3/envs/amrfinder/bin/amrfinder')
    # Mirror the sibling runners, which all make sure their output location exists.
    parent = os.path.dirname(outfile)
    if parent:
        os.makedirs(parent, exist_ok=True)
    cline = AmrfinderCommandline(
        cmd=cmd,
        nuc_fasta=infile, output_file=outfile, database=database, **kwargs
    )
    cline()

In [6]:
def pipeline(infile, outdir, threads=4):
    """Run all analysis steps on one input, collecting results under ``outdir``.

    Steps run sequentially in this order: AMRFinder, MLST, PlasmidFinder,
    ResFinder, rMLST. Each step writes to its own file or sub-directory of
    ``outdir`` (created here if missing). An exception in any step propagates
    and the remaining steps are not run.

    Args:
        infile: Input file handed to every step.
        outdir: Directory that receives all results.
        threads: Forwarded to the AMRFinder step only.
    """
    os.makedirs(outdir, exist_ok=True)

    def in_outdir(name):
        # All outputs live directly under `outdir`.
        return os.path.join(outdir, name)

    # AMRFinder report. Nucleotide output is currently disabled; to re-enable,
    # pass nucleotide_output=in_outdir('amrfinder.fna') below.
    run_amrfinder(
        infile,
        in_outdir('amrfinder.txt'),
        database='/media/GenomicResearch/Tools/amrfinder_database/latest',
        threads=threads,
        organism='Campylobacter',
    )

    # CGE tools, each with its own result directory.
    run_mlst(
        infile,
        in_outdir('mlst'),
        '/media/GenomicResearch/Tools/CGE/mlst_db',
        'cjejuni',
    )
    run_plasmidfinder(
        infile,
        in_outdir('plasmidfinder'),
        '/media/GenomicResearch/Tools/CGE/plasmidfinder_db',
    )
    run_resfinder(
        infile,
        in_outdir('resfinder'),
        '/media/GenomicResearch/Tools/CGE/resfinder_db',
        '/media/GenomicResearch/Tools/CGE/pointfinder_db',
        'campylobacter',
    )

    # Ribosomal MLST result is written as a single JSON file.
    ribosomal_mlst(infile, in_outdir('rmlst.json'))

In [None]:
# Directory whose entries are each submitted to `pipeline` by the batch cell.
dirpath = Path('/media/NAS/Central_Lab_Storage/MiSeq/Campylobacter/New')
# Root for batch results; each input gets a sub-directory named after its stem.
outpath = Path('/home/chen1i6c04/test')

In [None]:
# Batch run: submit one `pipeline` job per entry of `dirpath` to a pool of 16
# worker processes, each job asking for 4 threads.
#
# The AsyncResult of every job is kept so that failures inside workers can be
# reported; discarding them (as before) made worker exceptions disappear
# silently. There is no bare `except:` any more: leaving the `with` block
# already terminates the pool, and interrupts/errors now propagate instead of
# being swallowed.
jobs = []
with Pool(16) as pool:
    for filepath in dirpath.iterdir():
        outdir = outpath/filepath.stem
        jobs.append((filepath, pool.apply_async(pipeline, (filepath, outdir, 4))))
    pool.close()
    pool.join()

# Inspect results only after join(), so a job whose worker died without
# delivering a result is reported rather than waited on forever.
failures = []
for job_input, job in jobs:
    if not job.ready():
        failures.append((job_input, 'no result returned (worker exited early?)'))
        continue
    try:
        job.get()
    except Exception as exc:
        failures.append((job_input, repr(exc)))

for job_input, reason in failures:
    print('{}: {}'.format(job_input, reason), file=sys.stderr)

In [9]:
# One-off run on a single assembly with 32 threads; results are written into
# the same directory that holds the input file.
pipeline('/home/chen1i6c04/test/assembly.fasta',
         '/home/chen1i6c04/test', 32)