In [1]:
import os
import sys
import json
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from multiprocessing import Pool
import pandas as pd

In [2]:
sys.path.append('../../src')

from run import run_amrfinder, run_mlst, run_plasmidfinder, run_resfinder
from utils import run_cmd

In [3]:
def run_sistr(infile, output, sample_name, threads=2):
    """Run the ``sistr`` command inside the ``sistr`` conda environment.

    The command line is assembled as a single string and handed to
    ``run_cmd``; arguments are interpolated as-is, without quoting.

    Args:
        infile: Path of the input file given to ``-i``.
        output: Output destination given to ``-o``.
        sample_name: Name placed right after ``infile`` on the ``-i`` option.
        threads: Value given to ``-t`` (default 2).
    """
    command = ' '.join([
        'conda run -n sistr sistr',
        f'-i {infile} {sample_name}',
        '-f json',
        f'-o {output}',
        '-T /tmp',
        f'-t {threads}',
    ])
    run_cmd(command)

In [4]:
def pipeline(infile, outdir, prefix, threads=4):
    """Run every per-sample analysis step on one input file.

    Creates ``outdir`` if it does not exist, then calls, in order,
    ``run_amrfinder``, ``run_mlst``, ``run_plasmidfinder``, ``run_resfinder``
    and ``run_sistr``. Each step writes to its own path inside ``outdir``.

    Args:
        infile: Input handed unchanged to every step (presumably a genome
            assembly FASTA -- confirm against the ``run`` helpers).
        outdir: Directory that receives all results.
        prefix: Sample name forwarded to ``run_sistr``.
        threads: Thread count forwarded to ``run_amrfinder`` and
            ``run_sistr``; the other steps are called without one.
    """
    os.makedirs(outdir, exist_ok=True)

    def result_path(name):
        # Location of one tool's output inside this sample's directory.
        return os.path.join(outdir, name)

    amrfinder_options = dict(
        database='/media/GenomicResearch/Tools/amrfinder_database/latest/',
        threads=threads,
        organism='Salmonella',
        plus=True,
    )
    run_amrfinder(infile, result_path('amrfinder.txt'), **amrfinder_options)

    run_mlst(
        infile,
        result_path('mlst'),
        '/media/GenomicResearch/Tools/CGE/mlst_db',
        'senterica',
    )

    run_plasmidfinder(
        infile,
        result_path('plasmidfinder'),
        '/media/GenomicResearch/Tools/CGE/plasmidfinder_db',
    )

    # ResFinder is invoked with point=True and acquired=False.
    resfinder_options = dict(
        db_res='/media/GenomicResearch/Tools/CGE/resfinder_db',
        db_point='/media/GenomicResearch/Tools/CGE/pointfinder_db',
        point=True,
        acquired=False,
        species='salmonella',
    )
    run_resfinder(infile, result_path('resfinder'), **resfinder_options)

    run_sistr(infile, result_path('sistr'), prefix, threads)

In [5]:
# Both batch locations live under the same MiSeq project directory.
_salmonella_root = Path('/media/GenomicResearch/MiSeq/Salmonella_enterica')

# Input: every entry of this directory is submitted to the pipeline.
dirpath = _salmonella_root / 'New'
# Output root: each sample gets a sub-directory named after its file stem.
outpath = _salmonella_root / 'Analysis'

In [6]:
# Run the pipeline on every entry of `dirpath` using 8 worker processes,
# each passing threads=4 to the tools.
#
# Samples whose pipeline raised are collected here as (filepath, exception)
# so they can be inspected or re-run from a later cell.
failed_samples = []

# Leaving the `with` block terminates the pool, also when an exception or a
# kernel interrupt propagates, so no explicit try/except is needed for cleanup.
with Pool(8) as p:
    submitted = []
    for filepath in dirpath.iterdir():
        sample_name = filepath.stem
        outdir = outpath/sample_name
        async_result = p.apply_async(
            pipeline, (filepath, outdir, sample_name), {'threads': 4}
        )
        submitted.append((filepath, async_result))
    p.close()
    p.join()

    # An exception raised inside a worker is stored in its AsyncResult and
    # only resurfaces on .get(); without this check failures pass silently.
    for filepath, async_result in submitted:
        try:
            async_result.get()
        except Exception as err:
            # Keep the batch best-effort: record and report, do not abort.
            failed_samples.append((filepath, err))
            print(f'pipeline failed for {filepath}: {err!r}', file=sys.stderr)

In [7]:
# One-off run on a single complete genome.
# The third positional parameter of pipeline() is the sample name (`prefix`),
# so the thread count must be passed by keyword. The sample name follows the
# batch cell's convention of using the FASTA file stem.
pipeline('/media/GenomicResearch/Issue/20211026_沙鹿童綜合Sal/Complete_genome/5A-sal-05.fasta',
         '/media/GenomicResearch/Issue/20211026_沙鹿童綜合Sal/Analysis/5A-sal-05_complete',
         '5A-sal-05', threads=16)