In [1]:
import os
from pathlib import Path
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor

import pandas as pd
from Bio.Application import AbstractCommandline, _Option, _Switch

In [2]:
class SeqSeroCommandline(AbstractCommandline):
    """Biopython command-line wrapper for SeqSero2 (``SeqSero2_package.py``).

    Options are set through keyword arguments (or attributes) named after the
    option letter, e.g. ``SeqSeroCommandline(cmd, i="contigs.fa", t=4, m="k")``.
    Every option is declared with ``equate=False``, i.e. rendered as
    ``-x value`` rather than ``-x=value``.

    The option descriptions paraphrase the SeqSero2 usage text.
    """

    def __init__(self, cmd='SeqSero2_package.py', **kwargs):
        """Declare the supported options, then delegate to ``AbstractCommandline``.

        Args:
            cmd: Name or path of the SeqSero2 executable to run.
            **kwargs: Initial option values keyed by option letter
                (``i``, ``d``, ``t``, ``p``, ``m``).
        """
        self.parameters = [
            # Kept as a plain (non-filename) option, as originally declared;
            # the caller in this notebook joins several paths with spaces
            # into a single value.
            _Option(
                ['-i', 'i'],
                "path to the input file(s)",
                equate=False),
            _Option(
                ['-d', 'd'],
                "output directory name",
                equate=False),
            _Option(
                ['-t', 't'],
                "input data type, '1' for interleaved paired-end reads, "
                "'2' for separated paired-end reads, '3' for single reads, "
                "'4' for genome assembly, '5' for nanopore reads (fasta/fastq)",
                equate=False),
            _Option(
                ['-p', 'p'],
                "number of threads for allele mode, default=1",
                equate=False),
            # Adjacent string literals are used instead of backslash line
            # continuations so that source indentation does not end up
            # inside the description text.
            _Option(
                ['-m', 'm'],
                "which workflow to apply, 'a'(raw reads allele "
                "micro-assembly), 'k'(raw reads and genome assembly k-mer), "
                "default=a",
                equate=False),
        ]
        super().__init__(cmd, **kwargs)

In [3]:
def fn(**kwargs):
    """Build a SeqSero2 command line from the given options and execute it.

    Args:
        **kwargs: Option values forwarded unchanged to ``SeqSeroCommandline``.

    Returns:
        None. Whatever the command-line call returns is discarded.
    """
    seqsero_script = '/home/chen1i6c04/Tools/SeqSero2-1.1.1/bin/SeqSero2_package.py'
    run_seqsero = SeqSeroCommandline(cmd=seqsero_script, **kwargs)
    run_seqsero()

In [4]:
# Input directory that is scanned for sequence files, and the root directory
# under which per-sample SeqSero output directories are created.
dirpath = Path("/media/NGS/SRA_1/NCBI_Salmonella_enterica_serovar_Goldcoast_SRA/Contigs")
outpath = Path("/media/NGS/SRA_1/NCBI_Salmonella_enterica_serovar_Goldcoast_SRA/SeqSero")

# Group the POSIX paths of all directory entries by file stem
# (the file name without its last suffix).
pairwise = defaultdict(list)
for seqfile in dirpath.iterdir():
    pairwise[seqfile.stem].append(seqfile.as_posix())

In [6]:
# Run SeqSero2 once per sample prefix in a pool of 24 worker processes.
# The futures are kept so that a failing run is reported instead of being
# silently dropped together with its Future object.
# NOTE(review): 24 concurrent jobs each request p=8 threads; confirm this
# matches the capacity of the machine.
seqsero_futures = {}
with ProcessPoolExecutor(24) as executor:
    for prefix, seqfiles in pairwise.items():
        outdir = outpath/prefix
        os.makedirs(outdir, exist_ok=True)
        seqsero_futures[prefix] = executor.submit(
            fn, i=' '.join(seqfiles), d=outdir, p=8, t=4, m='k')

# Leaving the ``with`` block waits for all submitted jobs, so every future
# is already finished here and ``exception()`` does not block.
seqsero_failures = {}
for prefix, future in seqsero_futures.items():
    error = future.exception()
    if error is not None:
        seqsero_failures[prefix] = error
        print(f"SeqSero2 failed for {prefix}: {error!r}")

print(
    f"SeqSero2 runs: {len(seqsero_futures) - len(seqsero_failures)} succeeded, "
    f"{len(seqsero_failures)} failed"
)

In [7]:
# Directory whose entries are scanned for per-sample SeqSero result files.
dirPath = Path(
    '/media/NGS/SRA_1/NCBI_Salmonella_enterica_serovar_Goldcoast_SRA/SeqSero'
)

In [8]:
# Columns read from each SeqSero_result.tsv; all other columns are ignored.
usecols = [
    'Sample name',
    'Predicted antigenic profile',
    'Predicted serotype',
]

In [9]:
# Load the selected columns of every per-sample SeqSero result table.
# Entries without a result file (a failed run, or a stray file in the
# directory) are skipped and reported instead of aborting the whole load
# with FileNotFoundError. Entries are visited in sorted order so the row
# order of the combined table is reproducible.
pfs = []
missing_results = []
for sample_dir in sorted(dirPath.iterdir()):
    result_file = sample_dir/"SeqSero_result.tsv"
    if not result_file.is_file():
        missing_results.append(sample_dir.name)
        continue
    pfs.append(pd.read_csv(result_file, sep='\t', usecols=usecols))

if missing_results:
    print(
        f"No SeqSero_result.tsv found for {len(missing_results)} entries: "
        f"{', '.join(missing_results)}"
    )

In [10]:
# Stack the per-sample tables into one frame. ignore_index=True gives the
# result a fresh 0..n-1 index instead of repeating each file's own row labels.
profile = pd.concat(pfs, ignore_index=True)

In [11]:
# Replace each sample name by its path stem (final path component without
# its last suffix).
sample_stems = []
for sample_name in profile['Sample name']:
    sample_stems.append(Path(sample_name).stem)
profile['Sample name'] = sample_stems

In [12]:
# Write the combined table as CSV, without the index column.
profile.to_csv(
    '/media/NGS/SRA_1/NCBI_Salmonella_enterica_serovar_Goldcoast_SRA/SeqSero.csv',
    index=False,
)