In [1]:
import os
from pathlib import Path
from tempfile import TemporaryDirectory
from multiprocessing import Pool
import pandas as pd

In [2]:
import sys
sys.path.append('../src')
from run import run_amrfinder
from parse import parse_amrfinder_result

In [6]:
# One-off AMRFinder run on a single assembly.
# NOTE(review): positional arguments are presumably (input contigs, output
# report, database directory) -- confirm against src/run.py.
run_amrfinder(
    '/media/GenomicResearch/MiSeq/Neisseria_meningitidis/Contigs/R18.2594.fa',
    '/home/chen1i6c04/test.txt',
    '/media/GenomicResearch/Tools/amrfinder_database/latest',
    threads=16,
    organism='Neisseria',
)

In [None]:
# Read the list of selected accessions (one per line) into a de-duplicated set.
selected_accessions_file = '/media/Central_Lab_Storage/NcbiASM/Burkholderia_cepacia/selected.txt'
with open(selected_accessions_file) as handle:
    accs = {accession for accession in handle.read().splitlines()}

In [3]:
# Batch-run configuration.
# AMRFinder database location handed to every run_amrfinder call.
database = '/media/GenomicResearch/Tools/amrfinder_database/latest'
# Directory iterated for input files (one AMRFinder run per entry).
dirpath = Path('/media/GenomicResearch/Issue/20211223_plasmid_HI2/contigs')
# Directory that receives one `<stem>.txt` result per input.
outpath = Path('/media/GenomicResearch/Issue/20211223_plasmid_HI2/amrfinder')
# organism = 'Neisseria'

In [4]:
# Run AMRFinder over every file in `dirpath`, writing `<stem>.txt` into
# `outpath`, using a pool of 16 worker processes (each run asks for 4 threads).
#
# The AsyncResult handles are kept and `.get()` is called on each one:
# apply_async stores an exception raised in the worker on the handle, so
# discarding the handle makes failed runs disappear silently.

# Make sure the destination exists before any worker tries to write to it.
outpath.mkdir(parents=True, exist_ok=True)

pending = {}   # input file name -> AsyncResult
failures = {}  # input file name -> exception raised by the worker

with Pool(16) as p:
    try:
        for i in dirpath.iterdir():
            # Sub-directories are not AMRFinder inputs; skip them.
            if not i.is_file():
                continue
            outfile = outpath/(i.stem + '.txt')
            pending[i.name] = p.apply_async(
                run_amrfinder,
                args=(i, outfile, database),
                kwds={'threads': 4}
            )
        p.close()
        # Wait for every job and collect worker errors instead of stopping at
        # the first one, so a single bad input does not hide the others.
        for name, job in pending.items():
            try:
                job.get()
            except Exception as exc:
                failures[name] = exc
        p.join()
    except KeyboardInterrupt:
        # Manual interrupt: stop the workers right away (best-effort abort).
        p.terminate()

if failures:
    details = '; '.join(f'{name}: {exc!r}' for name, exc in sorted(failures.items()))
    raise RuntimeError(
        f'AMRFinder failed for {len(failures)} of {len(pending)} input(s): {details}'
    )

In [6]:
# Collapse each AMRFinder result file into one comma-separated string of its
# distinct gene symbols (sorted), keyed by the file stem.
dirpath = Path('/media/GenomicResearch/Issue/20211223_plasmid_HI2/amrfinder')

summaries = {}
for result_file in dirpath.iterdir():
    gene_symbols = {
        record['gene_symbol']
        for record in parse_amrfinder_result(result_file)
    }
    summaries[result_file.stem] = ', '.join(sorted(gene_symbols))

In [27]:
# Turn the per-sample summaries into a Series named 'AMR', ordered by sample key.
s = pd.Series(data=summaries, name='AMR')
s = s.sort_index()

In [28]:
# Label the index so it gets a column header when the Series is exported.
s.index = s.index.rename('sample_name')

In [29]:
# Display the per-sample AMR gene summary built in the cells above.
s

sample_name
5A-sal-05_300k    aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
5A-sal-07_294k    aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
AP023191.1        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
AP023198.1        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
CP033347.2        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
CP034785.1            aph(3')-Ia, blaTEM-135, floR, mcr-1.1, tet(M)
CP037959.1        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
CP039170.1        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
CP039861.1        aac(3)-IId, aadA22, aph(3')-Ia, arr-2, dfrA14,...
CP048776.1        aac(3)-IId, aadA22, aph(3')-Ia, aph(6)-Id, arr...
CP063719.1        aac(3)-IId, aadA22, aph(3')-Ia, arr-2, blaCTX-...
CP068042.1        aac(3)-IId, aadA22, aph(3')-Ia, arr-2, blaCTX-...
CP072803.1        aadA8, aph(3')-Ia, aph(6)-Id, arr-2, dfrA14, f...
CP080122.1        aadA22, aph(3')-Ia, aph(6)-Id, arr-2, dfrA14, ...
LC511658.1        aac(3)-IId, aadA22

In [30]:
# Write the summary Series out as a tab-separated file.
summary_tsv = '/media/GenomicResearch/Issue/20211223_plasmid_HI2/amrfinder_results.tsv'
s.to_csv(summary_tsv, sep='\t')