In [1]:
import os
from pathlib import Path
import pandas as pd

In [2]:
import sys
sys.path.append('../../src')
from parse import parse_amrfinder_result, parse_mlst_result, parse_lissero_result, parse_plasmidfinder_result

In [3]:
def parse_text_result(path):
    """Summarise the typing results stored under one result directory.

    Reads four files below ``path``:

    * ``amrfinder.txt``                  -> ``parse_amrfinder_result``
    * ``mlst/data.json``                 -> ``parse_mlst_result``
    * ``lissero.txt``                    -> ``parse_lissero_result``
    * ``plasmidfinder/results_tab.tsv``  -> ``parse_plasmidfinder_result``

    Args:
        path: Directory containing the files listed above.

    Returns:
        dict: ``'AMR'`` (sorted, comma-separated gene symbols of the records
        whose ``element_subtype`` is ``'AMR'``), every key/value pair returned
        by ``parse_mlst_result``, ``'Serotype'`` (the value returned by
        ``parse_lissero_result``) and ``'Inc type'`` (sorted, comma-separated
        items returned by ``parse_plasmidfinder_result``).

    Note:
        VirulenceFinder output (``virulencefinder/data.json``) is not part of
        the summary.
    """
    # All input locations are resolved before any file is parsed.
    amr_path = os.path.join(path, 'amrfinder.txt')
    mlst_path = os.path.join(path, 'mlst', 'data.json')
    serotype_path = os.path.join(path, 'lissero.txt')
    plasmid_path = os.path.join(path, 'plasmidfinder', 'results_tab.tsv')

    # Only records flagged as AMR contribute to the gene list.
    amr_genes = [
        hit['gene_symbol']
        for hit in parse_amrfinder_result(amr_path)
        if hit['element_subtype'] == 'AMR'
    ]
    amr_genes.sort()

    # Insertion order (AMR, MLST fields, Serotype, Inc type) is deliberate:
    # it determines the column order once the summaries are tabulated.
    summary = {'AMR': ', '.join(amr_genes)}
    summary.update(parse_mlst_result(mlst_path))
    summary['Serotype'] = parse_lissero_result(serotype_path)
    replicons = parse_plasmidfinder_result(plasmid_path)
    summary['Inc type'] = ', '.join(sorted(replicons))
    return summary

In [4]:
# Each sub-directory of the analysis folder is treated as one sample; the
# directory name becomes the sample key.
dirpath = Path('/media/GenomicResearch/MiSeq/Listeria_monocytogenes/Analysis')
# Only directories are parsed: a stray file in the analysis folder has no
# result files beneath it and would make parse_text_result fail.
# NOTE: the spelling `summeries` (sic) is kept because the next cell reads
# this exact name.
summeries = {
    sample_dir.name: parse_text_result(sample_dir)
    for sample_dir in dirpath.iterdir()
    if sample_dir.is_dir()
}

In [5]:
# One row per sample (the outer dict keys), one column per summary field,
# rows ordered by sample key and the index labelled 'Key'.
df = (
    pd.DataFrame(summeries)
    .transpose()
    .sort_index()
    .rename_axis('Key')
)
df.head()

Unnamed: 0_level_0,AMR,ST,lhkA,dat,abcZ,cat,dapE,bglA,ldh,Serotype,Inc type
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
R14.0043,"fosX, lin",5,7,3,2,11,3,1,1,"1/2b, 3b, 7",
R14.0214,"fosX, lin",5,7,3,2,11,3,1,1,"1/2b, 3b, 7",
R14.0254,"fosX, lin",155,1,5,7,16,7,10,2,"1/2a, 3a",
R14.0365,"fosX, lin",5,7,3,2,11,3,1,1,"1/2b, 3b, 7",rep26
R14.0372,"fosX, lin",87,4,3,12,4,14,1,39,"1/2b, 3b, 7",


In [6]:
# Write the per-sample summary table as tab-separated text (index included).
df.to_csv(
    path_or_buf='/media/GenomicResearch/MiSeq/Listeria_monocytogenes/summaries.txt',
    sep='\t',
)

In [None]:
# Build a sample x gene matrix from the AMRFinder results and write it to
# amr2bns.txt. Cell values: 1 when a gene hit covers at least 90 % of the
# reference sequence, 0.5 for a hit with lower coverage, empty when the gene
# was not reported for that sample.
dirpath = Path('/media/GenomicResearch/MiSeq/Listeria_monocytogenes/Analysis')

summaries = dict()
for i in dirpath.iterdir():
    if not i.is_dir():
        # A stray file in the analysis folder has no amrfinder.txt beneath it.
        continue
    summary = dict()
    for rec in parse_amrfinder_result(i/'amrfinder.txt'):
        if rec['element_subtype'] != 'AMR':
            continue
        # assumes coverage_of_reference_sequence is numeric (a percentage) —
        # TODO confirm against parse_amrfinder_result
        score = 1 if rec['coverage_of_reference_sequence'] >= 90 else 0.5
        gene = rec['gene_symbol']
        # The same gene symbol can be reported more than once; keep the best
        # score so that a partial hit never overwrites a full-coverage hit
        # just because it comes later in the file.
        summary[gene] = max(score, summary.get(gene, 0))
    summaries[i.name] = summary

# Rows = samples, columns = genes, both sorted alphabetically.
summaries_tab = pd.DataFrame(summaries).T.sort_index().sort_index(axis=1)
summaries_tab.index.name = 'Key'
summaries_tab.to_csv('/media/GenomicResearch/MiSeq/Listeria_monocytogenes/amr2bns.txt', sep='\t')

In [None]:
# Build a sample x replicon presence matrix from the PlasmidFinder results and
# write it to plasmid2bns.txt. A cell is 1 when the item was returned by
# parse_plasmidfinder_result for that sample and empty otherwise.
dirpath = Path('/media/GenomicResearch/MiSeq/Listeria_monocytogenes/Analysis')

summaries = dict()
for i in dirpath.iterdir():
    if not i.is_dir():
        # A stray file in the analysis folder has no plasmidfinder/ results.
        continue
    summary = {x: 1 for x in parse_plasmidfinder_result(i/'plasmidfinder'/'results_tab.tsv')}
    summaries[i.name] = summary

# Rows = samples, columns = replicons, both sorted alphabetically.
summaries_tab = pd.DataFrame(summaries).T.sort_index().sort_index(axis=1)
summaries_tab.index.name = 'Key'
summaries_tab.to_csv('/media/GenomicResearch/MiSeq/Listeria_monocytogenes/plasmid2bns.txt', sep='\t')