In [1]:
import os
from pathlib import Path
import pandas as pd

In [3]:
import sys
sys.path.append('../../src')

from parse import parse_amrfinder_result, parse_mlst_result, parse_plasmidfinder_result, parse_pointfinder_result

In [6]:
def parse_text_result(path):
    """Collect the per-sample typing results stored under ``path`` into one dict.

    Four files are read relative to ``path`` via the project parsers:
    ``amrfinder.txt``, ``mlst/data.json``,
    ``resfinder/PointFinder_results.txt`` and
    ``plasmidfinder/results_tab.tsv``.

    Returns a dict holding, in insertion order:
      * ``'AMR'``      -- sorted, comma-joined gene symbols of the AMRFinder
                          records whose ``element_subtype`` is ``'AMR'``;
      * whatever key/value pairs ``parse_mlst_result`` returns;
      * ``'Point'``    -- sorted, de-duplicated, comma-joined union of the
                          AMRFinder ``'POINT'`` gene symbols and the
                          PointFinder result;
      * ``'Inc type'`` -- sorted, comma-joined PlasmidFinder result.
    """
    amr_hits = parse_amrfinder_result(os.path.join(path, 'amrfinder.txt'))

    resistance_genes = [
        hit['gene_symbol'] for hit in amr_hits if hit['element_subtype'] == 'AMR'
    ]
    result = {'AMR': ', '.join(sorted(resistance_genes))}

    # MLST fields are merged after 'AMR' so they sit between 'AMR' and 'Point'.
    result.update(parse_mlst_result(os.path.join(path, 'mlst', 'data.json')))

    # Point mutations come from two tools; the AMRFinder ones are gathered
    # first, then the PointFinder list is appended.
    point_hits = [
        hit['gene_symbol'] for hit in amr_hits if hit['element_subtype'] == 'POINT'
    ]
    point_hits = point_hits + parse_pointfinder_result(
        os.path.join(path, 'resfinder', 'PointFinder_results.txt')
    )
    result['Point'] = ', '.join(sorted(set(point_hits)))

    replicons = parse_plasmidfinder_result(
        os.path.join(path, 'plasmidfinder', 'results_tab.tsv')
    )
    result['Inc type'] = ', '.join(sorted(replicons))
    return result

In [8]:
# Root folder of the analysis; every entry inside it is treated as one sample.
dirpath = Path('/media/GenomicResearch/MiSeq/Neisseria_meningitidis/Analysis')
# Map each entry's name to its parsed summary dict.
summeries = dict(
    (entry.name, parse_text_result(entry))
    for entry in dirpath.iterdir()
)

In [9]:
# One row per sample (hence the transpose), rows ordered by sample name,
# and the index labelled 'Key'.
df = pd.DataFrame(summeries).transpose().sort_index().rename_axis('Key')
df.head(5)

Unnamed: 0_level_0,AMR,ST,gdh,pdhC,adk,pgm,abcZ,aroE,fumC,Point,Inc type
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
C04.0272,,23,11,9,5,17,10,18,9,"folP_R228S, rpsJ_V57M",
C04.0443,,41,9,6,6,9,3,9,5,folP_R228S,
C04.0545,,23,11,9,5,17,10,18,9,"folP_R228S, rpsJ_V57M",
C04.0975,,3192,6,22,3,12,46,79,25,"folP_R228S, penA_A510V, penA_F504L, penA_N512Y",
C04.1089,,4690,259,116,2,2,311,53,259,folP_R228S,


In [10]:
# Persist the summary table as a tab-separated file.
df.to_csv(
    path_or_buf='/media/GenomicResearch/MiSeq/Neisseria_meningitidis/summeries.txt',
    sep='\t',
)

In [6]:
# Build a sample x gene matrix from the AMRFinder records whose
# element_subtype is 'AMR'.
dirpath = Path('/media/GenomicResearch/MiSeq/Neisseria_meningitidis/Analysis')

# Threshold applied to 'coverage_of_reference_sequence': records at or above
# it are scored 1, records below it 0.5.
# NOTE(review): presumably a percentage (full vs. partial hit) -- confirm
# against the parser.
MIN_FULL_COVERAGE = 90

summaries = dict()
for i in dirpath.iterdir():
    summary = dict()
    records = parse_amrfinder_result(i/'amrfinder.txt')
    for rec in records:
        if rec['element_subtype'] != 'AMR':
            continue
        gene = rec['gene_symbol']
        score = 1 if rec['coverage_of_reference_sequence'] >= MIN_FULL_COVERAGE else 0.5
        # A gene symbol can appear in more than one record; keep the best
        # score so a later 0.5 record cannot overwrite an earlier 1, and the
        # result does not depend on record order.
        summary[gene] = max(summary.get(gene, 0), score)
    summaries[i.name] = summary

# Samples as rows, genes as columns, both sorted; genes absent from a sample
# are left as NaN by the DataFrame constructor.
summaries_tab = pd.DataFrame(summaries).T.sort_index().sort_index(axis=1)
summaries_tab.index.name = 'Key'
# NOTE(review): the disabled export below targets the Salmonella_enterica
# folder although dirpath points at Neisseria_meningitidis -- confirm the
# destination before re-enabling.
# summaries_tab.to_csv('/media/GenomicResearch/MiSeq/Salmonella_enterica/amr2bns.txt', sep='\t')

In [7]:
# Render whatever `summaries_tab` currently holds as this cell's output
# (the name is reassigned by more than one cell in this notebook).
summaries_tab

Unnamed: 0_level_0,aph(3')-Ia,bleO,catP,tet(B)
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C04.0272,,,,
C04.0443,,,,
C04.0545,,,,
C04.0975,,,,
C04.1089,,,,
...,...,...,...,...
R20.0341,,,,
S04.0248,,,,
S04.0250,,,,1.0
S04.0360,,,,


In [14]:
# Build a sample x mutation matrix from the AMRFinder records whose
# element_subtype is 'POINT'; every mutation seen in a sample is marked 1.
dirpath = Path('/media/GenomicResearch/MiSeq/Neisseria_meningitidis/Analysis')
summaries = {}
for sample_dir in dirpath.iterdir():
    point_symbols = {
        hit['gene_symbol']
        for hit in parse_amrfinder_result(sample_dir / 'amrfinder.txt')
        if hit['element_subtype'] == 'POINT'
    }
    # PointFinder results are not merged in here (kept disabled):
#     resfinder_records = parse_resfinder_result(i/'resfinder'/'PointFinder_results.txt')
    summaries[sample_dir.name] = dict.fromkeys(point_symbols, 1)

# Samples as rows, mutations as columns, both axes sorted.
summaries_tab = (
    pd.DataFrame(summaries)
    .transpose()
    .sort_index(axis=0)
    .sort_index(axis=1)
)
summaries_tab.index.name = 'Key'
# summaries_tab.to_csv('/media/GenomicResearch/MiSeq/Salmonella_enterica/point2bns.txt', sep='\t')

In [15]:
# Render whatever `summaries_tab` currently holds as this cell's output
# (the name is reassigned by more than one cell in this notebook).
summaries_tab

Unnamed: 0_level_0,folP_R228S,mtrR_A39T,penA_A501T,penA_A510V,penA_F504L,penA_I312M,penA_N512Y,penA_P551S,penA_V316T,rpsJ_V57M
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
C04.0272,1.0,,,,,,,,,1.0
C04.0443,1.0,,,,,,,,,
C04.0545,1.0,,,,,,,,,1.0
C04.0975,1.0,,,1.0,1.0,,1.0,,,
C04.1089,1.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
R20.0341,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S04.0248,1.0,,,,,,,,,
S04.0250,1.0,,,,,,,,,
S04.0360,,,,,,,,,,
