In [1]:
import os

import pandas as pd
import sqlalchemy as sa

In [2]:
# Build the PostgreSQL engine without embedding credentials in the notebook.
# User name and password are read from the PGUSER / PGPASSWORD environment
# variables (a KeyError is raised if either is unset) and percent-encoded so
# that characters such as '@', ':' or '/' cannot corrupt the connection URL.
from urllib.parse import quote

db_user = quote(os.environ["PGUSER"], safe="")
db_password = quote(os.environ["PGPASSWORD"], safe="")
engine = sa.create_engine(
    f"postgresql+psycopg2://{db_user}:{db_password}@localhost:5432/Neisseria_meningitidis"
)

# No bind is attached to the metadata: the previous `bind=True` was a boolean,
# not an engine or connection, and the query in this notebook is executed
# through `con` explicitly.
meta = sa.MetaData()
con = engine.connect()

In [3]:
# Table handle for 'alleles'. No columns are declared or reflected here, so it
# only serves as the FROM target of the SELECT built from it.
table = sa.Table('alleles', meta)

In [4]:
# SELECT * FROM alleles.
# NOTE(review): this is the legacy select(columns, from_obj=...) calling style,
# which is deprecated in SQLAlchemy 1.4 — confirm the installed version before
# upgrading SQLAlchemy.
query = sa.select('*', from_obj=table)

In [5]:
# Run the query over the open connection and load the full result into a DataFrame.
alleles = pd.read_sql(query, con=con)

In [6]:
# Lookup table allele_id -> peptide_seq; if an allele_id occurs more than once,
# the last row wins.
seq_trans = {
    allele_id: peptide_seq
    for allele_id, peptide_seq in zip(alleles['allele_id'], alleles['peptide_seq'])
}

In [7]:
# Load the tab-separated profile table; the first column becomes the row index
# (rows are later selected by label, e.g. 'gyrA' and 'parC').
profile = pd.read_csv(
    "/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/Profile/profile.tsv",
    sep='\t',
    index_col=0,
    low_memory=False,
)

In [8]:
# Replace every allele id in the table with its peptide sequence; ids that are
# not in `seq_trans` (including missing values) become None.
# NOTE: this overwrites `profile`, so the cell is not idempotent — running it a
# second time looks the peptide strings up as if they were allele ids. Reload
# `profile` from the TSV before re-running.
profile = profile.applymap(seq_trans.get)

In [9]:
# Empty result table; the gyrA and parC cells fill it one row label at a time
# via `record.loc[label, column] = ...`.
record = pd.DataFrame()

In [None]:
# Report gyrA substitutions at residues 91 and 95 for every column of `profile`.
# Each entry is (1-based residue number, expected residue); a label such as
# "T91I" is produced when the observed residue differs from the expected one.
gyrA_sites = ((91, 'T'), (95, 'D'))

# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for sample_id, peptide in profile.loc['gyrA'].items():
    # Skip entries without a peptide sequence. A missing value may surface as
    # None or as float NaN; NaN is truthy, so a plain truthiness test would let
    # it through and then fail on indexing.
    if not isinstance(peptide, str) or not peptide:
        continue
    mutation = [
        f"{expected}{position}{peptide[position - 1]}"
        for position, expected in gyrA_sites
        if peptide[position - 1] != expected
    ]
    # An empty string means every checked site carries the expected residue.
    record.loc[sample_id, 'gyrA'] = ', '.join(mutation)

In [None]:
# Report parC substitutions at residues 86, 87 and 91 for every column of
# `profile`. Each entry is (1-based residue number, expected residue); a label
# such as "S87R" is produced when the observed residue differs from the
# expected one.
parC_sites = ((86, 'D'), (87, 'S'), (91, 'E'))

# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for sample_id, peptide in profile.loc['parC'].items():
    # Skip entries without a peptide sequence. A missing value may surface as
    # None or as float NaN; NaN is truthy, so a plain truthiness test would let
    # it through and then fail on indexing.
    if not isinstance(peptide, str) or not peptide:
        continue
    mutation = [
        f"{expected}{position}{peptide[position - 1]}"
        for position, expected in parC_sites
        if peptide[position - 1] != expected
    ]
    # An empty string means every checked site carries the expected residue.
    record.loc[sample_id, 'parC'] = ', '.join(mutation)

In [None]:
# Save the mutation table collected in `record`; the file name is relative to
# the notebook's working directory.
record.to_csv('gyrA_and_parC_mutation.csv')

In [20]:
# Distinct residues observed at 0-based index 85 of the non-missing parC
# peptides (the position the parC cell reports as residue 86).
{peptide[85] for peptide in profile.loc['parC'].dropna()}

{'D', 'S'}

In [2]:
import re
import sys
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import AlignIO
from Bio.Align.Applications import ClustalwCommandline

sys.path.append('../../src')
from tree import Dendrogram, Distance

In [None]:
# Wrap every non-missing gyrA peptide from `profile` in a SeqRecord whose id is
# the corresponding column label of `profile`.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
records = [
    SeqRecord(Seq(peptide), id=sample_id)
    for sample_id, peptide in profile.loc['gyrA'].dropna().items()
]

In [None]:
# Directory whose files are read as FASTA input for the gyrA comparison; the
# combined FASTA, the alignment and the dendrogram PDF are written here too.
path = "/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/gyrA"

In [None]:
# Append the first FASTA record of every input file found in `path` to `records`.
# Files that other cells of this notebook write into the same directory
# (gyrA_aln.* from SeqIO.write / ClustalW, and phylo.pdf) are skipped so that a
# re-run does not try to parse them as input sequences.
# NOTE: this cell appends to `records`; re-run the cell that builds `records`
# first to avoid duplicate entries.
generated_stems = {'gyrA_aln', 'phylo'}

# sorted() makes the record order reproducible; os.listdir() order is arbitrary.
for file_name in sorted(os.listdir(path)):
    stem = os.path.splitext(file_name)[0]
    if stem in generated_stems:
        continue
    # Named `seq_record` rather than `record` so the `record` DataFrame built by
    # the mutation cells is not overwritten.
    seq_record = next(SeqIO.parse(os.path.join(path, file_name), 'fasta'))
    # The record id is the file name without its extension and without '_gyrA'.
    seq_record.id = stem.replace('_gyrA', '')
    records.append(seq_record)

In [None]:
# Write all collected records to a single FASTA file; the ClustalW command is
# later pointed at this same file.
SeqIO.write(records, '/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/gyrA/gyrA_aln.fa', 'fasta')

In [None]:
# Build the ClustalW command line for the combined FASTA file; the command is
# executed in a separate step by calling `clustalw_cline()`.
# NOTE(review): Bio.Align.Applications wrappers are deprecated in recent
# Biopython releases — confirm the installed version still provides them.
clustalw_cline = ClustalwCommandline(infile='/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/gyrA/gyrA_aln.fa')

In [None]:
# Run ClustalW and keep its captured standard output / standard error.
# NOTE(review): the alignment is presumably written next to the input as
# gyrA_aln.aln, which is the file read afterwards — confirm.
stdout, stderr = clustalw_cline()

In [4]:
# Load the alignment from the Clustal-format .aln file.
align = AlignIO.read('/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/gyrA/gyrA_aln.aln', 'clustal')

In [14]:
# One column per aligned sequence (keyed by record id), one row per alignment
# position. Building the frame from a dict in a single step avoids inserting
# columns one at a time and avoids rebinding the notebook-level name `record`.
align_matrix = pd.DataFrame({aligned.id: list(aligned) for aligned in align})

# Gap characters ('-') become NaN.
align_matrix = align_matrix.replace('-', float('nan'))

In [32]:
# Feed the alignment matrix to the project-local `Distance` helper (imported
# from the `tree` module under ../../src) and run its calculation.
# NOTE(review): presumably this yields pairwise distances between columns; how
# NaN (gap) cells are treated is defined in that module — confirm.
distance = Distance(align_matrix)
distance.calculate()

# Cluster the resulting table with the project-local `Dendrogram` helper and
# title the figure it exposes.
# NOTE(review): the clustering method is chosen inside `Dendrogram` — confirm.
dendrogram = Dendrogram(distance.dataframe)
dendrogram.cluster(show_node_info=True)
dendrogram.figure.ax.set_title('Neisseria gyrA compare', fontsize=16)

In [33]:
# Save the dendrogram figure as a PDF in the gyrA directory.
dendrogram.figure.savefig('/media/NGS/Data_Analysis/20191128_CDC_MiSeq/Neisseria_meningitidis/gyrA/phylo.pdf')