In [1]:
from pathlib import Path
import pandas as pd

In [17]:
# Clonal complex to analyse; the metadata filters in later cells compare their
# clonal-complex columns against this exact string.
clonal_complex = 'ST-32 complex'

In [18]:
# Isolate metadata spreadsheet; only the four columns needed below are loaded,
# and every value is kept as a plain Python object (no dtype inference).
filepath = '/media/NGS/SRA_2/pubMLST_Neisseria/BIGSdb_179004_1119847573_26598.xlsx'
pubmlst_isolate_info = pd.read_excel(
    filepath,
    usecols=['id', 'country', 'year', 'clonal_complex (MLST)'],
    dtype=object,
)

In [19]:
# Keep only the isolates whose MLST clonal complex equals `clonal_complex`,
# reduced to the id / country / year columns, with `id` converted to str.
#
# The rows and columns are selected in a single .loc call and the converted
# `id` column is attached with .assign, which returns a new frame. Assigning a
# column onto the result of chained `[]` indexing (as the previous version did)
# triggers pandas' SettingWithCopyWarning.
is_target_cc = pubmlst_isolate_info['clonal_complex (MLST)'] == clonal_complex
specific_cc_isolate_info = (
    pubmlst_isolate_info
    .loc[is_target_cc, ['id', 'country', 'year']]
    .assign(id=lambda frame: frame['id'].astype(str))
)
specific_cc_isolate_info.shape

(1381, 3)

In [20]:
# Unique isolate ids (strings) of the selected clonal complex; used for fast
# membership tests against profile file stems when loading profiles.
specific_cc_isolate_ids = set(specific_cc_isolate_info['id'])

In [21]:
# Load one profile file per selected PubMLST isolate and join them column-wise.
# Each tab-separated file is read with its first column as the row index and
# its header row replaced by a single column named after the file stem.
dirpath = Path('/media/NGS/SRA_2/pubMLST_Neisseria/Profile')
# Path.iterdir() yields entries in arbitrary order; sorting makes the column
# order of the combined frame reproducible across runs and machines.
generator = (
    pd.read_csv(profile_path, sep='\t', index_col=0, header=0, names=[profile_path.stem])
    for profile_path in sorted(dirpath.iterdir())
    if profile_path.stem in specific_cc_isolate_ids
)
pubmlst_profile = pd.concat(generator, axis=1)
pubmlst_profile.shape

(1241, 1381)

In [22]:
# In-house MLST results table: indexed by 'Key', carrying only the
# clonal-complex assignment.
filepath = "/media/NGS/MiSeqAnalysis/Neisseria_meningitidis/MLST.tsv"
mlst = pd.read_csv(
    filepath,
    sep='\t',
    usecols=['Key', 'clonal_complex'],
    index_col=0,
)
# Index values (keys) of the rows that belong to the clonal complex of interest.
ids = set(mlst.loc[mlst['clonal_complex'].eq(clonal_complex)].index)

In [23]:
# Load one profile file per selected in-house isolate and join them column-wise.
# Each tab-separated file is read with its first column as the row index and
# its header row replaced by a single column named after the file stem.
dirpath = Path('/media/NGS/MiSeqAnalysis/Neisseria_meningitidis/Profile')
# Path.iterdir() yields entries in arbitrary order; sorting makes the column
# order of the combined frame reproducible across runs and machines.
generator = (
    pd.read_csv(profile_path, sep='\t', index_col=0, header=0, names=[profile_path.stem])
    for profile_path in sorted(dirpath.iterdir())
    if profile_path.stem in ids
)
profile = pd.concat(generator, axis=1)
profile.shape

(1241, 32)

In [24]:
# Put the PubMLST profiles and the in-house profiles side by side (axis=1),
# aligning rows on their shared index; PubMLST columns come first.
total_profile = pd.concat([pubmlst_profile, profile], axis=1)
total_profile.shape

(1241, 1413)

In [10]:
# Make the project-local `tree` module importable and pull in the helpers used
# by the cells below.
# NOTE(review): this cell carries execution count 10 while its neighbours are
# 24 and 25, so it was run out of order; on a fresh kernel it must run before
# the cells that use these names.
import sys
# '../src' is relative, so it resolves against the kernel's working directory.
sys.path.append('../src')

from tree import PairwiseDistanceMatrix, Dendrogram, clustering

In [25]:
# Disabled alternative kept from an earlier experiment: restrict the input to
# the columns listed in clusters[109].
# NOTE(review): `clusters` is not defined in any cell visible here — define it
# or delete this line before re-enabling.
# pdist = PairwiseDistanceMatrix(total_profile.filter(clusters[109], axis=1))
# Active path: use every column of the combined profile frame.
pdist = PairwiseDistanceMatrix(total_profile)

In [26]:
# Call the PairwiseDistanceMatrix instance and keep its result — presumably the
# pairwise distance matrix between profiles (confirm against the `tree` module).
distmatrix = pdist()

In [27]:
# Wrap the distance result in the project's Dendrogram helper; the object is
# called later to draw the tree and exposes `.labels`, `.figure` and `.savefig`.
dendrogram = Dendrogram(distmatrix)

In [28]:
# Draw the dendrogram, colouring the labels of the in-house isolates
# (the columns of `profile`) red.
dendrogram(
    labels_color=dict.fromkeys(profile.columns, 'red'),
    show_node_info=True,
    xlim=100,
)

In [29]:
# Country / year metadata for every dendrogram label, in reversed label order
# (presumably so that table rows run top-to-bottom in the same order as the
# drawn leaves — confirm against Dendrogram). Labels without a PubMLST record
# (e.g. the in-house isolates) get empty cells via fillna('').
_metadata = specific_cc_isolate_info.set_index('id').reindex(dendrogram.labels[::-1]).fillna('')

# Table placed to the right of the axes (x = 1.2 in axes coordinates, width 0.3).
# The height is 1 + 1/n_labels: the table has n_labels data rows plus one
# header row, so each row is 1/n_labels tall.
table = dendrogram.figure.ax.table(
    cellLoc='left', colLoc='left',
    cellText=_metadata.values,
    colLabels=_metadata.columns,
    bbox=[1.2, 0, .3, 1 + 1 / len(dendrogram.labels)],
)

# Strip cell borders and padding. get_celld() is the public accessor for the
# table's cells; the private `_cells` attribute should not be relied on.
for cell in table.get_celld().values():
    cell.set_linewidth(0)
    cell.PAD = 0

# Fix the font size explicitly, then let each column size itself to its content.
table.auto_set_font_size(False)
table.set_fontsize(12)
table.auto_set_column_width(range(len(_metadata.columns)))

In [30]:
# Save the figure through the Dendrogram helper's savefig to this PDF path.
# NOTE(review): absolute, machine-specific output path — the target directory
# must already exist on the machine running the notebook (presumably; confirm).
dendrogram.savefig('/media/NGS/Data_Analysis/20210326_pubMLST/Neisseria_meningitidis_ST32_complex.pdf')