In [1]:
from itertools import chain
from pathlib import Path
import pandas as pd

In [2]:
# CSV sample-metadata sheets; each one is read and the results are stacked
# into `sample_info`. Paths are absolute mount points, so this notebook only
# runs on a machine where these shares are mounted.
filepaths = (
    '/media/NAS/Synology_222/2_Research/03_cgMLST/Enterobacter_hormaechei/Enterobacter_hormaechei_SampleInfo.csv',
    '/media/NGS/Data_Analysis/20201026_Enterobacter_cloacae_complex/SampleInfo.csv',
)

In [3]:
# Load each metadata sheet with dtype=object so no column is coerced to a
# numeric type, then stack the sheets row-wise in the order they are listed.
sheets = []
for sheet_path in filepaths:
    sheets.append(pd.read_csv(sheet_path, dtype=object))
sample_info = pd.concat(sheets)

In [4]:
# Keep only the metadata fields that are later rendered in the table beside
# the dendrogram, in the order they should appear.
metadata_columns = ['Source', 'BioSample', 'ST', 'Year', 'Country', 'ResistanceGene', 'Inc type']
sample_info = sample_info.loc[:, metadata_columns]

In [13]:
# Save the merged, trimmed metadata next to the notebook (relative path, so it
# lands in the current working directory); the row index is not written.
# NOTE(review): this cell's execution count is out of sequence with its
# position in the notebook — confirm it still behaves as intended under
# Restart & Run All.
sample_info.to_csv('Enterobacter_hormaechei_SampleInfo.csv', index=False)

In [5]:
# Directories containing the profile tables to combine.
dirpaths = (
    '/media/NGS/SRA_1/NCBI_Enterobacter_hormaechei_SRA/Profiles',
    '/media/NGS/Sequence/Enterobacter_hormaechei/Profiles',
    '/media/NGS/Data_Analysis/20201026_Enterobacter_cloacae_complex/Profiles',
)

# Materialise as a list (not a one-shot generator) so `dirpaths` can still be
# inspected after this cell has run.
dirpaths = [Path(dirpath) for dirpath in dirpaths]

# Path.iterdir() yields entries in arbitrary, filesystem-dependent order, so
# sort within each directory to make the column order of `profiles`
# reproducible between runs and machines. Directory order is kept as listed.
# Sub-directories (e.g. editor/checkpoint folders) are skipped because
# read_csv cannot open them.
# NOTE: this rebinds `filepaths`, which earlier held the metadata CSV paths;
# re-running the metadata-loading cell after this one would read the profile
# files instead.
filepaths = list(chain.from_iterable(
    sorted(entry for entry in dirpath.iterdir() if entry.is_file())
    for dirpath in dirpaths
))

# Each file is a tab-separated table whose first column is the index; the
# tables are joined side by side (axis=1), aligned on that index.
# Presumably one file per sample with loci as rows — TODO confirm.
profiles = pd.concat([pd.read_csv(filepath, sep='\t', index_col=0) for filepath in filepaths], axis=1)

In [6]:
# Make the project's local source directory importable. The path is relative,
# so this assumes the kernel's working directory is a sibling of `src`.
import sys
sys.path.append('../src')

# Project-local helpers from ../src/tree.py (not an installed package).
from tree import Dendrogram, calculate_distance

In [7]:
# Compute the distance structure that the Dendrogram is built from, using the
# project helper. Presumably pairwise distances between the profile columns —
# confirm against calculate_distance in ../src/tree.py.
pdist = calculate_distance(profiles)

In [8]:
# Build and draw the dendrogram. `no_labels=True` is forwarded to the project
# helper; presumably it suppresses the leaf labels because the table below
# provides them — confirm in ../src/tree.py.
dendrogram = Dendrogram(pdist)
dendrogram.cluster(no_labels=True)

# Line the metadata rows up with the dendrogram labels in reversed order.
# Labels that have no metadata row become all-blank rows (fillna('')), and
# 'Source' is restored as the first column by reset_index().
metadata = sample_info.set_index('Source').reindex(dendrogram.labels[::-1]).fillna('').reset_index()

# Attach the metadata as a table on the dendrogram's axes. bbox is
# [x0, y0, width, height] in axes coordinates: the table starts just right of
# the axes (x0=1.05) and is 5 axes-widths wide. The height of 1 + 1/n
# presumably leaves one extra row for the header so that data rows line up
# with the leaves — TODO confirm visually.
table = dendrogram.plot.ax.table(
    cellLoc='left', colLoc='left',
    cellText=metadata.values,
    colLabels=metadata.columns,
    bbox=[1.05, 0, 5, 1+1/len(dendrogram.labels)]
)

# Fix the font size instead of letting matplotlib shrink text to fit.
table.auto_set_font_size(False)
table.set_fontsize(12)

# Strip cell borders and padding. Use the public get_celld() accessor rather
# than the private `_cells` attribute; only the cells are needed, not their
# (row, column) keys.
for cell in table.get_celld().values():
    cell.set_linewidth(0)
    cell.PAD = 0

# Size every column to fit its widest entry.
table.auto_set_column_width(range(len(metadata.columns)))

In [9]:
# Sample identifiers whose table text is recoloured further down.
labels = {
    'TUECL19001',
    'TUECL19002',
    'TUECL19003',
    'TUECL19004',
    'TUECL19005',
}

In [10]:
# Colour red every table cell whose text exactly matches one of `labels`.
# Iterate through the public get_celld() accessor instead of the private
# `_cells` attribute; the (row, column) keys are not needed.
for cell in table.get_celld().values():
    text = cell.get_text()
    if text.get_text() in labels:
        text.set_color('red')

In [11]:
# Export the figure (dendrogram plus the attached metadata table) as SVG in
# the current working directory, via the project's Dendrogram.savefig helper.
dendrogram.savefig('dendrogram.svg')