In [None]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
import numpy as np
from topicpy.hsbmpy import get_max_available_L
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram

In [None]:
# Dataset root and the name of the topic-model run; later cells read both.
directory = "/home/jovyan/work/phd/topics/datasets/gtexall/"
algorithm = "topsbm"

# Use the level index one less than the value returned by get_max_available_L.
max_level = get_max_available_L(directory, algorithm)
L = max_level - 1

# The cells below open and save files with relative paths, so make the
# dataset directory the working directory.
os.chdir(directory)

In [None]:
# Load the topic-distribution table for the selected level. The second CSV
# column becomes the index and the "i_doc" column is discarded.
# `columns=` is used because pandas >= 2.0 no longer accepts the axis as a
# positional argument to DataFrame.drop (the old `.drop("i_doc", 1)` raises
# TypeError there).
topic_dist_path = "%s/%s_level_%d_topic-dist.csv" % (algorithm, algorithm, L)
df_topics = pd.read_csv(topic_dist_path, index_col=1).drop(columns="i_doc")
print("Working with", df_topics.shape[1], "topics")

# Sample metadata, re-indexed so its rows follow the same order (and the same
# set of labels) as df_topics; labels missing from files.dat become NaN rows.
df_files = pd.read_csv("files.dat", index_col=0).reindex(index=df_topics.index)
df_files.head(2)

In [None]:
# Copy the SMTS column of the metadata into df_topics as "tissue"
# (pandas aligns the assignment on the shared index).
df_topics["tissue"] = df_files["SMTS"]

# Mean of every remaining column within each tissue, then transposed so that
# each tissue is a column.
df_tissues = df_topics.groupby("tissue").mean().T

# Pairwise correlation between the tissue columns (DataFrame.corr defaults).
correlations = df_tissues.corr()

# Tissue names in their original (unclustered) order; reused for tick labels.
classes = df_tissues.columns

In [None]:
# Clustered heatmap of the tissue-tissue correlation matrix. Only the columns
# are clustered (row_cluster=False), so rows stay in the order of `classes`.
cm = sns.clustermap(
    correlations,
    vmax=1,
    row_cluster=False,
    col_cluster=True,
    xticklabels=classes,
    yticklabels=classes,
    annot=False,
    cbar_pos=(0.99, 0.05, 0.05, 0.7),
)
ax = cm.ax_heatmap
fig = ax.get_figure()

# --- y axis: label and ticks moved to the left-hand side ---
ax.set_ylabel("Tissue", fontsize=35, rotation=90)
ax.set_yticklabels(labels=classes, rotation=0)
ax.yaxis.tick_left()
ax.yaxis.set_label_position("left")

# --- x axis: labels follow the column order produced by the clustering ---
clustered_labels = classes[cm.dendrogram_col.reordered_ind]
ax.set_xticklabels(labels=clustered_labels, rotation=80)
ax.set_xlabel("Tissue", fontsize=35)

# Tick-label size for both heatmap axes.
ax.tick_params(labelsize=15)

# --- colorbar ---
# NOTE(review): the title is the literal "P()" although the plotted values are
# correlations - confirm this is the intended caption.
cax = cm.ax_cbar
cax.tick_params(labelsize=30)
cax.set_title("P()", fontsize=30)

plt.tight_layout()
cm.savefig(f"topic_correlation_{algorithm}_level{L}.pdf")

plt.show()

In [None]:
# Stand-alone figure showing only the column dendrogram that clustermap
# computed, drawn with thick branches.
fig, ax = plt.subplots(figsize=(10, 10))

col_dendrogram = cm.dendrogram_col
col_dendrogram.rotate = False
col_dendrogram.plot(ax, {"linewidths": 10})

# One tick per tissue at 5, 15, 25, ... labelled in clustered order.
# NOTE(review): assumes the dendrogram leaves are spaced 10 units apart
# starting at 5 - confirm against the plotted tree.
n_classes = len(classes)
leaf_positions = np.linspace(5, n_classes * 10 - 5, n_classes)
leaf_labels = classes[col_dendrogram.reordered_ind]

ax.set_ylabel("Distance", fontsize=35, rotation=90)

ax.set_xticks(leaf_positions)
ax.set_xticklabels(labels=leaf_labels, rotation=90)
ax.set_xlabel("Tissue", fontsize=35)
ax.tick_params(labelsize=15)

plt.tight_layout()
fig.savefig(f"topic_correlation_{algorithm}_level{L}_dendogram_vertical.pdf")

In [None]:
# Horizontal version of the dendrogram: the linkage computed by clustermap is
# re-drawn with scipy, leaves on the y axis.
fig, ax = plt.subplots(figsize=(10, 10))

# Thick branches, applied only while the tree is being drawn.
thick_lines = {'lines.linewidth': 10}
with plt.rc_context(thick_lines):
    dendrogram(cm.dendrogram_col.linkage, ax=ax, orientation="right")

# One tick per tissue at 5, 15, 25, ... labelled in clustered order.
# NOTE(review): assumes scipy lays the leaves out in the same order as
# cm.dendrogram_col.reordered_ind - confirm against the plotted tree.
n_classes = len(classes)
leaf_positions = np.linspace(5, n_classes * 10 - 5, n_classes)
leaf_labels = classes[cm.dendrogram_col.reordered_ind]

ax.set_xlabel("Distance", fontsize=35, rotation=0)

ax.set_yticks(leaf_positions)
ax.set_yticklabels(labels=leaf_labels, rotation=0)
ax.set_ylabel("Tissue", fontsize=35)
ax.tick_params(labelsize=15)

plt.tight_layout()
fig.savefig(f"topic_correlation_{algorithm}_level{L}_dendogram.pdf")