In [None]:
import networkx as nx
from hierarchy_pos import hierarchy_pos
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import pandas as pd
import numpy as np
import os
from hsbmpy import get_max_available_L, colors_cycle

In [None]:
# Dataset folder for this analysis.
# NOTE(review): machine-specific absolute path; it has to be edited before the
# notebook can run on another machine.
directory = "/home/jovyan/work/phd/datasets/paper/gtex10seed"

# Work from inside the dataset folder so that the relative paths used further
# down ("topsbm/topsbm_labels.csv", "cluster_network.pdf") resolve against it.
os.chdir(directory)

In [None]:
# Iterator that hands out one color per label, in the order of colors_cycle.
color_iterator = iter(colors_cycle)

# Load the hierarchy labels and append one extra top level in which every row
# belongs to cluster 1; L then refers to that new top level.
L = get_max_available_L(directory) + 1
df_hier = pd.read_csv("topsbm/topsbm_labels.csv")
df_hier[f"l{L}"] = np.ones(len(df_hier), dtype=int)

# Labels used to color the nodes, and the same labels in sorted order.
label_name = "tissue"
labels = df_hier[label_name].unique()
color_labels = np.sort(labels)

# Draw one color per sorted label, then pair label and color in a lookup table.
network_pal = [next(color_iterator) for _ in color_labels]
network_lut = {label: color for label, color in zip(color_labels, network_pal)}
df_hier.head()

In [None]:
get_fracs("l4_1")

In [None]:
def get_fracs(node, label_name=label_name, plot_proportions=True):
    '''
    params
    - node: node name
    - label_name: df_hier column
    - plot_proportions: wheter return the ratio of the size of the node
    
    returns
    - uniques: number of labels of each label in this node
    - colors: color for each of the labels presents
    - ration: fraction of global elements in this node
    '''
    node = node.split("_")
    uniques = np.unique(df_hier[df_hier[node[0]]==int(node[1])][label_name], return_counts=True)
    counts = df_hier.shape[0]
    epsilon = 1e-7
    ratio = (uniques[1].sum())/float(epsilon+counts) * 0.25 + 0.75 if plot_proportions else 1
    return uniques[1], [network_lut[label] for label in uniques[0]], ratio

In [None]:
# Undirected graph that the following cells fill with one node per cluster and
# one edge per (cluster, sub-cluster) pair.
hierarchy = nx.Graph()

In [None]:
# add nodes
for l in range(L+1):
    if l==0:
        continue
    hierarchy.add_nodes_from([f"l{l}_{n}" for n in df_hier[f"l{l}"].unique()])

In [None]:
# add edges
for l in range(L+1):
    if l<=0:
        continue
    for node in df_hier[f"l{l}"].unique():
        hierarchy.add_edges_from([(f"l{l}_{node}", f"l{l-1}_{low_node}") for low_node in df_hier[df_hier[f"l{l}"]==node][f"l{l-1}"].unique()])
if not nx.is_tree(hierarchy):
    raise ValueError("Graph is not a tree!")

In [None]:
# Node positions from hierarchy_pos, rooted at the single top-level cluster.
pos = hierarchy_pos(hierarchy, root=f"l{L}_1")
fig, ax = plt.subplots(figsize=(50,20))

# nx.draw renders both the nodes and the edges on `ax`, so no separate
# draw_networkx_nodes / draw_networkx_edges pass is needed.
nx.draw(hierarchy, pos=pos, ax=ax)

to_display = ax.transData.transform
to_figure = fig.transFigure.inverted().transform

# Side of a full-size pie, as a fraction of the figure.
piesize = 0.08
half_pie = piesize / 2.0
for n in hierarchy:
    x_disp, y_disp = to_display(pos[n])         # data -> display coordinates
    x_fig, y_fig = to_figure((x_disp, y_disp))  # display -> figure coordinates
    fracs, colors, ratio = get_fracs(n, label_name, True)
    # Inset axes centred on the node and scaled by the node's size ratio.
    pie_ax = fig.add_axes([x_fig - half_pie*ratio, y_fig - half_pie*ratio,
                           piesize*ratio, piesize*ratio])
    pie_ax.set_aspect('equal')
    pie_ax.pie(fracs, colors=colors)

# One round marker per label; `markersize` is given once (ms is its alias).
legend_elements = [
    Line2D([0], [0],
           marker='o',
           markersize=20,
           lw=0,
           color=network_lut[label],
           label=label,
           markerfacecolor=network_lut[label])
    for label in np.sort(labels)
]

# Legend in the upper-left corner of the main axes.
ax.legend(handles=legend_elements, fontsize=25, ncol=4, loc='upper left')

# Write the file before plt.show(), so saving never depends on the state of
# the figure after it has been displayed.
fig.savefig("cluster_network.pdf")
plt.show()

In [None]:
# Add two palette entries per tissue: "<tissue>_healthy" keeps the tissue
# color, "<tissue>_tumor" is that color with every channel scaled by 0.4.
# The loop runs over color_labels (the tissue names network_lut was built
# from) instead of the current keys of network_lut, so re-running this cell
# does not create "<tissue>_healthy_healthy"-style entries.
for tissue in color_labels:
    color = network_lut[tissue]
    network_lut[tissue+"_healthy"] = color
    network_lut[tissue+"_tumor"] = [c*0.4 for c in sns.set_hls_values(color)]

In [None]:
# Rename "Brain - Cerebellar Hemisphere" to "Brain - Cerebellum". The result is
# assigned back to the column: calling replace(..., inplace=True) on the
# selected column is a chained in-place update, which does not modify df_hier
# under pandas Copy-on-Write.
df_hier["subtissue"] = df_hier["subtissue"].replace("Brain - Cerebellar Hemisphere", "Brain - Cerebellum")

# Local seeded generator: the jittered colors are identical on every run and
# the global NumPy random state is left untouched.
rng = np.random.default_rng(42)
for subtissue in df_hier["subtissue"].unique():
    # Tissue of the first row that carries this subtissue.
    tissue = df_hier.loc[df_hier["subtissue"]==subtissue, "tissue"].iloc[0]
    color = network_lut[tissue]
    # set_hls_values with no h/l/s argument returns the color as an RGB tuple.
    # NOTE(review): index 2 of that tuple is the blue channel, not an HLS
    # component -- confirm this is the channel meant to be jittered.
    new_color = np.array(sns.set_hls_values(color))
    new_color[2] += rng.uniform(-0.4, 0.4)
    np.clip(new_color, 0, 1, out=new_color)
    network_lut[subtissue] = new_color

# From here on, nodes are colored by subtissue.
label_name = "subtissue"
labels = df_hier[label_name].unique()

In [None]:
# Show the final label -> color lookup table (tissues, their healthy/tumor
# variants, and subtissues).
network_lut