In [None]:
## This notebook will cluster and draw GI map data using seaborn as well as calculate shared clustering information

In [4]:
## Import packages

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy as sp
import gseapy as gp
from sklearn import metrics
import networkx as nx



In [5]:
## Define Functions

## deconstruct_tree walks one node of a hierarchical clustering and records every leaf found beneath it
def deconstruct_tree(start,tree):
    """Collect the leaf ids found under one merge of a linkage tree.

    ``tree`` is read like a SciPy linkage matrix: row ``i`` describes one merge and
    its first two entries are the ids of the two children. A child id of at most
    ``len(tree)`` is a leaf (an original observation); a larger id refers to the
    earlier merge stored in row ``id - (len(tree) + 1)``.

    Leaf ids are appended, left child first, to the module-level list
    ``branch_identity``. The caller must reset that list to ``[]`` before every
    top-level call. Nothing is returned.

    Parameters
    ----------
    start : int
        Row of ``tree`` whose descendants are collected.
    tree : 2-D array-like
        Linkage rows; only the first two entries of each row are read.
    """
    node = tree[start]
    n_leaves = len(tree) + 1
    for child in (node[0], node[1]):
        if child > len(tree):
            # Internal node: descend into the merge that created it.
            deconstruct_tree(int(child - n_leaves), tree)
        else:
            # The rows hold ids as floats; store plain ints so the result can be
            # used directly as positional indices.
            branch_identity.append(int(child))

## enrichDf takes an Enrichr enrichment result and a significance threshold and returns a DataFrame
## containing significant go-terms and their associated -log10 adjusted p-values over the threshold
def enrichDf(result,sig):
    """Summarise the significant terms of an Enrichr result table.

    Parameters
    ----------
    result : pandas.DataFrame
        Enrichment results; the 'Term' and 'Adjusted P-value' columns are read.
        The frame is left unmodified.
    sig : float
        Threshold on -log10(adjusted p-value); only terms strictly above it are kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by term name (the text before the first ' (' in 'Term'), with a
        single 'Score' column holding -log10(adjusted p-value).
    """
    # Score on the adjusted p-value, as the contract above states, and keep the
    # scores in a local Series rather than overwriting the caller's column.
    scores = -np.log10(result['Adjusted P-value'])
    significant = scores > sig
    termList = [term.split(' (')[0] for term in result.loc[significant, 'Term']]
    outDf = pd.DataFrame(index=termList)
    outDf['Score'] = list(scores[significant])
    return outDf

## rand_identifier turns a cluster (a collection of genes) into a 0/1 membership vector over an index,
## the label format expected when comparing two clusters with the adjusted rand score
def rand_identifier(cluster,index):
    """Return one flag per entry of ``index``: 1 if that entry is in ``cluster``, else 0.

    Parameters
    ----------
    cluster : container
        Members of the cluster, e.g. ``[gene1, gene2, ...]``.
    index : iterable
        Labels to flag, in the order the flags should be returned.

    Returns
    -------
    list of int
        Same length and order as ``index``.
    """
    return [1 if label in cluster else 0 for label in index]


## A function that takes two lists and outputs 'True' if all elements in the first list exist in the second
def all_shared(list1,list2):
    """Return True when every element of ``list1`` is also in ``list2``.

    An empty ``list1`` gives True. Evaluation stops at the first missing element.
    """
    return all(item in list2 for item in list1)


## annotation_tree fully annotates a map with the clusters from a list of clusters shared between maps.
## Clusters whose genes overlap cannot share a colour column, so they are spread over successive "layers".

def annotation_tree(annotate_df, map_index, shared_list, cluster_matrix,layer,go_terms_analyzed,paired_dict):
    """Build per-gene colour columns, one per layer of non-overlapping clusters.

    In every pass the clusters of ``shared_list`` that have not been placed yet are
    visited in order. A cluster is placed in the current layer when none of its
    genes has been taken by a cluster placed earlier in the same pass. Passes
    repeat until every cluster has been placed.

    Parameters
    ----------
    annotate_df : pandas.DataFrame
        Frame indexed by the (unclustered) map genes; one column is added per
        layer. It is modified in place and also returned.
    map_index : iterable
        The map's gene index.
    shared_list : iterable
        Cluster ids (columns of ``cluster_matrix``) to annotate.
    cluster_matrix : pandas.DataFrame
        Genes x clusters membership table; a gene belongs to a cluster where the
        cell equals True.
    layer : int
        Column label of the first layer; pass 0.
    go_terms_analyzed : list
        Cluster ids already placed; pass ``[]``. Appended to in place.
    paired_dict : dict
        Cluster id -> colour. Clusters without an entry are drawn in 'black'.

    Returns
    -------
    pandas.DataFrame
        ``annotate_df`` with columns ``layer, layer + 1, ...``; genes not covered
        in a layer hold NaN.

    Raises
    ------
    ValueError
        If a pass places no cluster while some remain, i.e. a remaining cluster
        contains genes that are not in ``map_index``. Without this check the
        procedure would repeat forever.
    """
    while True:
        # Every layer starts with all genes available again.
        available_genes = list(map_index)
        in_layer = [np.nan] * len(map_index)
        placed_any = False
        for cluster in shared_list:
            if cluster in go_terms_analyzed:
                continue
            gene_list = list(cluster_matrix.loc[cluster_matrix[cluster]==True].index)
            if not all_shared(gene_list,available_genes):
                # Overlaps a cluster already placed in this layer; retry in a later one.
                continue
            colour = paired_dict[cluster] if cluster in paired_dict else 'black'
            members = set(gene_list)
            for i,gene in enumerate(annotate_df.index):
                if gene in members:
                    in_layer[i] = colour
            available_genes = [gene for gene in available_genes if gene not in members]
            go_terms_analyzed.append(cluster)
            placed_any = True
        annotate_df[layer] = in_layer
        layer += 1
        if all_shared(list(shared_list),go_terms_analyzed):
            return annotate_df
        if not placed_any:
            raise ValueError('annotation_tree could not place the remaining clusters: '
                             'they contain genes that are missing from map_index')

In [None]:
## Load the GI (gamma) and eGI (tau) matrices produced by the notebook "calculate_genetic_interaction".
## Fill in the two paths below before running; the first spreadsheet column is used as the gene index.

gamma_map_path = ''
tau_map_path = ''

gamma_map = pd.read_excel(gamma_map_path,index_col = 0)
tau_map = pd.read_excel(tau_map_path,index_col = 0)

In [None]:


################ Cluster and draw GI/eGI heatmaps ################



In [None]:
## A color map from Max Horlbeck in his 14.33 notebook for drawing GI heatmaps in the traditional blue/yellow scheme

cdict = {'red':((0.0,0.125,0.125),
                (0.5,0.0,0.0),
                (1.0,0.957,0.957)),
        'green':((0.0,0.713,0.713),
                (0.5,0.0,0.0),
                (1.0,0.918,0.918)),
        'blue': ((0.0,0.886,0.886),
                (0.5,0.0,0.0),
                (1.0,0.094,0.094))}

blue_yellow = matplotlib.colors.LinearSegmentedColormap('BlueYellow',cdict)
## Missing / masked values are drawn in white
blue_yellow.set_bad('w')

## Register the map under its name so later cells can request it as cmap='BlueYellow'.
## plt.register_cmap is deprecated and absent from recent Matplotlib releases; prefer the colormap
## registry when it exists. force=True lets this cell be re-run without an "already registered" error.
if hasattr(matplotlib, 'colormaps'):
    matplotlib.colormaps.register(blue_yellow, force=True)
else:
    plt.register_cmap(cmap=blue_yellow)

In [None]:
## Some gene names used to index our maps do not exactly match the names used by GO term enrichment software.
## Here we manually swap the affected names for the version accepted by Enrichr.
enrichr_gene_aliases = {
    'C19orf40': 'FAAP24',
    'OBFC1': 'STN1',
    'GCN1L1': 'GCN1',
    'MRE11A': 'MRE11',
    'CSRP2BP': 'KAT14',
    'SRPR': 'SRPRA',
    'C11orf82': 'DDIAS',
    'C16orf80': 'CFAP20',
    'MLF1IP': 'CENPU',
    'MTERFD1': 'MTERF3',
    'ICT1': 'MRPL58',
}

## Names without an alias are kept unchanged
gammaIndex = [enrichr_gene_aliases.get(gene, gene) for gene in gamma_map.index]

## The renamed gamma index is written onto the rows and columns of BOTH maps.
## NOTE(review): this assumes tau_map lists the same genes in the same order as gamma_map - confirm.
for relabelled_map in (gamma_map, tau_map):
    relabelled_map.index = gammaIndex
    relabelled_map.columns = gammaIndex


In [None]:
## Scale each map by the standard deviation of all of its values, then place the two scaled maps
## side by side (same rows, gamma columns followed by tau columns)

gamma_std_map = gamma_map.div(np.std(gamma_map.values))
tau_std_map = tau_map.div(np.std(tau_map.values))

concat_map = pd.concat([gamma_std_map, tau_std_map], axis=1)


In [None]:
## Cluster concatenated map using seaborn's clustermap function

## tau_cluster_annotations is only created by a later cell. On a first top-to-bottom run it does not
## exist yet, so fall back to None (clustermap's default: no row colours) instead of raising NameError.
## Re-running this cell after the annotation cells adds the colour strips.
concat_row_colors = globals().get('tau_cluster_annotations')

concat_cluster = sns.clustermap(concat_map,method = 'average',metric = 'correlation',
                               cmap = blue_yellow,
                               vmin=-4,vmax = 4,
                               cbar_pos=(1, .2, .03, .4),
                                dendrogram_ratio= (0,0.12),
                                row_colors = concat_row_colors,
                               col_cluster=False,
                               yticklabels=False,xticklabels=False)
## Row linkage of the concatenated map, computed with the same method and metric as the figure
concensus_distance = sp.cluster.hierarchy.linkage(concat_map, method='average', metric='correlation')

In [None]:
## Make the distance matrix for the concatenated map, this is used to make Table S5
## One row per merge of the linkage; the four linkage columns are copied across in order.
## Building the frame straight from the array keeps the columns numeric (float64) instead of the
## object dtype produced by filling an empty frame cell by cell.

distance_matrix = pd.DataFrame(concensus_distance,
                               columns = ['left_branch','right_branch','distance','num_elements'])
    

In [None]:
## Cluster and draw the independent gamma (GI) map, this is used to make Table S3

gamma_heatmap_options = dict(method = 'average', metric = 'correlation',
                             cmap = blue_yellow,
                             vmin = -4, vmax = 4,
                             cbar_pos = (1, .2, .03, .4),
                             yticklabels = False, xticklabels = False)
gamma_cluster = sns.clustermap(gamma_map, **gamma_heatmap_options)

## Keep the row linkage (same method and metric as the figure) under its own variable
gamma_distance = sp.cluster.hierarchy.linkage(gamma_map, method='average', metric='correlation')


In [None]:
## Make the distance matrix for the gamma independent clustering, this is used to make Table S6
## One row per merge of the linkage; built directly from the array so the columns stay numeric
## rather than object dtype.

gamma_distance_matrix = pd.DataFrame(gamma_distance,
                                     columns = ['left_branch','right_branch','distance','num_elements'])
    

In [None]:
## Cluster and draw the independent tau (eGI) map, mirroring the gamma cells above

tau_heatmap_options = dict(method = 'average', metric = 'correlation',
                           cmap = blue_yellow,
                           vmin = -4, vmax = 4,
                           cbar_pos = (1, .2, .03, .4),
                           yticklabels = False, xticklabels = False)
tau_cluster = sns.clustermap(tau_map, **tau_heatmap_options)

## Row linkage of the tau map, same method and metric as the figure
tau_distance = sp.cluster.hierarchy.linkage(tau_map, method='average', metric='correlation')



In [None]:
## Make the distance matrix for the tau independent clustering, this is used to make Table S7
## One row per merge of the linkage; built directly from the array so the columns stay numeric
## rather than object dtype.

tau_distance_matrix = pd.DataFrame(tau_distance,
                                   columns = ['left_branch','right_branch','distance','num_elements'])



In [None]:
## For figure creation, each map is drawn as a single triangle so the two can be combined:
## one half of the gamma map and the opposite half of the tau map.

## First, reorder rows and columns of each map into the leaf order of its own clustering
gamma_order = gamma_cluster.dendrogram_row.reordered_ind
tau_order = tau_cluster.dendrogram_row.reordered_ind
gamma_clustered = gamma_map.iloc[gamma_order,gamma_order]
tau_clustered = tau_map.iloc[tau_order,tau_order]

## Next, build the masks (True = cell is hidden). The sizes are taken from the maps themselves rather
## than hard-coded, and the triangles are built in one step instead of cell by cell:
##   gamma: everything strictly above the diagonal is hidden
##   tau:   everything strictly below the diagonal is hidden
gamma_mask = pd.DataFrame(np.triu(np.ones(gamma_clustered.shape, dtype=bool), k=1),
                          index = gamma_clustered.index,columns = gamma_clustered.columns)
tau_mask = pd.DataFrame(np.tril(np.ones(tau_clustered.shape, dtype=bool), k=-1),
                        index = tau_clustered.index,columns = tau_clustered.columns)




In [None]:
## Draw the GI map with the cells flagged in gamma_mask hidden

gamma_masked_options = dict(method = 'average', metric = 'correlation',
                            cmap = blue_yellow,
                            vmin = -4, vmax = 4,
                            cbar_pos = (1, .2, .03, .4),
                            yticklabels = False, xticklabels = False,
                            mask = gamma_mask)
gamma_masked_cluster = sns.clustermap(gamma_clustered, **gamma_masked_options)


In [None]:
## Draw the eGI map with the cells flagged in tau_mask hidden

tau_masked_options = dict(method = 'average', metric = 'correlation',
                          cmap = blue_yellow,
                          vmin = -4, vmax = 4,
                          cbar_pos = (1, .2, .03, .4),
                          yticklabels = False, xticklabels = False,
                          mask = tau_mask)
tau_masked_cluster = sns.clustermap(tau_clustered, **tau_masked_options)


In [None]:
################ Annotating GI and eGI maps with shared clustering information ################

## Build a membership table for the gamma map: rows are all genes in the map, columns are the clusters
## (one per row of the gamma linkage), and each cell is a bool saying whether the gene is in that cluster.
## The number of clusters is read from the linkage instead of being hard-coded.

gamma_membership = {}
for cluster in range(len(gamma_distance)):
    ## deconstruct_tree appends leaf ids to the global branch_identity, so reset it for every cluster
    branch_identity = []
    deconstruct_tree(cluster,gamma_distance)
    ## leaf ids are positions in the unclustered map; make sure they are plain ints before indexing
    branch_names = gamma_map.index[[int(leaf) for leaf in branch_identity]]
    gamma_membership[cluster] = gamma_map.index.isin(branch_names)

gamma_cluster_matrix = pd.DataFrame(gamma_membership, index = gamma_map.index)


In [None]:
## Membership table for the tau map: rows are all genes in the map, columns are the clusters (one per
## row of the tau linkage), cells are bools. The number of clusters is read from the linkage.

tau_membership = {}
for cluster in range(len(tau_distance)):
    ## deconstruct_tree appends leaf ids to the global branch_identity, so reset it for every cluster
    branch_identity = []
    deconstruct_tree(cluster,tau_distance)
    ## leaf ids are positions in the unclustered map; make sure they are plain ints before indexing
    branch_names = tau_map.index[[int(leaf) for leaf in branch_identity]]
    tau_membership[cluster] = tau_map.index.isin(branch_names)

tau_cluster_matrix = pd.DataFrame(tau_membership, index = tau_map.index)


In [None]:
## Make rand_df: rows are gamma (GI) clusters, columns are tau (eGI) clusters, and each value is the
## adjusted rand score between the two clusters' gene memberships. This is used to make Table S8

## The 0/1 membership vector of a cluster does not depend on what it is compared with, so compute
## each one once up front instead of rebuilding every tau vector for every gamma cluster.
gamma_label_vectors = [
    rand_identifier(list(gamma_cluster_matrix.loc[gamma_cluster_matrix[row]==True].index),gamma_map.index)
    for row in gamma_cluster_matrix.columns
]
tau_label_vectors = [
    rand_identifier(list(tau_cluster_matrix.loc[tau_cluster_matrix[col]==True].index),tau_map.index)
    for col in tau_cluster_matrix.columns
]

rand_scores = [
    [metrics.adjusted_rand_score(gamma_rand,tau_rand) for tau_rand in tau_label_vectors]
    for gamma_rand in gamma_label_vectors
]
rand_df = pd.DataFrame(rand_scores,
                       index = gamma_cluster_matrix.columns,
                       columns = tau_cluster_matrix.columns,
                       dtype = float)
        

In [None]:
## Find every pair (GI cluster, eGI cluster) that are each other's best match: the score of the pair is
## the maximum ars of the GI cluster across all eGI clusters AND the maximum ars of the eGI cluster
## across all GI clusters. This finds all perfectly matched clusters between maps as well as similar
## clusters that have no better fit. Both maxima must reach the threshold.
## The last row and the last column of rand_df are not considered as candidates.
## Results are stored in the format "gamma cluster:tau cluster"

ars_thresh = 0.7

## Best score of every candidate tau cluster, computed once rather than once per gamma cluster
tau_best_scores = {tau_id: rand_df.loc[:,tau_id].max() for tau_id in rand_df.columns[:-1]}

paired_total_list = []
## The loop variables are deliberately NOT called gamma_cluster / tau_cluster: those names hold the
## clustermap objects that the reordering cell above reads.
for gamma_id in rand_df.index[:-1]:
    cluster1_max = rand_df.loc[gamma_id,:].max()
    if cluster1_max < ars_thresh:
        continue
    for tau_id, cluster2_max in tau_best_scores.items():
        if cluster2_max < ars_thresh:
            continue
        pair_score = rand_df.loc[gamma_id,tau_id]
        if pair_score == cluster1_max and pair_score == cluster2_max:
            paired_total_list.append(str(gamma_id)+':'+str(tau_id))
            
        
        

In [None]:
## Take the matched clusters from the previous cell (format "gamma cluster:tau cluster") and compare
## every pair of entries. If the two gamma clusters are similar (ars above threshold), the entry with the
## smaller gamma cluster is added to "drop_list". The same is then done with the two tau clusters.
## Clusters of equal size are both kept.

ars_thresh = 0.5

paired_ids = [tuple(int(part) for part in pair.split(':')) for pair in paired_total_list]

## Membership lists and 0/1 label vectors depend only on the cluster, so compute them once per cluster
## instead of once per compared pair
gamma_members = {gamma_id: list(gamma_cluster_matrix.loc[gamma_cluster_matrix[gamma_id]==True].index)
                 for gamma_id, _tau_id in paired_ids}
tau_members = {tau_id: list(tau_cluster_matrix.loc[tau_cluster_matrix[tau_id]==True].index)
               for _gamma_id, tau_id in paired_ids}
gamma_vectors = {gamma_id: rand_identifier(members,gamma_map.index)
                 for gamma_id, members in gamma_members.items()}
tau_vectors = {tau_id: rand_identifier(members,tau_map.index)
               for tau_id, members in tau_members.items()}

drop_list = []
for first, (gamma_id1, tau_id1) in enumerate(paired_ids):
    for gamma_id2, tau_id2 in paired_ids[first+1:]:
        entry1 = str(gamma_id1)+':'+str(tau_id1)
        entry2 = str(gamma_id2)+':'+str(tau_id2)
        ## gamma side first, then tau side
        for members, vectors, id1, id2 in ((gamma_members, gamma_vectors, gamma_id1, gamma_id2),
                                           (tau_members, tau_vectors, tau_id1, tau_id2)):
            if metrics.adjusted_rand_score(vectors[id1],vectors[id2]) > ars_thresh:
                if len(members[id1]) > len(members[id2]):
                    drop_list.append(entry2)
                if len(members[id1]) < len(members[id2]):
                    drop_list.append(entry1)

## Items in the paired list are removed if they exist in drop_list

dropped_entries = set(drop_list)
paired_final_list = [pair for pair in paired_total_list if pair not in dropped_entries]

## Break the paired clusters from "paired_final_list" into GI and eGI clusters, save those in their own lists, then
## add clusters for each map as keys in dictionaries, with a unique colormap signature as values so that corresponding
## clusters between maps will be colored the same

cmap = sns.color_palette("hls", len(paired_final_list))
gamma_shared_list = []
tau_shared_list = []
gamma_shared_dict = dict()
tau_shared_dict = dict()
## gamma_id / tau_id are used instead of gamma_cluster / tau_cluster so the clustermap objects stored
## under those names are not overwritten
for i in range(len(paired_final_list)):
    gamma_id, tau_id = (int(part) for part in paired_final_list[i].split(':'))
    gamma_shared_list.append(gamma_id)
    tau_shared_list.append(tau_id)
    gamma_shared_dict[gamma_id] = cmap[i]
    tau_shared_dict[tau_id] = cmap[i]

In [None]:
## Print the gene identities of every conserved cluster pair:
## gamma cluster id, its genes, tau cluster id, its genes

## gamma_id / tau_id are used as loop names so the clustermap objects stored in
## gamma_cluster / tau_cluster are not overwritten with strings
for item in paired_final_list:
    gamma_id, tau_id = item.split(':')
    gamma_genes = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[int(gamma_id)] == True].index)
    tau_genes = list(tau_cluster_matrix.loc[tau_cluster_matrix[int(tau_id)] == True].index)
    print(gamma_id,gamma_genes,tau_id,tau_genes)

In [None]:
## Generate GO term enrichment results by feeding each cluster from the shared list into Enrichr. Only GO terms
## whose score from enrichDf is greater than 6 are kept. The results are collated into a DataFrame whose index
## holds every GO term found in at least one cluster and whose columns are the clusters analyzed.
## This will take a bit

gs = ['GO_Biological_Process_2021','GO_Molecular_Function_2021']
print('There are '+str(len(gamma_shared_list))+' cluster to analyze!')

## Collect one result frame per cluster and join them once at the end
gamma_go_frames = []
for n_done, cluster_id in enumerate(gamma_shared_list, start=1):
    cluster_genes = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[cluster_id]==True].index)
    enrichment = gp.enrichr(gene_list=cluster_genes,
                            gene_sets=gs,
                            no_plot=True)
    gamma_go_frames.append(enrichDf(enrichment.results,6))
    print(str(n_done)+' clusters analyzed!')

if gamma_go_frames:
    activeDf = pd.concat(gamma_go_frames,axis=1)
    activeDf.columns = gamma_shared_list
else:
    ## Nothing to analyze: produce an empty table rather than failing on (or reusing) activeDf
    activeDf = pd.DataFrame(columns=gamma_shared_list)
gamma_go_df = activeDf


In [None]:
## GO term enrichment for the eGI map: each cluster of the tau shared list is sent to Enrichr and the
## terms with a score from enrichDf above 6 are collated into one DataFrame (index = GO terms,
## columns = clusters analyzed)

gs = ['GO_Biological_Process_2021','GO_Molecular_Function_2021']
print('There are '+str(len(tau_shared_list))+' cluster to analyze!')

## Collect one result frame per cluster and join them once at the end
tau_go_frames = []
for n_done, cluster_id in enumerate(tau_shared_list, start=1):
    cluster_genes = list(tau_cluster_matrix.loc[tau_cluster_matrix[cluster_id]==True].index)
    enrichment = gp.enrichr(gene_list=cluster_genes,
                            gene_sets=gs,
                            no_plot=True)
    tau_go_frames.append(enrichDf(enrichment.results,6))
    print(str(n_done)+' clusters analyzed!')

if tau_go_frames:
    activeDf = pd.concat(tau_go_frames,axis=1)
    activeDf.columns = tau_shared_list
else:
    ## Nothing to analyze: produce an empty table rather than failing on (or reusing) activeDf
    activeDf = pd.DataFrame(columns=tau_shared_list)
tau_go_df = activeDf


In [None]:
## Generate color dictionaries which will be used to signify clusters shared between maps that also have a coherent GO
## term associated with them. A pair (matched by column position in the two GO tables) is kept when the best score
## in BOTH of its columns is above 6; both members of a pair receive the same color.
## NOTE: the palette holds 20 colors, so more than 20 annotated pairs raises IndexError.

cmap = sns.color_palette("tab20_r",20)

go_enriched_gamma = dict()
go_enriched_tau = dict()
count = 0
## gamma_id / tau_id are used as loop names so the clustermap objects stored in
## gamma_cluster / tau_cluster are not overwritten
for position, gamma_id in enumerate(gamma_go_df.columns):
    tau_id = tau_go_df.columns[position]
    max_gamma_go = gamma_go_df[gamma_id].max()
    max_tau_go = tau_go_df[tau_id].max()
    if max_gamma_go > 6.0 and max_tau_go > 6.0:
        go_enriched_gamma[gamma_id] = cmap[count]
        go_enriched_tau[tau_id] = cmap[count]
        count += 1

## Upon closer analysis of the clusters not annotated using the methods above, there are a number of legible groupings
## that are oddly omitted by Enrichr. To get a richer idea of functional connections between ontologies, I will manually
## add these clusters back

manual_pairs = [
    (27, 61),    ## Mitotic Inhibition
    (41, 75),    ## ER-Golgi Translocation
    (48, 62),    ## SCF Core Complex
    (133, 141),  ## FA Complex
]
for manual_gamma_id, manual_tau_id in manual_pairs:
    go_enriched_gamma[manual_gamma_id] = cmap[count]
    go_enriched_tau[manual_tau_id] = cmap[count]
    count += 1
    

In [None]:
## List the shared clusters that did NOT receive a GO-based color, printed as "gamma cluster, tau cluster"
gamma_nongo = [cluster_id for cluster_id in gamma_go_df.columns if cluster_id not in go_enriched_gamma.keys()]
tau_nongo = [cluster_id for cluster_id in tau_go_df.columns if cluster_id not in go_enriched_tau.keys()]
for position, gamma_only_id in enumerate(gamma_nongo):
    print(gamma_only_id,tau_nongo[position])

In [None]:
## Generate the GI annotation layers by calling annotation_tree (see its definition above)

gamma_annotate_df = pd.DataFrame(index = gamma_map.index)
gamma_cluster_annotations = annotation_tree(annotate_df = gamma_annotate_df,
                                            map_index = gamma_map.index,
                                            shared_list = gamma_shared_list,
                                            cluster_matrix = gamma_cluster_matrix,
                                            layer = 0,
                                            go_terms_analyzed = [],
                                            paired_dict = go_enriched_gamma)

## Reverse the order of the layers (last layer first) and relabel the columns 0, 1, 2, ...
gamma_final_annotate_matrix = gamma_cluster_annotations.iloc[:, ::-1].copy()
gamma_final_annotate_matrix.columns = range(gamma_final_annotate_matrix.shape[1])
    


In [None]:
## Draw upper/lower triangle maps with cluster annotations
## (the closing parenthesis of this call was missing, which made the cell a SyntaxError)

gamma_cluster = sns.clustermap(gamma_clustered,method = 'average',metric = 'correlation',
                               cmap = blue_yellow,
                               vmin=-4,vmax = 4,
                               cbar_pos=(1, .2, .03, .4),
                               yticklabels=False,xticklabels=False,
                               mask = gamma_mask,
                               row_colors = gamma_final_annotate_matrix,
                               figsize = (10,10),
                               dendrogram_ratio= (0,0.15),
                               #colors_ratio = (-0.2,0)
                               )

In [None]:
## Generate the eGI annotation layers by calling annotation_tree (see its definition above)
tau_annotate_df = pd.DataFrame(index = tau_map.index)
tau_cluster_annotations = annotation_tree(annotate_df = tau_annotate_df,
                                          map_index = tau_map.index,
                                          shared_list = tau_shared_list,
                                          cluster_matrix = tau_cluster_matrix,
                                          layer = 0,
                                          go_terms_analyzed = [],
                                          paired_dict = go_enriched_tau)


In [None]:
## Draw the masked eGI map with its cluster annotation layers as row colors (no color bar)
tau_annotated_options = dict(method = 'average', metric = 'correlation',
                             cmap = blue_yellow,
                             vmin = -4, vmax = 4,
                             cbar_pos = None,
                             yticklabels = False, xticklabels = False,
                             mask = tau_mask,
                             row_colors = tau_cluster_annotations,
                             dendrogram_ratio = (0,0.12),
                             figsize = (10,10))
tau_cluster = sns.clustermap(tau_clustered, **tau_annotated_options)

In [None]:
#####----------------------Measuring rewired Interactions between shared Ontologies----------------------#####

## Here I would like to make networkX plots of the shared ontologies between the two maps, represented as nodes with
## edges drawn between nodes according to the strength of the average GI between those two ontologies. This will serve
## to illustrate the changing genetic landscape between clusters

In [None]:
## Cluster ids that received a color in each map, in dictionary order
gamma_enriched_list = list(go_enriched_gamma.keys())
tau_enriched_list = list(go_enriched_tau.keys())



In [None]:
## Build the GI network: one node per colored cluster, one edge per pair of clusters.
## Edge weight = (mean absolute GI between the genes of the two clusters) squared, divided by 10.
gamma_graph = nx.Graph()
n_gamma_enriched = len(gamma_enriched_list)
for i in range(n_gamma_enriched):
    for j in range(i + 1, n_gamma_enriched):
        index1, index2 = gamma_enriched_list[i], gamma_enriched_list[j]
        ## cluster 247 is left out of the network
        if 247 in (index1, index2):
            continue
        list1 = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[index1]== True].index)
        list2 = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[index2]== True].index)
        ## nodes carry their cluster color; add each one the first time it is seen
        for node_id in (index1, index2):
            if node_id not in gamma_graph.nodes:
                gamma_graph.add_node(node_id,color = go_enriched_gamma[node_id])
        mean_abs_gi = np.mean(abs(gamma_map.loc[list1,list2].values))
        gamma_graph.add_edge(index1,index2,weight = (mean_abs_gi**2)/10)

In [None]:
## Draw the GI network on a circle: node color = cluster color, edge width = edge weight
plt.figure(figsize=(6,6))
plt.axis('off')
gamma_node_colors = [color for _node, color in gamma_graph.nodes.data('color')]
gamma_edge_widths = [weight for _source, _target, weight in gamma_graph.edges.data('weight')]
nx.draw_networkx(gamma_graph,
                 pos = nx.circular_layout(gamma_graph),
                 node_color = gamma_node_colors,
                 edge_color = 'dimgray',
                 node_size = 500,
                 width = gamma_edge_widths,
                 with_labels = False)
plt.savefig('/Users/benh/Desktop/new_maps/gamma_nodeedgeplot.png',dpi=300)

In [None]:
## Build the eGI network: one node per colored cluster, one edge per pair of clusters.
## Edge weight = (mean absolute eGI between the genes of the two clusters) squared, divided by 10.
tau_graph = nx.Graph()
n_tau_enriched = len(tau_enriched_list)
for i in range(n_tau_enriched):
    for j in range(i + 1, n_tau_enriched):
        index1, index2 = tau_enriched_list[i], tau_enriched_list[j]
        ## cluster 118 is left out of the network
        if 118 in (index1, index2):
            continue
        list1 = list(tau_cluster_matrix.loc[tau_cluster_matrix[index1]== True].index)
        list2 = list(tau_cluster_matrix.loc[tau_cluster_matrix[index2]== True].index)
        ## nodes carry their cluster color; add each one the first time it is seen
        for node_id in (index1, index2):
            if node_id not in tau_graph.nodes:
                tau_graph.add_node(node_id,color = go_enriched_tau[node_id])
        mean_abs_egi = np.mean(abs(tau_map.loc[list1,list2].values))
        tau_graph.add_edge(index1,index2,weight = (mean_abs_egi**2)/10)

In [None]:
## Draw the eGI network on a circle: node color = cluster color, edge width = edge weight
plt.figure(figsize=(6,6))
plt.axis('off')
tau_node_colors = [color for _node, color in tau_graph.nodes.data('color')]
tau_edge_widths = [weight for _source, _target, weight in tau_graph.edges.data('weight')]
nx.draw_networkx(tau_graph,
                 pos = nx.circular_layout(tau_graph),
                 node_color = tau_node_colors,
                 edge_color = 'dimgray',
                 node_size = 500,
                 width = tau_edge_widths,
                 with_labels = False)
plt.savefig('/Users/benh/Desktop/new_maps/tau_nodeedgeplot.png',dpi=300)

In [None]:
## Display the GI GO-term table built above (rows: GO terms, columns: shared gamma clusters)
gamma_go_df

In [None]:
## Labelled view of the GI network nodes only (edges have zero width); the first ten nodes are passed
## to bipartite_layout as one of the two node sets
plt.axis('off')
first_ten_nodes = list(gamma_graph.nodes)[0:10]
labelled_node_colors = [color for _node, color in gamma_graph.nodes.data('color')]
nx.draw_networkx(gamma_graph,
                 pos = nx.bipartite_layout(gamma_graph,first_ten_nodes),
                 node_color = labelled_node_colors,
                 edge_color = 'dimgray',
                 node_size = 250,
                 width = 0,
                 with_labels = True)
#plt.savefig('/Users/benh/Desktop/new_maps/nodes_connectPlot.png',dpi=300)
#plt.savefig('/Users/benh/Desktop/new_maps/nodes_connectPlot.png',dpi=300)

In [None]:
## Inspect the genes of one GI cluster (cluster id picked by hand)
cluster = 100
in_selected_cluster = gamma_cluster_matrix[cluster]==True
list(gamma_cluster_matrix.loc[in_selected_cluster].index)

In [None]:
## Genes of GI cluster 100
in_cluster_100 = gamma_cluster_matrix[100]==True
list(gamma_cluster_matrix.loc[in_cluster_100].index)

In [None]:
## NOTE(review): gamma_go_matrix is not assigned anywhere in the visible notebook, so this cell raises
## NameError on a fresh kernel. Presumably left over from an earlier version (gamma_go_df?) - confirm or remove.
gamma_go_matrix

In [None]:
## NOTE(review): tau_go_matrix is not assigned anywhere in the visible notebook, so this cell raises
## NameError on a fresh kernel. Presumably left over from an earlier version (tau_go_df?) - confirm or remove.
tau_go_matrix

In [None]:
## Look up the 'go_term' entry stored for one cluster id.
## NOTE(review): tau_go_matrix is not assigned anywhere in the visible notebook - this lookup raises
## NameError on a fresh kernel. The cluster id set here is reused by the next cell.
cluster = 270
tau_go_matrix.loc[cluster,'go_term']

In [None]:
## Genes of the eGI cluster whose id is currently stored in `cluster` (set in an earlier cell)
in_selected_tau_cluster = tau_cluster_matrix[cluster]==True
list(tau_cluster_matrix.loc[in_selected_tau_cluster].index)

In [None]:
## Print, for every paired cluster, the 'go_term' entry of the gamma cluster and of the tau cluster.
## NOTE(review): gamma_paired_dict, gamma_paired_list, tau_paired_list, gamma_go_matrix and tau_go_matrix
## are not assigned anywhere in the visible notebook, so this cell raises NameError on a fresh kernel.
## Presumably superseded by gamma_shared_list / tau_shared_list - confirm or remove.
for i in range(len(gamma_paired_dict)):
    print(i, gamma_go_matrix.loc[gamma_paired_list[i],'go_term']+' and '+tau_go_matrix.loc[tau_paired_list[i],'go_term'])
    


In [None]:
## Genes of the gamma cluster stored at position 12 of gamma_paired_list.
## NOTE(review): gamma_paired_list is not assigned anywhere in the visible notebook (NameError on a fresh kernel).
list(gamma_cluster_matrix.loc[gamma_cluster_matrix[gamma_paired_list[12]]==True].index)

In [None]:
## Genes of the gamma cluster stored at position 11 of gamma_paired_list.
## NOTE(review): gamma_paired_list is not assigned anywhere in the visible notebook (NameError on a fresh kernel).
list(gamma_cluster_matrix.loc[gamma_cluster_matrix[gamma_paired_list[11]]==True].index)

In [None]:
## Pull two specific clusters out of the gamma_map and plot them against each other in a heatmap
## (rows: genes of the first cluster, columns: genes of the second cluster)
clusters = [177,6]
first_go = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[clusters[0]] == True,clusters[0]].index)
second_go = list(gamma_cluster_matrix.loc[gamma_cluster_matrix[clusters[1]] == True,clusters[1]].index)

fig, axis = plt.subplots(figsize=(5,5))

## Bare heatmap: no frame and no ticks
for side in ('top','right','left','bottom'):
    axis.spines[side].set_visible(False)
## Tick visibility takes booleans; the string 'off' is not a documented value and, being a non-empty
## string, is not a reliable way to say False
axis.yaxis.set_tick_params(left=False, right=False, labelsize='8')
axis.xaxis.set_tick_params(bottom=False, top=False, labelsize='8')

## 'BlueYellow' is the color map registered earlier in the notebook
im = axis.imshow(gamma_map.loc[first_go,second_go], interpolation = 'none', cmap='BlueYellow', vmin=-3.5, vmax=3.5)
axis.set_xticks([])
axis.set_yticks([])
## Also display the plotted values as a table
gamma_map.loc[first_go,second_go]
#plt.savefig('/Users/benh/Desktop/'+gamma_go_matrix.loc[clusters[0],'go_term']+'-vs-'+gamma_go_matrix.loc[clusters[1],'go_term']+'_gamma.png',dpi=300)


In [None]:
## Pull two specific clusters out of the tau_map and plot them against each other in a heatmap
## (rows: genes of the first cluster, columns: genes of the second cluster)
clusters = [43,198]
first_go = list(tau_cluster_matrix.loc[tau_cluster_matrix[clusters[0]] == True,clusters[0]].index)
second_go = list(tau_cluster_matrix.loc[tau_cluster_matrix[clusters[1]] == True,clusters[1]].index)

fig, axis = plt.subplots(figsize=(5,5))

## Bare heatmap: no frame and no ticks
for side in ('top','right','left','bottom'):
    axis.spines[side].set_visible(False)
## Tick visibility takes booleans; the string 'off' is not a documented value and, being a non-empty
## string, is not a reliable way to say False
axis.yaxis.set_tick_params(left=False, right=False, labelsize='8')
axis.xaxis.set_tick_params(bottom=False, top=False, labelsize='8')

## 'BlueYellow' is the color map registered earlier in the notebook
im = axis.imshow(tau_map.loc[first_go,second_go], interpolation = 'none', cmap='BlueYellow', vmin=-3.5, vmax=3.5)
axis.set_xticks([])
axis.set_yticks([])
## Also display the plotted values as a table
tau_map.loc[first_go,second_go]

## The file name uses the clusters' 'go_term' labels from tau_go_matrix. That table is not created
## anywhere in this notebook, so when it is absent fall back to the cluster ids instead of raising NameError.
tau_go_labels = globals().get('tau_go_matrix')
if tau_go_labels is not None:
    first_label = tau_go_labels.loc[clusters[0],'go_term']
    second_label = tau_go_labels.loc[clusters[1],'go_term']
else:
    first_label = 'cluster'+str(clusters[0])
    second_label = 'cluster'+str(clusters[1])
plt.savefig('/Users/benh/Desktop/'+first_label+'-vs-'+second_label+'_tau.png',dpi=300)
