In [1]:
import numpy as np
import pandas as pd

import networkx as nx 
from pyvis import network as net

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl

from sklearn.preprocessing import MinMaxScaler

# Utility functions

In [2]:
def add_node_colors(network, col_attribute, cmap_name="Accent"):
    """
    Function to add node colors based on existing node attribute.

    Each distinct value of ``col_attribute`` gets one color sampled from the
    named matplotlib colormap. The color is stored as a hex string (alpha
    channel included) under the ``'color'`` attribute of every node that
    carries ``col_attribute``. The graph is modified in place; nodes without
    ``col_attribute`` are left untouched.

    network: networkx object
    col_attribute: Name of attribute to use for determining node colors
    cmap_name: Name of matplotlib colormap to use (default: 'Accent')

    Returns None.
    """
    source_att = nx.get_node_attributes(network, col_attribute)
    if not source_att:
        # No node carries the attribute, so there is nothing to color.
        return

    # Order the distinct values so the value -> color mapping is identical on
    # every run; iterating a bare set of strings is not stable across
    # interpreter sessions because of string hash randomization.
    unique_values = set(source_att.values())
    try:
        categories = sorted(unique_values)
    except TypeError:
        # Mutually unorderable value types: fall back to ordering by repr.
        categories = sorted(unique_values, key=repr)
    category_index = {value: idx for idx, value in enumerate(categories)}

    # Get colors
    try:
        # Colormap registry API; cm.get_cmap is deprecated in matplotlib 3.7
        # and removed in 3.9.
        cmap = mpl.colormaps[cmap_name].resampled(len(categories))
    except AttributeError:
        # Older matplotlib without the registry or Colormap.resampled.
        cmap = cm.get_cmap(cmap_name, len(categories))
    node_colors = {
        node: mpl.colors.rgb2hex(cmap(category_index[value]), keep_alpha=True)
        for node, value in source_att.items()
    }

    # Set colors
    nx.set_node_attributes(network, node_colors, 'color')
    
    
    

# Example with new networkx object

In [9]:
# Load the proteomics matrix and the sample metadata. The proteomics data is
# used to generate a fake adjacency matrix; the metadata supplies real
# per-sample attributes.
proteomics_csv = '/nfs/answer/fraenkel_internal/current_omics/proteomics/normalized_imp_corrected_proteomics204_03292022.csv'
metadata_csv = '/nfs/answer/fraenkel_internal/current_omics/proteomics/full_prot_metadata203_03082023.csv'

proteomics = pd.read_csv(proteomics_csv, index_col=0)
metadata = pd.read_csv(metadata_csv, index_col=0)

# Keep only the columns whose labels appear in metadata's GUID column,
# in metadata order
proteomics = proteomics.loc[:, metadata['GUID'].values]

num_guids = len(proteomics.columns)
print(num_guids)
proteomics

203


Unnamed: 0,NEUAB000NKC,NEUAE993EPR,NEUAF553MJ3,NEUAG241NUD,NEUAG603XLK,NEUAJ928PAA,NEUAL076FCE,NEUAP285GGU,NEUAT234RK6,NEUAW717TN6,...,NEUZK054DP5,NEUZL045YD3,NEUZN936HJ9,NEUZP278MR4,NEUZT902WVB,NEUZV656DD1,NEUZW701NNF,NEUZX521TKK,NEUZX847VWV,NEUZY975XKL
A0FGR8|ESYT2,12.472100,12.642798,12.742081,12.481972,12.552721,12.579313,12.850728,12.639461,12.698320,12.550657,...,12.262520,12.450807,12.641901,12.717533,12.910621,12.714050,12.735538,12.424176,12.612128,12.579888
A0MZ66|SHOT1,13.842471,13.975695,13.699891,13.856147,13.816688,13.735516,14.050555,13.870213,14.164351,13.929244,...,14.325582,13.957713,13.991507,13.892948,13.952639,13.839152,13.886002,13.890692,14.014689,13.789265
A1L020|MEX3A,13.296923,13.293507,13.398688,13.544040,13.342870,13.575677,13.495929,13.411647,13.205646,13.362286,...,13.528092,13.310814,13.350540,13.616407,13.326455,13.594600,13.468686,13.333949,13.380199,13.520812
A1X283|SPD2B,12.470363,12.411420,12.515813,12.641331,12.584407,12.361021,12.528692,12.220873,12.444753,12.631653,...,12.512905,12.302379,12.490169,12.335576,12.773617,12.639890,12.464928,12.403512,12.578225,12.743562
A2RRP1|NBAS,11.340363,10.749392,11.003630,10.760671,10.877956,10.477362,10.272972,10.272276,10.897402,10.853940,...,10.376216,10.993136,10.851952,10.741054,10.714822,10.526593,10.841943,10.685329,11.093619,10.819350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q9Y6R0|NUMBL,11.483020,11.542400,11.533970,11.655710,11.382800,11.350170,11.779620,11.489790,11.358130,11.576580,...,12.050160,11.571300,11.879640,11.437920,11.575610,11.470200,11.451410,11.440140,11.730790,11.560100
Q9Y6V0|PCLO,9.456830,9.557025,9.794867,9.345021,9.363582,9.752151,9.579164,9.806852,9.462755,9.562161,...,9.621430,9.502433,9.548516,9.563470,9.338708,8.864950,9.773427,9.418286,9.690135,9.387186
Q9Y6V7|DDX49,11.058880,11.030750,11.481060,11.313340,11.329190,11.151840,11.214950,11.275310,10.891840,11.190700,...,11.304200,11.231960,11.141490,11.126730,11.470240,11.299180,11.152710,10.914820,11.464200,11.410380
Q9Y6X4|F169A,10.503000,10.836080,10.431840,10.841550,10.704220,10.887090,10.954850,10.658110,10.977600,10.921100,...,11.143870,11.052500,10.983980,10.591580,10.502900,10.801800,10.800560,10.860360,10.696900,10.541000


In [11]:
# Display the sample metadata table (rows are indexed by the first CSV column)
metadata

Unnamed: 0,GUID_orig,GUID_vial,GUID,attribute_ExperimentalGroup,attribute_SampleType,attribute_TechnicalGroup,attribute_Species,attribute_DataType,attribute_BiologicalGroup,Level2,...,Age At Symptom Onset,Age At Death,ALSFRS-R Baseline,ALSFRS-R Latest,ALSFRS-R Progression Slope,Sex,Primary Tissue,progressor,estimated_slope_alsfrsr,SOD1_gen
NEUAB000NKC,NEUAB000NKC,NEUAB000NKC_5730,NEUAB000NKC,Case,diMN,1,Human,DIA,NEUAB000NKC_5730_MS2-A01,Case_NEUAB000NKC_5730_MS2-A01,...,50.0,55.0,40.0,31.0,-0.5590,Female,PBMC/T-Cell,AMBIGUOUS,-0.512430,NEG
NEUAE993EPR,NEUAE993EPR,NEUAE993EPR_6291,NEUAE993EPR,Case,diMN,1,Human,DIA,NEUAE993EPR-6291-P-MS3-C02,Case_NEUAE993EPR-6291-P-MS3-C02,...,55.0,,40.0,14.0,-0.8191,Female,PBMC/T-Cell,fast,-0.822744,NEG
NEUAF553MJ3,NEUAF553MJ3,NEUAF553MJ3_5888,NEUAF553MJ3,Case,diMN,1,Human,DIA,NEUAF553MJ3-5889-P_F12,Case_NEUAF553MJ3-5889-P_F12,...,59.0,,27.0,,,Female,PBMC/NT-Cell,,,NEG
NEUAG241NUD,NEUAG241NUD,NEUAG241NUD_6685,NEUAG241NUD,Case,diMN,1,Human,DIA,NEUAG241NUD-6685-P_B07,Case_NEUAG241NUD-6685-P_B07,...,57.0,,38.0,38.0,,Male,PBMC/NT-Cell,,,NEG
NEUAG603XLK,NEUAG603XLK,NEUAG603XLK_5758,NEUAG603XLK,Case,diMN,1,Human,DIA,NEUAG603XLK-5759-P_F13,Case_NEUAG603XLK-5759-P_F13,...,54.0,58.0,40.0,29.0,-0.7397,Female,PBMC/NT-Cell,AMBIGUOUS,-0.720656,NEG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NEUZV656DD1,NEUZV656DD1,NEUZV656DD1_6056,NEUZV656DD1,Case,diMN,1,Human,DIA,NEUZV656DD1_6056_MS2-C10,Case_NEUZV656DD1_6056_MS2-C10,...,35.0,38.0,7.0,6.0,,Male,PBMC/T-Cell,,,NEG
NEUZW701NNF,NEUZW701NNF,NEUZW701NNF_6885,NEUZW701NNF,Case,diMN,1,Human,DIA,NEUZW701NNF-6885-P_E02,Case_NEUZW701NNF-6885-P_E02,...,64.0,66.0,41.0,32.0,,Male,,,,NEG
NEUZX521TKK,NEUZX521TKK,NEUZX521TKK_5797,NEUZX521TKK,Case,diMN,1,Human,DIA,NEUZX521TKK_5797_MS2-B01,Case_NEUZX521TKK_5797_MS2-B01,...,70.0,74.0,23.0,16.0,-0.6519,Male,PBMC/T-Cell,AMBIGUOUS,-0.617298,NEG
NEUZX847VWV,NEUZX847VWV,NEUZX847VWV_6091,NEUZX847VWV,Case,diMN,1,Human,DIA,NEUZX847VWV_6091_MS2-D02,Case_NEUZX847VWV_6091_MS2-D02,...,55.0,57.0,34.0,34.0,,Male,PBMC/NT-Cell,,,UNK


In [4]:
# Cross-subject correlations (between proteomics columns), used to estimate
# "edge weights"
corr_matrix = proteomics.corr()

# Min-max scale all correlations jointly: flatten to a single column, scale,
# then restore the square num_guids x num_guids layout
flat_corr = corr_matrix.values.reshape(-1, 1)
scaled = MinMaxScaler().fit_transform(flat_corr).reshape((num_guids, num_guids))

# Zero the diagonal (no self-loops) and everything below an arbitrary
# threshold chosen for testing purposes
np.fill_diagonal(scaled, 0)
scaled[scaled < 0.75] = 0
scaled

array([[0.        , 0.7866374 , 0.        , ..., 0.87267007, 0.        ,
        0.78108293],
       [0.7866374 , 0.        , 0.        , ..., 0.85873462, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.78990535,
        0.8203059 ],
       ...,
       [0.87267007, 0.85873462, 0.        , ..., 0.        , 0.78633743,
        0.81359922],
       [0.        , 0.        , 0.78990535, ..., 0.78633743, 0.        ,
        0.83201771],
       [0.78108293, 0.        , 0.8203059 , ..., 0.81359922, 0.83201771,
        0.        ]])

In [5]:
# Create networkx object
# nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the
# supported equivalent for a 2D ndarray adjacency matrix such as `scaled`.
guid_nt = nx.from_numpy_array(scaled)

# Nodes are created as integer positions 0..n-1; rename each one to the
# proteomics column label at the same position
guid_nt = nx.relabel_nodes(guid_nt, dict(enumerate(proteomics.columns)))

# Set some attributes from metadata
# Can set other attributes as well
nx.set_node_attributes(guid_nt, metadata.loc[:, ['Case', 'Sex']].to_dict(orient='index'))

In [6]:
# Color every node of the GUID graph according to its 'Case' attribute
add_node_colors(network=guid_nt, col_attribute='Case')

In [7]:
# Inspect the attributes stored on a single node
guid_nt.nodes['NEUAB000NKC']


{'Case': 'CASE', 'Sex': 'Female', 'color': '#7fc97fff'}

In [10]:
# Restrict the graph to its first 20 nodes for ease of visualization
first_nodes = list(guid_nt.nodes)[:20]
sub_nt = guid_nt.subgraph(first_nodes)

# Build the pyvis network from the subgraph and write/show it as HTML
nt = net.Network(height='750px', width='750px', notebook=True)
# nt.barnes_hut()
nt.from_nx(sub_nt)

nt.show('guids.html')

guids.html


# Example with imported networkx object

In [12]:
# GraphML file to import (relative path, so it resolves against the current working directory)
nx_file = 'oi_testing/oi_outs/overall_pipeline/promoter_TFs_276_randomization_results_02152023_W_10.00_B_10.00_G_5.00_filtered_5000_res1.0/lv_2.graphml'
# Read the file into a networkx graph
test_nx = nx.read_graphml(nx_file)

In [13]:
# Which attribute names appear on at least one node?

{attr_name for _, attrs in test_nx.nodes(data=True) for attr_name in attrs}

{'Direction',
 'betweenness',
 'degree',
 'direction',
 'gene_type',
 'general_function',
 'general_process',
 'location',
 'log2FC',
 'louvain_clusters',
 'pathway',
 'prize',
 'robustness',
 'source',
 'specific_function',
 'specific_process',
 'specificity',
 'terminal',
 'type'}

In [14]:
# Color nodes by their 'source' attribute
add_node_colors(test_nx, col_attribute='source')

# Additional visual attribute: node shape encodes gene type
# (whether it's a known ALS gene or not)
shape_map = {
    'ALS_gene': 'square',
    'non_ALS_gene': 'dot',
}
gene_types = nx.get_node_attributes(test_nx, 'gene_type')
node_shapes = {node: shape_map[gene_type] for node, gene_type in gene_types.items()}
nx.set_node_attributes(test_nx, node_shapes, name='shape')

# Draw every edge in grey
nx.set_edge_attributes(test_nx, 'grey', 'color')

In [15]:
# Inspect the attributes stored on a single node, including the added 'color' and 'shape'
test_nx.nodes['SYNCRIP']

{'prize': 0.0,
 'type': 'protein',
 'direction': nan,
 'terminal': False,
 'degree': 11,
 'betweenness': 0.04831280956838569,
 'louvain_clusters': '2',
 'location': 'nucleus',
 'general_process': 'cellular process',
 'specific_process': 'cellular response to stimulus',
 'general_function': 'binding',
 'specific_function': 'protein binding',
 'robustness': 0.63,
 'specificity': 0.05,
 'gene_type': 'non_ALS_gene',
 'source': 'Steiner Node',
 'Direction': nan,
 'log2FC': nan,
 'pathway': 'NA',
 'color': '#386cb0ff',
 'shape': 'dot'}

In [16]:
# Build a per-node 'title' string (used for mouseover/clickover labels)
# from the cluster, location and robustness attributes

clusters = nx.get_node_attributes(test_nx, 'louvain_clusters')
locations = nx.get_node_attributes(test_nx, 'location')
robusts = nx.get_node_attributes(test_nx, 'robustness')

# One title per node that has a cluster assignment
node_titles = {
    node: f"cluster:{cluster}\nlocation:{locations[node]}\nrobustness:{robusts[node]}"
    for node, cluster in clusters.items()
}
nx.set_node_attributes(test_nx, node_titles, name='title')

In [17]:
# Build the interactive pyvis network from the imported graph
nt = net.Network(height='750px', width='750px', notebook=True)
# nt.barnes_hut()
nt.from_nx(test_nx)

# Additional display options can be supplied like this:

# nt.set_options("""
# var options = {
#   "nodes": {
#     "font": {
#       "size": 25
#     },
#     "color": {
#         "border": "red"
#     },
#     "opacity": 0.1
#   }
# }
# """)

# Write the HTML file and show it
nt.show('nx.html')

nx.html
