### Visualization for Iva's Sub PPI 

In [1]:
####################
# L I B R A R I E S 
####################

import ast 

from Bio import Entrez

import collections
from collections import defaultdict as dd
from collections import Counter as ct
from collections import OrderedDict
import colorsys
from colormath.color_objects import sRGBColor, LabColor

import itertools as it

import math
import matplotlib.pyplot as plt

import numpy.linalg as la
import networkx as nx
from networkx.algorithms.flow import shortest_augmenting_path
from networkx.generators.degree_seq import expected_degree_graph
from networkx.algorithms.community import greedy_modularity_communities
import numpy as np
from numpy import pi, cos, sin, arccos, arange
import numpy.linalg as la

import os
import os.path

import pandas as pd
import pickle
import plotly
import plotly.graph_objs as pgo
import plotly.offline as py
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.io as pio
import pylab
import pymysql as mysql

import random as rd

from scipy.spatial import distance
from scipy.cluster.hierarchy import fcluster
import scipy.stats as st
from scipy import stats
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as dist
from scipy.interpolate import interpn
from scipy.stats import gaussian_kde
import seaborn as sns
import sklearn
from sklearn import preprocessing
from sklearn.manifold import TSNE
from sklearn import datasets
from sklearn.preprocessing import normalize
from sklearn import (manifold, datasets, decomposition, ensemble,discriminant_analysis, random_projection,cluster)
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.cluster import SpectralClustering
from sklearn.metrics import pairwise_distances
from sklearn.linear_model import LinearRegression
import sys 

import time

import umap 

import warnings

In [143]:
####################
# F U N C T I O N S 
####################

# the RWR function by Felix: 
def rnd_walk_matrix2(A, r, a, num_nodes):
    '''
    Random Walk Operator with restart probability.

    Input:
    - A = adjacency matrix (num_nodes x num_nodes); dense array-like or scipy sparse matrix.
    - r = float; restart probability.
    - a = float; weight of the adjacency term. (1-a)/num_nodes is added to every entry,
          so a = 1.0 adds nothing.
    - num_nodes = int; number of nodes (rows/columns of A).

    Return the dense matrix W = r * inv(I - (1-r)*M), where M is the column-wise
    L1-normalized version of a*A + (1-a)/num_nodes.
    '''
    n = num_nodes

    # Densify once up front so the arithmetic below never mixes sparse and dense operands.
    A_dense = A.toarray() if hasattr(A, 'toarray') else np.asarray(A)

    factor = float((1-a)/n) # = 0 if alpha = 1.0

    # the scalar broadcasts over all entries; no need to allocate an n x n matrix of ones
    A_tele = a*A_dense + factor
    M = normalize(A_tele, norm='l1', axis=0)    # column wise normalized Markov matrix
    del A_tele

    # mixture of Markov chains
    H1 = np.identity(n) - (1-r)*M
    del M

    W = r*np.linalg.inv(H1)

    return W


def color_nodes(l_genes, color):
    '''
    Assign one color to every node of a gene list.

    Input:
    - l_genes: iterable of genes to be colored
    - color: string or hex of color e.g. 'red' or '#ff0000'

    Return:
    A dict mapping str(nodeID) to the given color.
    '''
    return {str(gene): color for gene in l_genes}


def embed_umap_sphere(Matrix, n_neighbors, spread, min_dist, metric='cosine',
                      output_metric='haversine', random_state=None):
    '''
    Generate spherical embedding of nodes in matrix input using UMAP.
    Input:
    - Matrix = Feature Matrix with either all or specific nodes (rows) and features (columns) or symmetric (nodes = rows and columns)
    - n_neighbors/spread/min_dist = UMAP parameters.
    - metric = string; distance used on the input data, e.g. euclidean, cosine, ...
    - output_metric = string; distance used in the embedding space. 'haversine' makes UMAP
      optimize the two output columns as angles on a sphere, which is what the
      sin/cos conversion in get_posG_sphere / get_posG_sphere_norm expects.
    - random_state = None or int; pass an int to get a reproducible layout.

    Return the fitted UMAP model (coordinates are in its embedding_ attribute).
    '''

    model = umap.UMAP(
        n_neighbors = n_neighbors,
        spread = spread,
        min_dist = min_dist,
        metric = metric,
        output_metric = output_metric,
        random_state = random_state)

    sphere_mapper = model.fit(Matrix)

    return sphere_mapper



def get_posG_sphere(l_genes, sphere_mapper):
    '''
    Turn the two angular columns of an embedding into 3D coordinates on the unit sphere.
    Input:
    - l_genes = list of genes; the i-th gene is paired with the i-th embedding row
    - sphere_mapper = object with an embedding_ array (e.g. fitted UMAP model)

    Return dictionary with nodes as keys and (x, y, z) coordinates as values.
    '''
    angles = sphere_mapper.embedding_
    first, second = angles[:, 0], angles[:, 1]

    xs = np.sin(first) * np.cos(second)
    ys = np.sin(first) * np.sin(second)
    zs = np.cos(first)

    return {gene: (xs[row], ys[row], zs[row]) for row, gene in enumerate(l_genes)}


def get_posG_sphere_norm(G, l_genes, sphere_mapper, d_param, radius_rest_genes = 20):
    '''
    Generate normalized 3D coordinates on concentric spheres from an embedding.
    Input:
    - G = Graph
    - l_genes = list of embedded genes; the entries that are nodes of G (compared as strings)
      are paired, in order, with the rows of the embedding
    - sphere_mapper = embedding from UMAP spherical embedding (its embedding_ attribute is read)
    - d_param = dictionary with nodes as keys and assigned radius as values; every embedded
      gene needs an entry, otherwise the final lookup raises KeyError
    - radius_rest_genes = int; radius in case of genes e.g. not function associated if genes not all G.nodes()

    Return dictionary with nodes as keys (in G.nodes() order) and coordinates as values in 3D.
    Each axis is min-max scaled to [0, 1] independently.
    '''

    emb = sphere_mapper.embedding_
    x = np.sin(emb[:, 0]) * np.cos(emb[:, 1])
    y = np.sin(emb[:, 0]) * np.sin(emb[:, 1])
    z = np.cos(emb[:, 0])

    genes = [str(i) for i in l_genes if str(i) in G.nodes()]

    # set lookup instead of scanning the gene list once per node
    embedded = set(genes)
    genes_rest = [g for g in G.nodes() if str(g) not in embedded]

    posG_3Dsphere = {entz: (x[cc], y[cc], z[cc]) for cc, entz in enumerate(genes)}

    # scale every embedded node onto the sphere with its assigned radius
    posG_3Dsphere_radius = {}
    for node, rad in d_param.items():
        if node in posG_3Dsphere:
            v = posG_3Dsphere[node]
            posG_3Dsphere_radius[node] = (v[0]*rad, v[1]*rad, v[2]*rad)

    # generate spherical coordinates for rest genes (without e.g. GO term or Disease Annotation)
    posG_rest = {}
    n_rest = len(genes_rest)
    if n_rest:
        indices = np.arange(0, n_rest)
        phi = np.arccos(1 - 2*indices/n_rest)
        theta = np.pi * (1 + 5**0.5) * indices

        r_rest = radius_rest_genes # radius for rest genes (e.g. if functional layout)
        x_rest = r_rest*np.cos(theta) * np.sin(phi)
        y_rest = r_rest*np.sin(theta) * np.sin(phi)
        z_rest = r_rest*np.cos(phi)

        posG_rest = dict(zip(genes_rest, zip(x_rest, y_rest, z_rest)))

    posG_all = {**posG_3Dsphere_radius, **posG_rest}

    # collect coordinates in G.nodes() order so they line up with the keys zipped below
    coords = [posG_all[key] for key in G.nodes()]
    x_list = [c[0] for c in coords]
    y_list = [c[1] for c in coords]
    z_list = [c[2] for c in coords]

    # normalize coordinates
    xx_norm = sklearn.preprocessing.minmax_scale(x_list, feature_range=(0, 1), axis=0, copy=True)
    yy_norm = sklearn.preprocessing.minmax_scale(y_list, feature_range=(0, 1), axis=0, copy=True)
    zz_norm = sklearn.preprocessing.minmax_scale(z_list, feature_range=(0, 1), axis=0, copy=True)

    posG_complete_sphere_norm = dict(zip(list(G.nodes()), zip(xx_norm,yy_norm,zz_norm)))

    return posG_complete_sphere_norm




def get_trace_nodes_3D(posG, info_list, color_list, size, opac=0.9):
    '''
    Build the plotly marker trace for nodes in 3D.
    Input:
    - posG = dictionary with nodes as keys and (x, y, z) coordinates as values.
    - info_list = hover text per node, ordered like the keys of posG.
    - color_list = marker colours (also used for the marker outline).
    - size = marker size.
    - opac = marker opacity e.g. 0.9

    Return a Scatter3d trace for a plotly graph objects plot.
    '''
    coords = list(posG.values())

    marker_style = dict(
        color = color_list,
        size = size,
        symbol = 'circle',
        line = dict(width = 1.0, color = color_list),
        opacity = opac,
    )

    return pgo.Scatter3d(
        x = [point[0] for point in coords],
        y = [point[1] for point in coords],
        z = [point[2] for point in coords],
        mode = 'markers',
        text = info_list,
        hoverinfo = 'text',
        marker = marker_style,
    )


def get_trace_edges_3D(G, posG, color_list, opac = 0.2, linewidth=0.2):
    '''
    Build the plotly line trace for all edges of a graph in 3D.
    Input:
    - G = Graph
    - posG = dictionary with nodes as keys and (x, y, z) coordinates as values.
    - color_list = line colour(s) handed to plotly.
    - opac = transparency of edges e.g. 0.2
    - linewidth = width of the edge lines.

    Return a Scatter3d trace for a plotly graph objects plot.
    '''
    edge_x, edge_y, edge_z = [], [], []
    for source, target in G.edges():
        x0, y0, z0 = posG[source]
        x1, y1, z1 = posG[target]
        # a None entry after each pair breaks the line so edges are drawn separately
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
        edge_z.extend((z0, z1, None))

    line_style = dict(width = linewidth, color = color_list)

    return pgo.Scatter3d(
        x = edge_x,
        y = edge_y,
        z = edge_z,
        mode = 'lines', hoverinfo='none',
        line = line_style,
        opacity = opac,
    )



def plot_3D(data, fname, scheme, annotat=None):
    '''
    Create a 3D plot from traces using plotly.
    Input:
    - data = list of traces
    - fname = string; output file name without the '.html' extension
    - scheme = 'light' or 'dark'; any other value leaves the figure layout at its defaults
    - annotat = None or plotly scene annotations

    Return the result of plotly.offline.plot; the figure is written to fname + '.html'
    and opened (auto_open=True).
    '''

    fig = pgo.Figure()

    for trace in data:
        fig.add_trace(trace)

    # per-scheme settings; everything else in the layout is shared
    if scheme == 'dark':
        template, tick_color = 'plotly_dark', 'black'
        sizing = dict(autosize = True)
    elif scheme == 'light':
        template, tick_color = 'plotly_white', 'white'
        sizing = dict(width = 1200, height = 1200)
    else:
        template = None

    if template is not None:
        scene = dict(
            xaxis_title='',
            yaxis_title='',
            zaxis_title='',
            xaxis=dict(nticks=0, tickfont=dict(color=tick_color)),
            yaxis=dict(nticks=0, tickfont=dict(color=tick_color)),
            zaxis=dict(nticks=0, tickfont=dict(color=tick_color)),
            dragmode="turntable",
        )
        # identity check: `== None` would compare element-wise for array-like annotations
        if annotat is not None:
            scene['annotations'] = annotat

        fig.update_layout(template=template, showlegend=False, scene=scene, **sizing)

    return plotly.offline.plot(fig, filename = fname+'.html', auto_open=True)



def color_edges_from_genelist(G, l_genes, color):
    '''
    Color (highlight) edges touching specific nodes.
    Input:
    - G = Graph
    - l_genes = list of nodes
    - color = string; color to highlight

    Return dict with the selected edges (u, v) as keys and the color as value.
    '''
    selected = []
    for u, v in G.edges(l_genes):
        if u in l_genes or v in l_genes:
            selected.append((u, v))

    return dict.fromkeys(selected, color)


def get_trace_edges_from_genelist3D(l_spec_edges, posG, col, opac=0.2):
    '''
    Build the plotly line trace in 3D for a selected list of edges only.
    Input:
    - l_spec_edges = list of edges (node pairs) to draw
    - posG = dictionary with nodes as keys and (x, y, z) coordinates as values.
    - col = string; color to highlight the selected edges
    - opac = transparency of edges e.g. 0.2

    Return a Scatter3d trace of the selected edges.
    '''
    edge_x, edge_y, edge_z = [], [], []
    for pair in l_spec_edges:
        x0, y0, z0 = posG[pair[0]]
        x1, y1, z1 = posG[pair[1]]
        # a None entry after each pair breaks the line so edges are drawn separately
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
        edge_z += [z0, z1, None]

    # one colour entry per coordinate entry
    line_style = dict(width = 1.0, color = [col]*len(edge_x))

    return pgo.Scatter3d(
        x = edge_x,
        y = edge_y,
        z = edge_z,
        mode = 'lines', hoverinfo='none',
        line = line_style,
        opacity = opac,
    )




def export_to_csv3D(layout_namespace, posG, colours):
    '''
    Generate csv for upload to VRnetzer platform for 3D layouts.
    Input:
    - layout_namespace = string; used as file name (plus '.csv') and as value of the last column
    - posG = dictionary with nodes as keys and (x, y, z) coordinates as values
    - colours = list of hex colours, one per node in posG order

    Writes rows of ID,X,Y,Z,R,G,B,A,layout_namespace without header or index.
    Return the result of DataFrame.to_csv.
    '''
    rgb_triples = [hex_to_rgb(hex_code) for hex_code in colours]

    df_3D = pd.DataFrame(posG).T
    df_3D.columns=['X','Y','Z']

    # colour values should be integers within 0-255
    df_3D['R'] = [int(rgb[0]) for rgb in rgb_triples]
    df_3D['G'] = [int(rgb[1]) for rgb in rgb_triples]
    df_3D['B'] = [int(rgb[2]) for rgb in rgb_triples]
    # 0-100 shows normal colours in VR, 128-200 is glowing mode
    df_3D['A'] = [100] * len(rgb_triples)

    df_3D[layout_namespace] = layout_namespace
    df_3D['ID'] = list(posG.keys())

    # rotate the last column ('ID') to the front
    ordered = df_3D.columns.tolist()
    ordered = ordered[-1:] + ordered[:-1]

    return df_3D[ordered].to_csv(layout_namespace + '.csv', index=False, header=False)


def hex_to_rgb(hx):
    '''
    Convert a hex colour string to a tuple of integer channel values.

    Input:
    - hx = string; hex colour with or without leading '#', e.g. '#ff0000'.
      Shorthand with one digit per channel ('#f00', '#f00f') is expanded to two digits per channel.

    Return a tuple of ints, e.g. (255, 0, 0).
    Raises ValueError for strings that are too short or contain non-hex characters.
    '''
    hx = hx.lstrip('#')

    # shorthand notation: every digit stands for a doubled digit ('f80' -> 'ff8800')
    if len(hx) in (3, 4):
        hx = ''.join(digit * 2 for digit in hx)

    hlen = len(hx)
    step = hlen // 3
    if step == 0:
        raise ValueError('not a hex colour: %r' % hx)

    return tuple(int(hx[i:i+step], 16) for i in range(0, hlen, step))




### Define Graph 

In [3]:
# Load the PPI sub-network; node IDs are read as strings.
G = nx.read_edgelist("Iva_input/nf_edges.txt", delimiter=',')

print('Number of nodes i.e. genes: %s' % G.number_of_nodes())
print('Number of edges: %s' % G.number_of_edges())
# nx.density computes 2*E / (N*(N-1)) for an undirected graph
print('Network density: %.1f%%' % (100. * nx.density(G)))

Number of nodes i.e. genes: 565
Number of edges: 10666
Network density: 6.7%


### Random Walk based Layout

In [4]:
A = nx.adjacency_matrix(G)
DM_adj = A.toarray()

r = 0.8 # restart probability
alpha = 1.0 # indicating "randomness"

# pass the dense adjacency so the operator works on a plain ndarray
DM_m = rnd_walk_matrix2(DM_adj, r, alpha, len(G.nodes()))

# Label rows and columns with the node IDs (adjacency_matrix uses G.nodes() order).
# Without labels the index is 0..n-1 and the gene list below would only pick up
# row numbers that happen to coincide with a node ID.
node_ids = list(G.nodes())
DM = pd.DataFrame(DM_m, index=node_ids, columns=node_ids).T

# set gene list (= G.nodes())
genes = [str(i) for i in DM.index if str(i) in G.nodes()]

genes_lookup = set(genes)
genes_rest = [g for g in G.nodes() if g not in genes_lookup]

### GENERAL NODE + EDGE PARAMETERS

In [165]:
# Node, Edge colors
edge_width = 0.1

edge_colorlight = '#d3d3d3' # 'lightgrey'
edge_colordark = '#696969' 

opacity_edges = 0.1 
opacity_nodes = 0.7

size3d = 5.0

l_features = list(G.nodes())

### NODE COLOR SETTINGS according to specific gene lists

In [148]:
# Gene of interest: np.loadtxt parses the file numerically, so go via int() to get the ID
# as a string without a decimal part.
# NOTE(review): int() presumably requires the file to hold exactly one value - verify.
nf_goi = str(int(np.loadtxt('Iva_input/nf_gene_of_interest.txt', unpack=False)))
nf_goi

'4763'

In [149]:
# Pathway genes: parsed numerically, then converted to ID strings without a decimal part.
nf_path_ = np.loadtxt('Iva_input/nf_pathway_genes.txt', unpack=False)
nf_pathw = [str(int(entrez_id)) for entrez_id in nf_path_]

nf_pathw

['3845', '22882', '4893', '3265', '5609', '5594']

In [152]:
# sanity check: number of pathway genes loaded
len(nf_pathw)

6

In [150]:
# Neighbour genes: headerless file, the IDs are taken from the first column as strings.
df_nf_neigh = pd.read_csv('Iva_input/nf_neighbors.txt', header=None, index_col=False)
nf_neigh = list(map(str, df_nf_neigh[0]))
nf_neigh[:5]

['55800', '9462', '283208', '2100', '10482']

In [153]:
# sanity check: number of neighbour genes loaded
len(nf_neigh)

125

In [175]:
# set node-color-choice based on gene lists

color_method = 'nf'

col_goi = '#ffa500' # orange
col_pathw = '#ff0000' # red
col_neigh = '#8b0000' # darkred
col_rest = '#696969' # dark grey

nf_colors = color_nodes([nf_goi], col_goi)
path_colors = color_nodes(nf_pathw, col_pathw)
neigh_colors = color_nodes(nf_neigh, col_neigh)

# later updates win on duplicate keys: pathway > gene of interest > neighbour
all_colors = dict(neigh_colors)
all_colors.update(nf_colors)
all_colors.update(path_colors)

# every remaining node of the graph gets the default colour
rest_colors = {node: col_rest for node in G.nodes() if node not in all_colors}

# bring the colours into G.nodes() order so they line up with the node trace
all_nodes_colors_unsort = {**all_colors, **rest_colors}
all_nodes_colors = {node: all_nodes_colors_unsort[node] for node in G.nodes()}

colorlist = list(all_nodes_colors.values())

d_edge_col = color_edges_from_genelist(G, [nf_goi], col_goi)

In [176]:
# three nodes are both in nf_pathw and nf_neigh
# (this is not an issue in the visualization, because the pathway colour is assigned last
# and therefore wins, but it might be of interest to you?)

# set lookup instead of scanning the whole neighbour list for every pathway gene;
# each shared gene is printed once, in pathway-list order
neigh_lookup = set(nf_neigh)
for gene in nf_pathw:
    if gene in neigh_lookup:
        print(gene)

3845
4893
3265


____________
# 4 | LAYOUTS
____________

----
### Define r-parameter
----

In [167]:
# set radius size
xs_rad = 1
s_rad = 5
m_rad = 20
l_rad = 50

pathw_lookup = set(nf_pathw)
neigh_lookup = set(nf_neigh)

# one radius per node; the first matching category wins (goi > pathway > neighbour > rest)
d_param = {}
for i in G.nodes():
    node_id = str(i)
    # nf_goi is a single ID string, so compare for equality:
    # `in` would be a substring test and also match IDs such as '47' or '763'
    if node_id == nf_goi:
        d_param[node_id] = xs_rad
    elif i in pathw_lookup:
        d_param[node_id] = s_rad
    elif i in neigh_lookup:
        d_param[node_id] = m_rad
    else:
        d_param[node_id] = l_rad

# check how many shells of spheres there will be :
len(set(d_param.values()))

4

______

## UMAP

In [168]:
%%time 

# UMAP hyper-parameters passed to embed_umap_sphere.
# NOTE(review): no random seed is passed here, so the layout presumably differs between runs - confirm.
n_neighbors = 20 # balance between local and global structure in the data
spread = 2.0
min_dist = 0.1 # defines how dense points are stacked together 
metric='cosine'

# fit UMAP on the random-walk matrix DM; the fitted model is used for the coordinates in the next cell
umap_sphere = embed_umap_sphere(DM, n_neighbors, spread, min_dist, metric)

CPU times: user 4.73 s, sys: 130 ms, total: 4.86 s
Wall time: 2.26 s


In [169]:
# all nodes of the graph take part in the radius-scaled layout
genes_rad = list(G.nodes())

# unit-sphere coordinates for the embedded genes
posG_sphere = get_posG_sphere(l_genes=genes, sphere_mapper=umap_sphere)

# radius-scaled and normalized coordinates for every node; nodes outside the embedding get radius 40
posG_complete_sphere_norm = get_posG_sphere_norm(
    G=G,
    l_genes=genes_rad,
    sphere_mapper=umap_sphere,
    d_param=d_param,
    radius_rest_genes=40,
)

# PRINT Sphere

In [177]:
# report the settings behind this layout
print('Color method: ', color_method)

print('# Neighbours:', n_neighbors)
print('Spread:',spread)
print('Min. Distance:', min_dist)

# node trace: hover labels and colours are both in G.nodes() order
umap_nodes = get_trace_nodes_3D(
    posG=posG_complete_sphere_norm,
    info_list=l_features,
    color_list=colorlist,
    size=size3d,
    opac=opacity_nodes,
)

# trace of all edges; built here but not part of the plotted data below
umap_edges = get_trace_edges_3D(G, posG_complete_sphere_norm, color_list=edge_colorlight, opac=0.15)

# highlighted edges of the gene of interest
umap_edges_spec = get_trace_edges_from_genelist3D(
    l_spec_edges=list(d_edge_col.keys()),
    posG=posG_complete_sphere_norm,
    col=col_goi,
    opac=0.4,
)

umap_data_edges = [umap_edges_spec, umap_nodes]

scheme = 'dark'

data_edges = umap_data_edges
fname = 'Iva_test'
plot_3D(data=data_edges, fname=fname, scheme=scheme)

Color method:  nf
# Neighbours: 20
Spread: 2.0
Min. Distance: 0.1


'Iva_test.html'

#### UMAP SPHERE EXPORT TO CSV

In [144]:
# Write the layout to '<namespace>.csv' (one row per node: ID, X, Y, Z, R, G, B, A, namespace).
namespace='iva_nf'
export_to_csv3D(namespace, posG_complete_sphere_norm, colorlist)