# Graph Analysis - II

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import networkx as nx
%matplotlib inline

## Centrality measures for the nodes 

In [None]:
# Zachary's karate club network, shipped with networkx; used as the
# undirected example graph in the cells below.
Gk = nx.karate_club_graph()

Reference documentation:

- Centrality algorithms in NetworkX: https://networkx.org/documentation/stable/reference/algorithms/centrality.html
- PageRank in NetworkX: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html

We start by computing the different centrality measures for our graph.

In [None]:
# Each call returns a dict that maps a node of Gk to its score under the
# corresponding centrality measure. The four computations are independent.
eigenvector_c = nx.eigenvector_centrality(Gk)
pagerank_c = nx.pagerank(Gk)
degree_c = nx.degree_centrality(Gk)
betweenness_c = nx.betweenness_centrality(Gk)

In [None]:
# Collect the centrality scores into NumPy arrays that all share one node
# ordering (the iteration order of Gk), so that position k of every array
# refers to the same node. The arrays are reused by the scatter plots and
# by the graph drawing further down.
node_order = list(Gk)
n = len(node_order)
deg = np.array([degree_c[node] for node in node_order], dtype=float)
pr = np.array([pagerank_c[node] for node in node_order], dtype=float)
eig = np.array([eigenvector_c[node] for node in node_order], dtype=float)
bw = np.array([betweenness_c[node] for node in node_order], dtype=float)

# One row per node (index named 'nodes'), one column per centrality measure.
# The frame is built in a single constructor call from the arrays above, so
# it does not depend on Gk.nodes() being a plain list, and the explicit
# `columns` argument fixes the column order.
measure_columns = ['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c']
measures = pd.DataFrame(
    {
        'eigenvector_c': eig,
        'pagerank_c': pr,
        'degree_c': deg,
        'betweenness_c': bw,
    },
    index=pd.Index(node_order, name='nodes'),
    columns=measure_columns,
)

We can plot the pairwise correlations between the different centrality measures. Notice the strong positive correlation between degree centrality and PageRank.

In [None]:
# Pairwise correlation matrix of the centrality measures (DataFrame.corr
# computes Pearson correlation by default), drawn as an annotated heatmap.
# The colour scale is pinned to [-1, 1] so that zero correlation sits at the
# middle of the diverging colormap.
sns.heatmap(measures.corr(), annot=True, vmin=-1, vmax=1, cmap='RdBu_r', square=True)

We can also draw a scatterplot for every pair of centrality measures and look for strong trends.

In [None]:
# Scatter-plot matrix over every pair of columns in `measures`. The 'white'
# axes style is applied only for the duration of the with-block.
with sns.axes_style(style='white'):
    sns.pairplot(data=measures)

In [None]:
# Degree centrality against PageRank, one point per node. Axis labels and a
# title are set so the figure can be read without the surrounding code.
plt.scatter(deg, pr)
plt.xlabel('degree centrality')
plt.ylabel('PageRank')
plt.title('PageRank vs. degree centrality')
plt.show()

In [None]:
# Degree centrality against betweenness centrality, one point per node. Axis
# labels and a title are set so the figure can be read on its own.
plt.scatter(deg, bw)
plt.xlabel('degree centrality')
plt.ylabel('betweenness centrality')
plt.title('Betweenness vs. degree centrality')
plt.show()

When plotting the graph, we can choose to represent the centrality of each node as its size. 

In [None]:
# Plot the graph with node size encoding betweenness centrality.
#
# MinMaxScaler operates on 2-D (n_samples, n_features) input and returns 2-D
# output, so the 1-D score vector is reshaped into a single column for the
# scaler and flattened again before it is handed to nx.draw. Scores are
# mapped linearly onto the marker-size range [50, 800].
#
# `bw` was filled by iterating over Gk, which is also the node order nx.draw
# uses by default, so the sizes line up with the nodes.
# NOTE: to size nodes by another measure, substitute `eig`, `pr` or `deg`
# for `bw` below.
scaler = MinMaxScaler(feature_range=(50, 800))
bw_scaled = scaler.fit_transform(bw.reshape(-1, 1)).ravel()
node_size = bw_scaled
nx.draw(Gk, node_size=node_size, node_color='#6699cc')

Let's see how the above applies to directed graphs.

In [None]:
# Number of nodes and number of edges of the karate club graph, separated
# by a space. The %-formatted single argument prints identically under
# Python 2 and Python 3.
print('%d %d' % (Gk.number_of_nodes(), Gk.number_of_edges()))

In [None]:
# Load the second example network from a GML file; the relative path is
# resolved against the notebook's working directory.
# NOTE(review): presumably a directed graph, since the next cells test
# strong connectivity -- confirm against the data file.
gml_path = 'celegansneural.gml'
G = nx.read_gml(gml_path)

In [None]:
# True when every node of G can reach every other node along directed edges.
# Written as a single-argument print call so it runs on Python 2 and 3.
print(nx.is_strongly_connected(G))

If the graph is **not** strongly connected, we can keep its largest strongly connected component.

In [None]:
# Keep the largest strongly connected component of G as its own graph.
#
# nx.strongly_connected_components yields the node set of each component;
# max(..., key=len) returns the first largest one, which matches a manual
# "strictly greater than the best so far" scan. The induced subgraph is
# copied so that Gmax is an independent graph rather than a view of G.
scc = list(nx.strongly_connected_components(G))
if scc:
    Gmax = G.subgraph(max(scc, key=len)).copy()
else:
    # A graph with no nodes has no components: fall back to G itself.
    Gmax = G
sizemax = Gmax.number_of_nodes()
print(sizemax)

In [None]:
# Re-wrap the selected component as a plain DiGraph before scoring it.
# NOTE(review): presumably this collapses parallel edges if the GML file was
# loaded as a multigraph -- confirm.
Gmax = nx.DiGraph(Gmax)

# Recompute the four centrality dicts for Gmax. They are independent of one
# another and replace the values previously stored under the same names.
eigenvector_c = nx.eigenvector_centrality(Gmax)
pagerank_c = nx.pagerank(Gmax)
degree_c = nx.degree_centrality(Gmax)
betweenness_c = nx.betweenness_centrality(Gmax)

In [None]:
# Collect the centrality scores of Gmax into NumPy arrays that all share one
# node ordering (the iteration order of Gmax), so that position k of every
# array refers to the same node.
node_order = list(Gmax)
n = len(node_order)
deg = np.array([degree_c[node] for node in node_order], dtype=float)
pr = np.array([pagerank_c[node] for node in node_order], dtype=float)
eig = np.array([eigenvector_c[node] for node in node_order], dtype=float)
bw = np.array([betweenness_c[node] for node in node_order], dtype=float)

# One row per node (index named 'nodes'), one column per centrality measure.
# The frame is built in a single constructor call from the arrays above, so
# it does not depend on Gmax.nodes() being a plain list, and the explicit
# `columns` argument fixes the column order.
measure_columns = ['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c']
measures = pd.DataFrame(
    {
        'eigenvector_c': eig,
        'pagerank_c': pr,
        'degree_c': deg,
        'betweenness_c': bw,
    },
    index=pd.Index(node_order, name='nodes'),
    columns=measure_columns,
)

In [None]:
# Pairwise correlation matrix of the centrality measures (DataFrame.corr
# computes Pearson correlation by default), drawn as an annotated heatmap.
# The colour scale is pinned to [-1, 1] so that zero correlation sits at the
# middle of the diverging colormap.
sns.heatmap(measures.corr(), annot=True, vmin=-1, vmax=1, cmap='RdBu_r', square=True)

In [None]:
# Scatter-plot matrix over every pair of columns in `measures`. The 'white'
# axes style is applied only for the duration of the with-block.
with sns.axes_style(style='white'):
    sns.pairplot(data=measures)

In [None]:
# Code for setting the style of the notebook
from IPython.display import HTML


def css_styling(path="../theme/custom.css"):
    """Read the file at ``path`` and return its text wrapped in ``HTML``.

    Parameters
    ----------
    path : str, optional
        Location of the style file. Defaults to ``"../theme/custom.css"``,
        resolved against the notebook's working directory.

    Returns
    -------
    IPython.display.HTML
        Display object holding the file's text; when it is the value of a
        cell's last expression, the notebook renders it.
        NOTE(review): presumably the file wraps its rules in ``<style>``
        tags, since the text is passed to ``HTML`` unchanged -- confirm.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened or read.
    """
    # The context manager closes the file handle even if read() fails.
    with open(path, "r") as css_file:
        styles = css_file.read()
    return HTML(styles)


css_styling()