In [1]:
import networkx as nx
import matplotlib.pyplot as plot
%matplotlib inline

#This data set was taken from http://www-personal.umich.edu/~mejn/netdata/ and is described as
#an undirected, unweighted network representing the topology of the Western States Power Grid of the United States. 
#Data compiled by D. Watts and S. Strogatz and made available on the web here
#(http://cdg.columbia.edu/cdg/datasets). Please cite D. J. Watts and S. H. Strogatz, Nature 393, 440-442 (1998).

# Load the power-grid network described above from its GML file.
g = nx.read_gml('./power/power.gml')
# Only a small subset of the nodes (ids 0-499) is kept, because graphical representations
# of the largest network (diameter = 46) take an extremely long time to generate.
nodelist = list(range(500))
g = g.subgraph(nodelist)

In [2]:
import pandas as pd
from IPython.display import display
#Calculate the diameter.  We must pick a subset that is entirely connected and check that
#diameter, so g is restricted to its biggest connected component.
#max() returns the first component of maximal size, the same one the previous
#sorted(..., reverse=True)[0] selected, without sorting every component.
bn = max(nx.connected_components(g), key=len)
g = g.subgraph(bn)

#Diameter of largest network
#Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print("Diameter = " + str(nx.diameter(g)))

#Degree centrality for the top ten nodes.  The first number in each pair is the node number and
#the second number is the normalized value, which is obtained by dividing by the maximum possible
#degree in a simple graph, n-1, where n is the number of nodes in g.
#Sort by descending score and break ties by ascending node id.  The slice is [:10] (not [0:11])
#so that exactly ten rows are kept.
top_ten = sorted(nx.degree_centrality(g).items(), key=lambda pair: (-pair[1], pair[0]))[:10]
nodes = [pair[0] for pair in top_ten]
centrality = [pair[1] for pair in top_ten]
d = {"node": nodes, "centrality": centrality}
data = pd.DataFrame(d)
#display() renders the table itself and returns None, so it must not be wrapped in print.
display(data[["node", "centrality"]])

Diameter = 25


Unnamed: 0,node,centrality
0,98,0.036
1,88,0.032
2,94,0.028
3,129,0.028
4,140,0.028
5,153,0.028
6,154,0.028
7,205,0.028
8,9,0.024
9,36,0.024


None


In [3]:
# Capture the vertices and edges of the current graph g; the GraphLab cell below consumes them.
power_nodes, power_edges = g.nodes(), g.edges()

In [4]:
from graphlab import SGraph, Vertex, Edge, canvas
# Render GraphLab canvas output inside the notebook.
canvas.set_target('ipynb')

# Mirror the networkx graph in a GraphLab SGraph.  add_vertices/add_edges return a new
# graph (hence the reassignment), so the edges are added in one batched call instead of
# one call, and one intermediate graph, per edge.
powergraph = SGraph()
nodelist = [Vertex(i) for i in power_nodes]
powergraph = powergraph.add_vertices(nodelist)
edgelist = [Edge(i, j) for i, j in power_edges]
if edgelist:
    # Guarded so an edgeless graph behaves like the original per-edge loop (no call at all).
    powergraph = powergraph.add_edges(edgelist)

#Graph the top-ranked degree-centrality nodes currently held in `data`
highlightlist = data['node'].tolist()
powergraph.show(vlabel='id', highlight=highlightlist)

This non-commercial license of GraphLab Create for academic use is assigned to john.grando@spsmail.cuny.edu and will expire on June 05, 2018.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1497627623.log


In [5]:
#Check closeness centrality
#Sort by descending score and break ties by ascending node id.  The slice is [:10] (not [0:11])
#so that exactly ten rows are kept.  .items() and the single-argument lambda run on both
#Python 2 and Python 3.
top_ten = sorted(nx.closeness_centrality(g).items(), key=lambda pair: (-pair[1], pair[0]))[:10]
nodes = [pair[0] for pair in top_ten]
closeness = [pair[1] for pair in top_ten]
d = {"node": nodes, "closeness": closeness}
data = pd.DataFrame(d)
#display() renders the table itself and returns None, so it must not be wrapped in print.
display(data[["node", "closeness"]])

Unnamed: 0,node,closeness
0,207,0.173611
1,205,0.172891
2,129,0.169837
3,95,0.167336
4,171,0.165673
5,138,0.16469
6,47,0.164042
7,36,0.162443
8,98,0.161708
9,94,0.161186


None


In [6]:
# Draw the graph with the top-ranked closeness nodes (the 'node' column of `data`) highlighted.
highlightlist = data.loc[:, 'node'].tolist()
powergraph.show(highlight=highlightlist, vlabel='id')

In [7]:
#Check betweenness centrality
#Sort by descending score and break ties by ascending node id.  The slice is [:10] (not [0:11])
#so that exactly ten rows are kept.  .items() and the single-argument lambda run on both
#Python 2 and Python 3.
top_ten = sorted(nx.betweenness_centrality(g).items(), key=lambda pair: (-pair[1], pair[0]))[:10]
nodes = [pair[0] for pair in top_ten]
betweenness = [pair[1] for pair in top_ten]
d = {"node": nodes, "betweenness": betweenness}
data = pd.DataFrame(d)
#display() renders the table itself and returns None, so it must not be wrapped in print.
display(data[["node", "betweenness"]])

Unnamed: 0,node,betweenness
0,129,0.280545
1,207,0.266609
2,205,0.247581
3,113,0.224129
4,112,0.21812
5,171,0.175574
6,36,0.170618
7,98,0.165183
8,47,0.164668
9,95,0.163307


None


In [8]:
# Draw the graph with the top-ranked betweenness nodes (the 'node' column of `data`) highlighted.
highlightlist = data.loc[:, 'node'].tolist()
powergraph.show(highlight=highlightlist, vlabel='id')

In [9]:
#Check PageRank
#Sort by descending score and break ties by ascending node id.  The slice is [:10] (not [0:11])
#so that exactly ten rows are kept.  .items() and the single-argument lambda run on both
#Python 2 and Python 3.
top_ten = sorted(nx.pagerank(g).items(), key=lambda pair: (-pair[1], pair[0]))[:10]
nodes = [pair[0] for pair in top_ten]
pagerank = [pair[1] for pair in top_ten]
d = {"node": nodes, "pagerank": pagerank}
data = pd.DataFrame(d)
#display() renders the table itself and returns None, so it must not be wrapped in print.
display(data[["node", "pagerank"]])

Unnamed: 0,node,pagerank
0,98,0.013698
1,88,0.011761
2,129,0.010895
3,154,0.009662
4,184,0.009648
5,153,0.009521
6,205,0.00933
7,94,0.009285
8,36,0.009138
9,140,0.009097


None


In [10]:
# Draw the graph with the top-ranked PageRank nodes (the 'node' column of `data`) highlighted.
highlightlist = data.loc[:, 'node'].tolist()
powergraph.show(highlight=highlightlist, vlabel='id')

In [11]:
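#Check eigenvector centrality - this does not work on this network; the cause has not been determined (there may be multiple reasons), so the code below is left commented out.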
#ev = nx.eigenvector_centrality(g)
#print sorted(ev.iteritems(), key=lambda(k, v): (-v, k))[0:9]