## Requirements

* python-igraph
* set directory in the next cell

In [None]:
## root folder of the datasets; keep the trailing slash, because the file
## paths used below are built by plain string concatenation (datadir+'Airports/...')
datadir = '../Datasets/'

In [None]:
import igraph as ig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm


In [None]:
## define colors and node sizes here
## color applied to every edge
cls_edges = 'gainsboro'
## node colors: index 1 is the default, index 2 is used for highlighted
## nodes and index 0 for de-emphasized (low coreness) nodes
cls = ['silver','dimgray','black']
## node sizes, indexed the same way as cls
sz = [6,9,12]

In [None]:
## To produce LaTeX from a DataFrame
#df = df.round(decimals=3)
#print(df.to_latex(index=False))
#print(df.to_latex(index=True))

## US Airport Volume of Passengers

The nodes are represented by the 3-letter airport codes; we also read in the volume of passengers that we use as edge weights. The edges are directed.

We also read node attributes: lat/lon, state, city.

In [None]:
## load the edge list and build the weighted, directed airport graph;
## each row becomes one tuple, and the value that follows the two endpoint
## columns is stored as the edge attribute 'weight'
D = pd.read_csv(datadir + 'Airports/connections.csv')
edge_rows = list(map(tuple, D.values))
g = ig.Graph.TupleList(edge_rows, directed=True, edge_attrs=['weight'])
df = D.head()

In [None]:
## load the airport table and copy its columns onto the graph vertices
A = pd.read_csv(datadir + 'Airports/airports_loc.csv')
## airport code -> row number in A, then the row number of every vertex
lookup = {code: row for row, code in enumerate(A['airport'])}
l = [lookup[name] for name in g.vs['name']]
## (lon, lat) pair per vertex, reused later as a geographical layout
g.vs['layout'] = [(A['lon'][i], A['lat'][i]) for i in l]
for attr in ('state', 'city'):
    g.vs[attr] = [A[attr][i] for i in l]
## default look of nodes and edges in the plots
g.vs['size'] = sz[1]
g.vs['color'] = cls[1]
g.es['color'] = cls_edges
g.es['arrow_size'] = 0.33
df = A.head()
print(g.vcount(),'nodes and',g.ecount(),'directed edges')

In [None]:
## graph is weakly connected except for 2 airports:
## print every vertex that lies outside the component labelled 0
cl = g.clusters(mode='WEAK').membership
for i, comp in enumerate(cl):
    if comp > 0:
        print(i, g.vs[i])
    

In [None]:
## largest coreness found among the vertices of the full graph
max(g.coreness())

In [None]:
## largest vertex degree in the full graph (g.degree() is called with its defaults)
max(g.degree())

## California subgraph -- we look at several centrality measures 

In [None]:
## California airports only (change the state code to study another state)
hva = [v for v in g.vs if v['state'] == 'CA']
G = g.subgraph(hva)

## keep only the airports that have at least one in-state connection
G = G.subgraph([i for i, d in enumerate(G.degree()) if d > 0])

## multiple=False leaves parallel edges alone, so only self-loops are removed
G = G.simplify(multiple=False)
print(G.vcount(),'nodes and',G.ecount(),'directed edges')


In [None]:
## again the graph is weakly connected except for 2 airports:
## print every vertex that lies outside the component labelled 0
cl = G.clusters(mode='WEAK').membership
for i, comp in enumerate(cl):
    if comp > 0:
        print(G.vs[i])


In [None]:
## plot using lat/lon as layout
## every vertex stores a (lon, lat) pair in its 'layout' attribute
ly = ig.Layout(G.vs['layout'])
## mirror the second coordinate (the latitude) -- presumably so that north
## ends up at the top of the drawing; confirm against the plotting backend
ly.mirror(1)
ig.plot(G, bbox=(0,0,300,300), layout=ly)

In [None]:
## compute normalized weights: every edge weight is divided by the largest
## edge weight of G, so the heaviest edge gets normalized_weight 1
mw = np.max(G.es['weight'])
G.es()['normalized_weight'] = [w/mw for w in G.es()['weight']]

In [None]:
## directed degree centrality
def degree_centrality(g, weights=None):
    """Return the normalized (weighted) degree centrality of every vertex of g.

    For a directed graph the in-strength and out-strength of a vertex are
    added and divided by 2*(n-1); for an undirected graph the strength is
    divided by n-1, where n is the number of vertices of g.

    g       -- the graph to analyse; all values are read from this argument
               (not from a notebook-level graph)
    weights -- passed unchanged to g.strength(); None gives plain degrees

    Returns a list with one value per vertex, in vertex order.
    """
    n = g.vcount()
    if g.is_directed():
        s_in = g.strength(mode='in', weights=weights)
        s_out = g.strength(mode='out', weights=weights)
        return [(i + o)/(2*(n-1)) for i, o in zip(s_in, s_out)]
    return [s/(n-1) for s in g.strength(weights=weights)]

## Warning for disconnected graphs

In the code below, we get a warning when running closeness centrality, since the graph is not connected.
Here are the details from the help file:

''If the graph is not connected, and there is no path between two
vertices, the number of vertices is used instead the length of
the geodesic. This is always longer than the longest possible
geodesic.''

In [None]:
## compute several centrality measures, one column per measure
C = pd.DataFrame({
    'airport': G.vs['name'],
    'degree': degree_centrality(G, weights='normalized_weight'),
    'pagerank': G.pagerank(weights='weight'),
    'authority': G.authority_score(weights='weight'),
    'hub': G.hub_score(weights='weight'),
    'between': G.betweenness(),
    'closeness': G.closeness(),
})
## normalize betweenness by the number of (source, target) pairs that do not
## involve the vertex itself: (n-1)(n-2) ordered pairs on a directed graph,
## half as many unordered pairs on an undirected one
n = G.vcount()
pairs = (n-1)*(n-2) if G.is_directed() else (n-1)*(n-2)/2
C['between'] = [x/pairs for x in C['between']]
## sort w.r.t. degree centrality, look at top airports
Cs = C.sort_values(by='degree', ascending=False)
df = Cs.head()
df

In [None]:
## Remark: this agrees with intuition; note however that SAN has high values ...
## except for betweenness,
## so shortest paths mainly go via LAX and SFO.

In [None]:
## bottom ones: last five rows of the table sorted by degree centrality
df = Cs.tail()
df

In [None]:
## highlight top-3 airports w.r.t. pagerank
G.vs['prk'] = C['pagerank']
## argsort is ascending, so the last three positions hold the largest scores
top3 = np.argsort(G.vs['prk'])[-3:]
for x in top3:
    hub = G.vs[x]
    hub['color'] = cls[2]
    hub['size'] = sz[2]
#ig.plot(G,'California.eps',bbox=(0,0,300,300),layout=ly)
ig.plot(G,bbox=(0,0,300,300),layout=ly)


In [None]:
## rank-based correlation between measures
## C also holds the string column 'airport'; keep the numeric columns only,
## since recent pandas versions no longer drop non-numeric columns in corr()
df = C.select_dtypes(include='number').corr('kendall')
df

In [None]:
## plot nodes w.r.t. coreness
## start from the default look, then recolor the extremes
G.vs['color'] = cls[1]
G.vs['size'] = sz[1]
G.vs['core'] = G.coreness()
Mc = np.max(G.vs['core'])
mc = np.min(G.vs['core'])
print('minimal coreness:',mc)
print('maximal coreness:',Mc)
for v in G.vs:
    ## the low-coreness rule is applied last, so it wins if both tests hold
    if v['core'] == Mc:
        v['color'], v['size'] = cls[2], sz[2]
    if v['core'] <= mc+1:
        v['color'], v['size'] = cls[0], sz[0]
#ig.plot(G,"California_coreness.eps",bbox=(0,0,300,300),layout=ly)
ig.plot(G,bbox=(0,0,300,300),layout=ly)

In [None]:
## the geographical layout above hides the structure, so switch to a
## force-directed layout, where the coreness groups are easier to tell apart
## class per vertex: 1 for coreness 13, 2 for coreness 2, 0 otherwise
core_class = {13: 1, 2: 2}
c = [core_class.get(v['core'], 0) for v in G.vs]
ly = G.layout_kamada_kawai()
#ig.plot(G,"California_kamada.eps",bbox=(0,0,300,300),layout=ly)
ig.plot(G,bbox=(0,0,300,300),layout=ly)

In [None]:
## show closeness centralities, same layout
## the vertex labels are the closeness scores rounded to two decimals
ix = np.round(G.closeness(),decimals=2)
## shrink every node -- presumably to leave room for the labels
G.vs['size'] = 3
#ig.plot(G,"California_closeness.eps",vertex_label=ix,layout=ly,bbox=(0,0,300,300))
ig.plot(G,vertex_label=ix,layout=ly,bbox=(0,0,300,300))

In [None]:
## now the 13-core clearly appears, and we also observe a small connected
## component that was buried in the previous viz.

## vertices in the 13-core
## note that there are fewer than 14 nodes; this is because, by default, both
## in- and out-going edges are counted for a directed graph.
V = []
for v in G.vs:
    if v['core'] == Mc:
        V.append(v['name'])
print(V)

In [None]:
## comparing coreness with other centrality measures
C['coreness'] = G.vs['core']
## numeric_only=True skips the string column 'airport', which cannot be
## aggregated and makes recent pandas versions raise instead of dropping it
df = C.groupby('coreness').median(numeric_only=True)
df

In [None]:
## group in 3 categories: coreness <= 2 is 'low', coreness 13 is 'high',
## everything else is 'mid' (thresholds are hard-coded for this graph)
G.vs()['Core'] = ['low' if v['core']<=2 else 'high' if v['core']==13 else 'mid' for v in G.vs()]
C['Coreness'] = G.vs['Core']
## numeric_only=True skips the string column 'airport', which cannot be
## averaged and makes recent pandas versions raise instead of dropping it
df = C.groupby('Coreness').mean(numeric_only=True)
## the numeric coreness itself is not one of the measures being compared
df = df.drop('coreness',axis=1)
df

In [None]:
## grouped barplot: one group per measure, one bar per coreness category
barWidth = 0.25
## mean scores of each category, in the column order of df
bh = np.array(df.loc[['high']])[0]
bm = np.array(df.loc[['mid']])[0]
bl = np.array(df.loc[['low']])[0]
## x positions: each category is shifted by one bar width
r1 = np.arange(len(bh))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
## draw high / mid / low with the dark-to-light colors
for xs, scores, shade, level in zip((r1, r2, r3), (bh, bm, bl), cls[::-1], ('high', 'mid', 'low')):
    plt.bar(xs, scores, color=shade, width=barWidth, edgecolor='white', label=level + ' coreness')

## axis labels; the ticks sit under the middle bar of each group
plt.xlabel('measure',fontsize=14)
plt.ylabel('score',fontsize=14)
plt.xticks([r + barWidth for r in range(len(bh))], df.columns, fontsize=10)
## legend
plt.legend(fontsize=12);
#plt.savefig('California_core_vs_measures.eps',dpi=1200)

### delta-centrality example

In [None]:
## Delta-centrality with a simple pandemic spread model
def spread(g, alpha=0.1):
    """Return the total spread score of graph g for the given alpha.

    With A the adjacency matrix of g and n its number of vertices, the
    function computes (I - alpha*A^T)^(-1) applied to the all-ones vector,
    divides the result by n and returns the sum of its entries.
    """
    size = g.vcount()
    adjacency = np.array(g.get_adjacency().data)
    system = np.eye(size) - alpha*adjacency.T
    per_vertex = np.linalg.inv(system).dot(np.ones((size, 1)))/size
    return np.sum(per_vertex.reshape(size))
def spread_delta_centrality(g, alpha=0.1):
    """Return, for every vertex of g, the relative drop in spread(g) that is
    observed once all edges incident to that vertex are deleted.

    The input graph is left untouched: edges are removed from a copy.
    """
    baseline = spread(g, alpha=alpha)
    scores = []
    for vertex in g.vs():
        pruned = g.copy()
        ## edge ids are taken from g; the copy keeps the same ids
        pruned.delete_edges(g.incident(vertex, mode='ALL'))
        scores.append((baseline - spread(pruned, alpha=alpha))/baseline)
    return scores

In [None]:
## delta centrality of every airport, with the node color currently set on G
G.vs['delta'] = spread_delta_centrality(G, alpha=.1)
## build the frame column by column so that 'delta' stays numeric; stacking
## names, scores and colors in one array would turn the scores into strings
## and make the sort below lexicographic
DC = pd.DataFrame({'airport': G.vs['name'], 'delta': G.vs['delta'], 'color': G.vs['color']})
DC.sort_values(by='delta',ascending=False, inplace=True)
DC.head()

In [None]:
## bar chart of the delta centralities, in the row order of DC,
## each bar drawn in the color stored for its airport
bars = DC['airport']
heights = list(map(float, DC['delta']))
y_pos = range(len(bars))
plt.bar(y_pos, heights, color=DC['color'])
## airport codes as vertical tick labels
plt.xticks(y_pos, bars, rotation=90)
plt.ylabel('Delta Centrality',fontsize=12)
plt.yticks();
#plt.savefig('California_delta.eps',dpi=1200)

## Group centrality, centralization

Back to the US graph:
- which states have highest delta centralities w.r.t. efficiency?
- what about centralization for each state subgraph?

In [None]:
## group delta centrality
def efficiency(g):
    """Return the efficiency of g: the sum of 1/d(i,j) over all ordered pairs
    of distinct vertices (i,j) with j reachable from i, divided by n*(n-1).

    d(i,j) is the number of edges on the path returned by
    g.get_shortest_paths(i) for target j. A graph with fewer than two
    vertices has no pair to average over and gets efficiency 0.0.
    """
    n = g.vcount()
    if n < 2:
        return 0.0
    s = 0
    for i in range(n):
        v = g.get_shortest_paths(i)
        ## a path of k vertices has k-1 edges; paths with at most one vertex
        ## (the source itself, or no path at all) contribute nothing
        s += np.sum([1/(len(x)-1) for x in v if len(x) > 1])
    return s/(n*(n-1))

def efficiency_u(g):
    """Return the efficiency of g, visiting every unordered pair only once.

    For each vertex i only the targets j > i are considered; the sum of
    1/d(i,j) over these pairs is divided by the number of unordered pairs,
    n*(n-1)/2. On an undirected graph this equals efficiency(g). A graph
    with fewer than two vertices gets efficiency 0.0.
    """
    n = g.vcount()
    if n < 2:
        return 0.0
    s = 0
    for i in range(n):
        v = g.get_shortest_paths(i)
        ## a path of k vertices has k-1 edges; an empty path means that the
        ## target cannot be reached
        s += np.sum([1/(len(x)-1) for x in v[i+1:] if len(x) > 1])
    return 2*s/(n*(n-1))

import warnings
## silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

## group centrality -- by state: relative loss of efficiency of the US graph
## once every edge touching an airport of the state is removed
states = list(set(g.vs['state']))
eff_us = efficiency(g)
dc = []
for s in states:
    v = [x for x in g.vs if x['state'] == s]
    ## ids of all edges incident to the airports of this state
    e = [eid for x in v for eid in g.incident(x, mode='ALL')]
    G = g.copy()
    G.delete_edges(e)
    dc.append((eff_us - efficiency(G))/eff_us)

DC = pd.DataFrame({'state':states, 'delta_centrality':dc}).sort_values(by='delta_centrality', ascending=False)
df = DC.head(3)
df

In [None]:
## lowest: last three rows of the table sorted by decreasing delta centrality
df = DC.tail(3)
df

In [None]:
## group centralization (using PageRank) -- by state:
## largest PageRank minus mean PageRank within each state subgraph
states = list(set(g.vs['state']))
pr = []
st = []
for s in states:
    v = [x for x in g.vs if x['state'] == s]
    if len(v) <= 5:
        ## only states with more than 5 airports are considered
        continue
    G = g.subgraph(v)
    G = G.simplify(multiple=False) ## drop self-loops
    p = G.pagerank(weights='weight')
    pr.append(np.max(p) - np.mean(p))
    st.append(s)

DC = pd.DataFrame({'State':st, 'Pagerank Centralization':pr})
DC = DC.sort_values(by='Pagerank Centralization', ascending=False)
df = DC.head(3)
df


In [None]:
## Michigan subgraph: airports with at least one in-state connection,
## self-loops removed
v = [x for x in g.vs if x['state'] == 'MI']
G = g.subgraph(v)
G = G.subgraph([i for i, d in enumerate(G.degree()) if d > 0])
G = G.simplify(multiple=False)
#ig.plot(G, 'central_MI.eps', bbox=(0,0,300,300))
ig.plot(G,bbox=(0,0,300,300))


In [None]:
## inspect the first vertex of the subgraph together with its attributes
G.vs[0]

In [None]:
## one big hub city: Detroit
G.vs['deg'] = G.degree() # overall degree
## one line per airport: city, airport code and degree
for city, name, deg in zip(G.vs['city'], G.vs['name'], G.vs['deg']):
    print(city, name, 'degree', deg)

In [None]:
## lowest ones: last three rows of the table sorted by decreasing
## PageRank centralization
df = DC.tail(3)
df

In [None]:
## North Dakota subgraph: airports with at least one in-state connection,
## self-loops removed
v = [x for x in g.vs if x['state'] == 'ND']
G = g.subgraph(v)
G = G.subgraph([i for i, d in enumerate(G.degree()) if d > 0])
G = G.simplify(multiple=False)
#ig.plot(G, 'central_ND.eps', bbox=(0,0,300,300))
ig.plot(G, bbox=(0,0,300,300))

In [None]:
## no big hub city: list the city of every airport kept in the subgraph
G.vs['city']

In [None]:
## what about California? show its row of the centralization table
## (the result is empty if the state has 5 airports or fewer)
DC[DC['State']=='CA']