In [11]:
import os
import re
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import json
import math
import numpy as np
from bokeh.io import show, output_file
from bokeh.plotting import figure

from bokeh.io import show, output_notebook
from bokeh.plotting import figure, ColumnDataSource
import pandas as pd

from bokeh.models import HoverTool
from numpy import histogram, linspace

%matplotlib inline
%config IPCompleter.greedy=True

# Do not truncate cell contents when DataFrames are displayed.
# Newer pandas spells "no limit" as None and rejects negative widths; older
# releases only accept the legacy -1, so fall back to it when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [2]:
# Project locations. Both directory strings end with a slash; output_dir is
# later combined with file names by plain string concatenation.
# NOTE(review): these are absolute, machine-specific paths.
data_dir = '/Users/genie/dev/projects/github/network_analysis_ticket_to_ride/data/'
output_dir = '/Users/genie/dev/projects/github/network_analysis_ticket_to_ride/output/'

# Input files read by the cells below.
routes_path = os.path.join(data_dir, 'trainroutes.txt')
destinations_path = os.path.join(data_dir, 'destinations.txt')

In [3]:
# Tokenizer for the data files: every run of word characters (letters, digits,
# underscore) on a line becomes one field when used with findall().
pattern = re.compile('\\w+')

In [4]:
# Multigraph that holds the route network; it is cleared and filled from the
# routes file in the graph-construction cell.
Gm = nx.MultiGraph()

In [5]:
# functions

# function to calculate points from segment length
def points(distance):
    """Return the points awarded for a route segment of length `distance`.

    The score table is indexed directly by segment length (0 through 8).
    Lengths 0, 5 and 7 score 0 in this table.

    Raises:
        IndexError: if `distance` is negative or greater than 8.
    """
    score_by_length = [0, 1, 2, 4, 7, 0, 15, 0, 21]
    # A negative index would silently wrap around to the end of the table
    # (e.g. -1 -> 21), so reject it the same way an over-long segment is.
    if distance < 0:
        raise IndexError('segment length must be non-negative, got {}'.format(distance))
    return score_by_length[distance]

def inverse_weight(distance):
    """Return 1 divided by `distance`."""
    reciprocal = 1 / distance
    return reciprocal

# distance of path p in graph g
def distance_of_path(g, p):
    """Sum the 'distance' attribute over the consecutive edges of path `p`.

    `p` is a sequence of nodes; every adjacent pair (p[i], p[i+1]) is looked
    up in `g.edges`. A path with fewer than two nodes has distance 0.
    """
    total = 0
    for a, b in zip(p, p[1:]):
        total += g.edges[a, b]['distance']
    return total

# points of path p in graph g
def points_of_path(g, p):
    """Sum the 'points' attribute over the consecutive edges of path `p`.

    `p` is a sequence of nodes; every adjacent pair (p[i], p[i+1]) is looked
    up in `g.edges`. A path with fewer than two nodes scores 0.
    """
    total = 0
    for a, b in zip(p, p[1:]):
        total += g.edges[a, b]['points']
    return total

# return all simple paths between u and v in graph g that are at most `cutoff` edges long
def alternate_scoring_paths_with_cutoff(g, u, v, cutoff=None):
    """List the simple paths from `u` to `v` in `g`, bounded by `cutoff`.

    Args:
        g: networkx graph to search.
        u: start node.
        v: end node.
        cutoff: value forwarded to ``nx.all_simple_paths`` as its ``cutoff``.
            When None, it defaults to the shortest-path length between `u`
            and `v` plus 2.

    Returns:
        A list of paths, each path being a list of nodes.
    """
    # Previously the caller's cutoff was ignored and always recomputed here;
    # only compute the default when no cutoff was supplied.
    if cutoff is None:
        cutoff = nx.shortest_path_length(g, u, v) + 2
    return list(nx.all_simple_paths(g, u, v, cutoff=cutoff))

In [14]:
# construct graph

# Empty the multigraph before (re)loading the routes file.
Gm.clear()

with open(routes_path) as f:
    for line in f:
        fields = pattern.findall(line)
        if not fields:
            # Blank line (e.g. a trailing newline at the end of the file):
            # nothing to unpack, so skip it instead of raising ValueError.
            continue
        city1, city2, distance, route_type, color, is_multi = fields
        distance = int(distance)
        # Only the first route listed between a pair of cities is added;
        # later lines for the same pair are skipped.
        if not Gm.has_edge(city1, city2):
            Gm.add_edge(city1, city2, key=0, distance=distance, route_type=route_type, color=color,
                        points=points(distance), weight=distance, importance=0)

# Simple-graph copy used by the analysis cells.
G = nx.Graph(Gm)

In [15]:

#Gm.nodes['Kyiv']

## Structure

In [27]:
from bokeh.models import Range1d, Plot
from bokeh.models.graphs import from_networkx
from bokeh.models.graphs import NodesAndLinkedEdges
from bokeh.models.annotations import Title
from bokeh.models import Circle, HoverTool, MultiLine
from bokeh.plotting import figure, show, save
from bokeh.io import output_file, output_notebook
from bokeh.models import Div
from bokeh.layouts import gridplot, column

# Write the chart to a standalone HTML file (use output_notebook() instead to render inline).
output_file(output_dir + 'node_network.html', mode='inline')

# normalize node size: map each node's degree linearly onto [start, end]
start = 10
end = 25
# min/max are taken from the same graph (Gm) whose degrees are scaled below.
degree_list = [deg for _, deg in Gm.degree]
min_ex = np.min(degree_list)
max_ex = np.max(degree_list)
degree_span = max_ex - min_ex
degree_dict = {}
for n, v in Gm.degree:
    if degree_span:
        degree_dict[n] = ((v - min_ex) / degree_span) * (end - start) + start
    else:
        # every node has the same degree: avoid dividing by zero
        degree_dict[n] = start

# Set node attributes
nx.set_node_attributes(Gm, degree_dict, 'node_size')


# We could use figure here but don't want all the axes and titles
plot = Plot(x_range=Range1d(-2, 2), y_range=Range1d(-2, 2), title=None)
plot.width = 900

# Create a Bokeh graph from the NetworkX input using nx.spring_layout
graph = from_networkx(Gm, nx.spring_layout, scale=1.8, center=(0,0))
plot.renderers.append(graph)

# Blue circles for nodes, and light grey lines for edges
graph.node_renderer.glyph = Circle(size='node_size', fill_color='#2b83ba')
graph.edge_renderer.glyph = MultiLine(line_color="#cccccc", line_alpha=0.8, line_width=2)

# green hover for both nodes and edges
graph.node_renderer.hover_glyph = Circle(size=25, fill_color='#abdda4')
graph.edge_renderer.hover_glyph = MultiLine(line_color='#abdda4', line_width=4)

# When we hover over nodes, highlight adjacent edges too
graph.inspection_policy = NodesAndLinkedEdges()

plot.add_tools(HoverTool(tooltips="@index"))

# Add a title for the entire visualization using Div
plot_html = """<h3>Chart 1: Ticket-to-Ride Network Structure</h3>
Note: Nodes represent cities and edges represent tracks between cities, node size is proportional to the degree
<br>
<i>Hover over each node for more information</i>
"""
# Use this cell's own text: `html` is only defined by later cells, so referring
# to it here fails on a fresh kernel or shows another chart's caption.
plot_subtitle = Div(width=800, text=plot_html)

# Visualize
show(column(plot_subtitle, plot))

#show(plot)
#save(plot)

### Degree & Weights Distribution

In [42]:
from collections import Counter 
# Tally how many nodes share each degree and how many edges share each weight.
# The 'Degree-' / 'Weight-' prefixed strings are the category labels.
degree_count = Counter('Degree-' + str(deg) for _, deg in G.degree)
weights_count = Counter('Weight-' + str(attrs['weight']) for _, _, attrs in G.edges(data=True))

from math import pi
import pandas as pd
from bokeh.plotting import figure, show, save
from bokeh.io import output_file, output_notebook
from bokeh.palettes import Category20c
from bokeh.transform import cumsum
from bokeh.layouts import row, column
from bokeh.models import Div

output_file(output_dir + "degree_weight_dist.html")
#output_notebook()


def _pie_chart(counts, label, title):
    """Build one pie chart from a {category: count} mapping.

    Args:
        counts: dict mapping a category label to its count.
        label: column name given to the categories; also used in the hover
            tooltip and as the legend field.
        title: figure title.

    Returns:
        (data, fig): the DataFrame backing the chart (columns `label`,
        'value', 'angle', 'pct_share', 'color') and the bokeh figure.
    """
    data = pd.Series(counts).reset_index(name='value').rename(columns={'index': label})
    total = data['value'].sum()
    data['angle'] = data['value'] / total * 2 * pi
    data['pct_share'] = round((data['value'] / total) * 100, 2)
    # Category20c only has palettes for 3..20 colours; take the 3-colour
    # palette and trim it so one or two categories do not raise KeyError.
    n_categories = len(counts)
    data['color'] = list(Category20c[max(n_categories, 3)][:n_categories])

    fig = figure(plot_height=350, plot_width=500, title=title, toolbar_location=None,
                 tools="hover", tooltips="@{0}: @value (@pct_share%)".format(label),
                 x_range=(-0.5, 1.0))
    fig.wedge(x=0, y=1, radius=0.4,
              start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
              line_color="white", fill_color='color', legend=label, source=data)

    # A pie chart needs no axes or grid.
    fig.axis.axis_label = None
    fig.axis.visible = False
    fig.grid.grid_line_color = None
    fig.legend.location = "bottom_right"
    return data, fig


degrees = dict(degree_count)
p1_data, p1 = _pie_chart(degrees, 'degree', "Degree Distribution")

weights = dict(weights_count)
p2_data, p2 = _pie_chart(weights, 'weight', "Weight Distribution")

# Add a title for the entire visualization using Div
html = """<h3>Chart 2: Degree and Weight Distribution</h3>
<i>Hover over each pie segment for more information</i>
"""
subtitle = Div(width=800, text=html)
show(column(subtitle, row(p1, p2)))

#show(p2)
#show(row(p1, p2))


In [47]:
# Up to twenty (city, degree) pairs with the lowest degree in Gm, ascending,
# shown as an HTML table without the index column.
degree_df = pd.DataFrame(
    [[city, deg] for city, deg in sorted(Gm.degree, key=lambda pair: pair[1])[:20]],
    columns=['city', 'degree'],
)

from IPython.display import display, HTML
display(HTML(degree_df.to_html(index=False)))


city,degree
Edinburgh,1
Lisboa,2
Cadiz,2
Stockholm,2
Kobenhavn,2
Barcelona,3
Brest,3
London,3
Erzurum,3
Angora,3


### Destination Card Points Distribution

In [41]:
# construct destination card points list
card_points_list = []
with open(destinations_path) as f:
    for line in f:
        fields = pattern.findall(line)
        if not fields:
            # Blank line: skip it rather than fail on the three-way unpack.
            continue
        source, destination, card_points = fields
        card_points = int(card_points)
        card_points_list.append(card_points)

## bar chart

from collections import Counter 
#card_points = [row['card_points'] for idx,row in destinations_df.iterrows()]
col_count = Counter(card_points_list) 

from bokeh.plotting import figure, show, save
from bokeh.io import output_file, output_notebook
from bokeh.models import HoverTool
from bokeh.layouts import row, column
from bokeh.models import Div

output_notebook()
output_file(output_dir + "destination_card_points_dist.html")

# One bar per distinct card value, ordered by value; labels are strings
# because they are used as a categorical x-range.
sorted_counts = sorted(col_count.items())
labels = [str(card_value) for card_value, _ in sorted_counts]
values = [n_cards for _, n_cards in sorted_counts]
source = ColumnDataSource(data=dict(labels=labels, values=values))

p = figure(x_range=labels, plot_height=350, title=None)
p.vbar(x="labels", top="values", width=0.9, source=source)
p.xgrid.grid_line_color = None
p.y_range.start = 0

hover = HoverTool(tooltips=[("card points","@labels"),("count","@values")])
p.add_tools(hover)

# Add a title for the entire visualization using Div
html = """<h3>Chart 3: Destination Card Points Distribution</h3>
<i>Hover over each bar for more information</i>
"""
subtitle = Div(width=800, text=html)
show(column(subtitle, p))


In [None]:
# construct shortest paths of destinations
# For every destination card, look at all weighted shortest paths between its
# two cities and keep the one whose segments score the most points.
temp = []
with open(destinations_path) as f:
    for line in f:
        fields = pattern.findall(line)
        if not fields:
            # Blank line: skip it rather than fail on the three-way unpack.
            continue
        source, destination, card_points = fields
        card_points = int(card_points)
        p1 = []
        points1 = 0
        distance1 = 0
        length1 = 0
        for p in nx.all_shortest_paths(G, source, destination, weight='weight'):
            _points = points_of_path(G, p)
            # strict '>' keeps the first path found among equally scoring ones
            if _points > points1:
                points1 = _points
                distance1 = distance_of_path(G, p)
                p1 = p
                length1 = len(p)  # number of nodes on the path
        total_points = card_points + points1
        temp.append((source, destination, card_points, length1, distance1, total_points, '-'.join(p1)))

destinations_df = pd.DataFrame.from_records(temp, columns=['source','destination','card_points',
                                                           'sp_length','sp_cost','sp_total_points','sp_path'])
del temp

In [None]:
def _points_per_capita(row):
    """Total points of the row's shortest path divided by its cost, to 2 decimals."""
    return round(row['sp_total_points'] / row['sp_cost'], 2)


def _connectivity(row):
    """Edge connectivity in G between the row's source and destination."""
    return nx.edge_connectivity(G, row['source'], row['destination'])


# calculate points-per-capita
destinations_df['sp_points_per_capita'] = destinations_df.apply(_points_per_capita, axis=1)

# connectivity
destinations_df['connectivity'] = destinations_df.apply(_connectivity, axis=1)

In [None]:
# Ten destination cards with the highest total points along their shortest path.
destinations_df.sort_values(by='sp_total_points', ascending=False).head(10)

In [None]:
# compare alternate paths between given cities
u = 'Palermo'
v = 'Moskva'
cutoff = nx.shortest_path_length(G, u, v) + 2
paths = alternate_scoring_paths_with_cutoff(G, u, v, cutoff)

# The destination card value is the same for every candidate path, so look it
# up once instead of filtering the DataFrame on each iteration. `.iloc[0]`
# takes the first matching row (IndexError if there is no card for u -> v)
# and avoids calling int() on a Series.
is_card = (destinations_df['source'] == u) & (destinations_df['destination'] == v)
points1 = int(destinations_df.loc[is_card, 'card_points'].iloc[0])

temp = []
for p in paths:
    length1 = len(p)  # number of nodes on the path
    cost1 = distance_of_path(G, p)
    points2 = points_of_path(G, p)
    total_points = points1 + points2
    per_capita = round(total_points / cost1, 2)
    temp.append((p, length1, cost1, total_points, per_capita))

temp_df = pd.DataFrame.from_records(temp,columns=['path','path_length','path_cost','path_total_points','path_points_per_capita'])
del temp

temp_df.sort_values(['path_points_per_capita'],ascending=False).head(5)

In [None]:
# Side-by-side histograms over all alternate paths: total points (left) and cost (right).

fig = plt.figure(figsize=(12,5))

# (temp_df column, legend label, panel title) for each subplot, left to right
hist_panels = [
    ('path_total_points', "Points", 'Palermo-Moskva Alternate paths \n Total Points distribution'),
    ('path_cost', "Cost", 'Palermo-Moskva Alternate paths \n Cost for points distribution'),
]
for panel_position, (column_name, hist_label, hist_title) in enumerate(hist_panels, start=1):
    plt.subplot(1, 2, panel_position)
    plt.hist([temp_df[column_name]], label=hist_label)
    plt.legend(loc='upper right')
    plt.title(hist_title)

plt.tight_layout()

In [None]:
# Five alternate paths with the highest total points.
temp_df.sort_values(by='path_total_points', ascending=False).head(5)

In [None]:
# Five alternate paths with the highest points per unit of cost.
temp_df.sort_values(by='path_points_per_capita', ascending=False).head(5)

In [None]:
# betweenness centrality

bc = nx.betweenness_centrality(G)

# Five nodes with the highest betweenness, highest first.
top_bc_nodes = sorted(bc, key=bc.get, reverse=True)[:5]
top_bc = {node: bc[node] for node in top_bc_nodes}

for k, v in top_bc.items():
    print('{} - {}'.format(k, v))

In [None]:
# Diameter of G (the largest eccentricity of any node). No weight is passed,
# so distances are presumably counted in hops - TODO confirm.
nx.diameter(G)

In [None]:
# Eccentricity of every node, printed as JSON ordered from largest to smallest.
ecc_unsorted = nx.eccentricity(G)
ecc = {
    node: ecc_unsorted[node]
    for node in sorted(ecc_unsorted, key=ecc_unsorted.get, reverse=True)
}
print(json.dumps(ecc, indent=4))

In [None]:
# Rebuild the simple graph from the multigraph.
G = nx.Graph(Gm)

# Clustering coefficient of every node, printed as indented JSON.
ccoef = nx.clustering(G)
ccoef_json = json.dumps(ccoef, indent=4)
print(ccoef_json)

In [None]:
# Histogram of node degrees in G.
degree_list = [deg for _, deg in G.degree]

plt.hist(degree_list, label="Degrees")
plt.legend(loc='upper right')

In [None]:
# Histogram of the 'weight' attribute over all edges of G.
weights_list = [attrs['weight'] for _, _, attrs in G.edges(data=True)]

plt.hist(weights_list, label="Weights")
plt.legend(loc='upper right')

In [None]:
# Histogram of the point values printed on the destination cards.
card_points = destinations_df['card_points'].tolist()

plt.hist(card_points, label="Destination Card Points")
plt.legend(loc='upper right')

In [None]:
# calc edge importance from minimum edge cuts:
# an edge's importance is the number of destination cards whose minimum edge
# cut (between the card's two cities) contains that edge.

# Reset the counters first so that re-running this cell does not add to the
# counts left by a previous run.
nx.set_edge_attributes(G, 0, 'importance')

for idx, row in destinations_df.iterrows():
    for u, v in nx.minimum_edge_cut(G, row['source'], row['destination']):
        G.edges[u, v]['importance'] += 1

In [None]:
# Ten edges with the highest importance count, highest first.
top_edges = sorted(G.edges(data=True), key=lambda edge: edge[2]['importance'], reverse=True)[:10]
for u, v, d in top_edges:
    print('{u}-{v} {imp}'.format(u=u, v=v, imp=d['importance']))

In [None]:
# edge-betweenness centrality (shortest paths weighted by the 'weight' attribute)

ebc = nx.edge_betweenness_centrality(G, weight='weight')

# Five edges with the highest betweenness, highest first.
top_ebc_edges = sorted(ebc, key=ebc.get, reverse=True)[:5]
top_ebc = {edge: ebc[edge] for edge in top_ebc_edges}

for k, v in top_ebc.items():
    print('{} - {}'.format(k, v))