In [1]:
# http://holoviews.org/user_guide/Network_Graphs.html
import numpy as np
import pandas as pd
import holoviews as hv
import networkx as nx

hv.extension('bokeh')

In [2]:
%opts Nodes Graph [width=800 height=800 xaxis=None yaxis=None]


In [6]:
%%opts Graph [color_index='circle']
%%opts Graph (node_size=10 edge_line_width=1)
colors = ['#000000']+hv.Cycle('Category20').values
edges_df = pd.read_csv('../data/processed_network/fb_edges.csv')
# they do the nodes in one step, using spcial hv.Nodes command
fb_nodes = pd.read_csv('../data/processed_network/fb_nodes.csv')

edges_df.head()

Unnamed: 0,start,end
0,236,186
1,236,84
2,236,62
3,236,142
4,236,252


In [7]:
fb_nodes.head()

Unnamed: 0,x,y,index,circle
0,0.346231,0.294644,1,circle15
1,0.754652,0.903146,2,circle10
2,0.248924,0.535731,3,circle15
3,0.286255,0.033878,4,
4,0.385894,0.26404,5,circle16


I'm not sure what the x and y mean, probably jsut random variables? Or do they provide position?

In [8]:
# we add this extra step here, to make it two steps, so we can view head above
fb_nodes = hv.Nodes(fb_nodes).sort()
fb_graph = hv.Graph((edges_df, fb_nodes), label='Facebook Circles')
fb_graph = fb_graph.redim.range(x=(-0.05, 1.05), y=(-0.05, 1.05)).options(cmap=colors)
fb_graph

## Can we repeat the above with out data?

In [17]:
# convert from csv to pandas df, can read compressed also if required
df_nodes_file = pd.read_csv('../data/processed_network/test_nodes.csv', names = ["page"])
df_edges_file = pd.read_csv('../data/processed_network/test_edges.csv', usecols=[0, 1, 2])
# fix messy names
df_edges_file.columns = df_edges_file.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '')


In [18]:
nodes = df_nodes_file.reset_index().set_index('page').squeeze() 
# remove duplicates
nodes = nodes.loc[~nodes.index.duplicated(keep='first')]

edges = df_edges_file
# need indices
edges['source'] = df_edges_file.source_node.map(nodes)
edges['destination'] = df_edges_file.destination_node.map(nodes)

In [19]:
# remove duplicates then convert to integer
# create empty list to hold indices
index_to_drop = []
# append list with indices
for index, row in edges['destination'].iteritems():
    if pd.isnull(row):
        index_to_drop.append(index)


edges.drop(edges.index[[index_to_drop]], inplace=True)
# now we should be able to convert destination into int
edges['destination'].astype(int)

edges.head()

Unnamed: 0,source_node,destination_node,weight,source,destination
0,/tax-codes/updating-tax-code,/check-income-tax-current-year,50181,20741,2779.0
1,/check-income-tax-current-year,/check-income-tax-current-year/sign-in/prove-i...,2612646,2779,2781.0
2,/penalty-points-endorsements,/view-driving-licence,27756,17003,21575.0
3,/government/organisations/companies-house,/get-information-about-a-company,3787239753,7758,5896.0
4,/council-tax-bands,/council-tax,19527,3618,3615.0


In [20]:
# need to convert from pandas series to pandas dataframe, to pass to holoviews later
nodes = nodes.to_frame()
# rename to avoid confusion over index
nodes.columns = ["index"]
# page is currently the index, let's make a new column that contains the info for holoviews
nodes['label'] = nodes.index

nodes.head()

Unnamed: 0_level_0,index,label
page,Unnamed: 1_level_1,Unnamed: 2_level_1
/,0,/
/1619-bursary-fund,1,/1619-bursary-fund
/1619-bursary-fund/eligibility,2,/1619-bursary-fund/eligibility
/1619-bursary-fund/further-information,3,/1619-bursary-fund/further-information
/1619-bursary-fund/how-to-claim,4,/1619-bursary-fund/how-to-claim


Now let's drop the variables we don't need so our pandas look like those in the facebook example above.


In [22]:
nodes = nodes[['index', 'label']]
nodes.head()

Unnamed: 0_level_0,index,label
page,Unnamed: 1_level_1,Unnamed: 2_level_1
/,0,/
/1619-bursary-fund,1,/1619-bursary-fund
/1619-bursary-fund/eligibility,2,/1619-bursary-fund/eligibility
/1619-bursary-fund/further-information,3,/1619-bursary-fund/further-information
/1619-bursary-fund/how-to-claim,4,/1619-bursary-fund/how-to-claim


In [23]:
edges = edges[['source', 'destination', 'weight']]

edges.head()

Unnamed: 0,source,destination,weight
0,20741,2779.0,50181
1,2779,2781.0,2612646
2,17003,21575.0,27756
3,7758,5896.0,3787239753
4,3618,3615.0,19527


In [30]:
# create some mroe space for the plot
%opts Nodes Graph [width=1000 height=1000 xaxis=None yaxis=None]


In [32]:
# we ignore some of the facebook example specific quirks, by commenting it out
fb_nodes = hv.Nodes(fb_nodes).sort()
fb_graph = hv.Graph((edges_df, fb_nodes), label='GOV.UK user journeys')
#fb_graph = fb_graph.redim.range(x=(-0.05, 1.05), y=(-0.05, 1.05))
fb_graph

Bundling it, to make it less hairballey

In [33]:
from holoviews.operation.datashader import datashade, bundle_graph
bundled = bundle_graph(fb_graph)
bundled