In [None]:
import os
import random

import igraph as ig
import matplotlib.pyplot as plt
import pandas as pd
from igraph import Graph, VertexSeq

In [None]:
# Read the directory that holds the raw datasets from an environment variable.
# Fail fast with a clear message when it is unset: otherwise the value is
# None and building the CSV path from it later fails with an opaque TypeError.
DIR_DATA_RAW = os.getenv("DIR_DATA_RAW")
if not DIR_DATA_RAW:
    raise EnvironmentError(
        "The DIR_DATA_RAW environment variable must be set to the raw data directory."
    )
print(DIR_DATA_RAW)

In [None]:
# Load the edgelist dataset into a pandas data frame and drop repeated rows.
# The path is built with os.path.join rather than string concatenation, and
# the de-duplication is chained so the frame is never mutated in place.
edges = pd.read_csv(
    os.path.join(DIR_DATA_RAW, "structural_network_adjacency_list_20190301.csv")
).drop_duplicates()

In [None]:
# Keep the edges where the path at either end contains the word 'childcare'.
#   regex=False  -> match the search string literally (str.contains treats
#                   the pattern as a regular expression by default).
#   na=False     -> a missing path counts as "no match" instead of putting
#                   NaN into the boolean mask.
search_string = "childcare"
childcare_edges = edges[
    edges.source_base_path.str.contains(search_string, regex=False, na=False)
    | edges.sink_base_path.str.contains(search_string, regex=False, na=False)
]

In [None]:
# Build a directed igraph graph from the filtered edge data frame.
g = ig.Graph.DataFrame(childcare_edges, directed=True)

In [None]:
# Split the graph into weakly connected components (edge direction is
# ignored when deciding what is connected).  There is one big component
# and many small ones disconnected from it; only the largest is kept.
components = g.clusters(mode="weak")
childcare = components.giant()

In [None]:
# Detect communities within the graph.  The spinglass algorithm takes an
# upper limit on the number of communities (`spins`): it might detect
# fewer than this, but it won't detect more.  Every node (every page)
# is assigned to exactly one community.
#
# Spinglass is stochastic, so seed the random number generator first to
# get the same communities on every Restart & Run All.  igraph draws its
# random numbers from Python's `random` module by default.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
communities = childcare.community_spinglass(spins=5)

In [None]:
# Calculate the out-degree of each node (each page): the number of
# edges that start at the node.  The list is in vertex order.
# NOTE(review): mode="out" ignores incoming edges.  If the total degree
# (edges into and out of the node) was intended, use mode="all" — confirm.
degrees = childcare.degree(mode="out")

## Visualise in igraph

* Colour: community
* Label: degree
* Tooltip: not supported

In [None]:
# One palette entry per community; colour each vertex by the community
# it was assigned to.
pal = ig.drawing.colors.ClusterColoringPalette(len(communities))
childcare.vs["color"] = [
    pal.get(community_id) for community_id in communities.membership
]
# Keep the plot as the cell's last expression so the notebook renders it.
ig.plot(
    childcare,
    layout="fruchterman_reingold",
    vertex_label=degrees,
)

### Problems with igraph

No tooltip to show which URL is represented by each node.

## Try bokeh + networkx

The bokeh library does interactive visualisations (with tooltips).

But bokeh can't deal with igraph graphs, only networkx ones.

And networkx doesn't implement spinglass

So the route is igraph > networkx > bokeh


In [None]:
import networkx as nx
from bokeh.io import output_notebook, save, show
from bokeh.models import Circle, ColumnDataSource, MultiLine, Range1d
from bokeh.plotting import figure, from_networkx
from bokeh.transform import factor_cmap

# Make bokeh work in jupyter: configure it so that later show() calls
# render their output inline in the notebook.
output_notebook()

In [None]:
# Choose a title
title = "GOV.UK childcare pages graph"

# Display the URL in the tooltip.  "@name" reads the "name" column of the
# node data source — presumably filled from the igraph vertex names during
# the networkx conversion below; verify.
HOVER_TOOLTIPS = [("Page", "@name")]

# Seed for the spring layout.  The layout starts from random positions, so
# without a seed the picture changes on every run.
LAYOUT_SEED = 42

# Create a plot — set dimensions, toolbar, and title.  The axis ranges are
# just wider than the layout's scale of 10.
plot = figure(
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset",
    active_scroll="wheel_zoom",
    x_range=Range1d(-10.1, 10.1),
    y_range=Range1d(-10.1, 10.1),
    title=title,
)

# Convert the igraph object to networkx
childcare_nx = childcare.to_networkx()

# Create a network graph object with spring layout, supposedly the same
# algorithm as igraph's Fruchterman-Reingold layout:
# https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
# Keyword arguments after the layout function are forwarded to it, so
# `seed` makes the node positions reproducible.
network_graph = from_networkx(
    childcare_nx,
    nx.spring_layout,
    scale=10,
    center=(0, 0),
    seed=LAYOUT_SEED,
)

# Set node size and color
network_graph.node_renderer.glyph = Circle(size=15, fill_color="skyblue")

# Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

# Add network graph to the plot
plot.renderers.append(network_graph)
show(plot)

### Problems with bokeh + networkx

1. The networkx spring layout is worse than the igraph spring layout, even though they supposedly both use the Fruchterman–Reingold algorithm.
2. I don't know how to colour the bokeh visualisation by community.