# Creating a Bokeh plot to visualise the economic recovery subgraph

Plot a NetworkX subgraph to explore the economic recovery graph built with the page view approach (11677 nodes and 88111 edges). `seed0` and `seed1` nodes are labelled so that the interactions between them and their adjacent nodes can be identified.

OUTPUT:
- An HTML file, `Economic Recovery Neighbour Graph.html`, is saved in the working directory

REQUIREMENTS:
- Run `step_one_identify_seed_pages.ipynb` to define `seed0` and `seed1` pages
- Run `step_two_extract_page_hits.sql` to extract page hits for sessions that visit at least one `seed0` or `seed1` page
- Run `step_three_extract_nodes_and_edges.sql` to extract nodes and edges
- Run `step_four_create_networkx_graph.ipynb` to create NetworkX graph of the economic recovery whole user journey

## Import statements

In [None]:
import pickle

import networkx as nx
from bokeh.io import output_notebook, show, save
from bokeh.models import Circle, MultiLine
from bokeh.models import NodesAndLinkedEdges
from bokeh.plotting import figure
from bokeh.plotting import from_networkx

## Import graph and define seed pages

In [None]:
# Load the pickled NetworkX graph (presumably the one written by
# step_four_create_networkx_graph.ipynb - see REQUIREMENTS above).
# `nx.read_gpickle` was deprecated in NetworkX 2.6 and removed in 3.0; the
# documented replacement is the standard-library `pickle` module, which works
# with both old and new NetworkX releases.
# NOTE(security): unpickling can execute arbitrary code, so only load files
# produced by this project's own pipeline.
with open("../../data/processed/functional_session_hit_directed_graph_er.gpickle", "rb") as graph_file:
    G = pickle.load(graph_file)

# Page paths that make up the `seed0` set.
seed0 = [
    "/topic/further-education-skills",
    "/browse/working/finding-job",
]

# Page paths that make up the `seed1` set.
# NOTE(review): '/browse/working/finding-job' appears in both lists -
# confirm that this overlap is intended.
seed1 = [
    "/browse/working/state-pension",
    "/browse/working/workplace-personal-pensions",
    "/browse/working/rights-trade-unions",
    "/contact-jobcentre-plus",
    "/jobseekers-allowance",
    "/browse/working/tax-minimum-wage",
    "/browse/working/time-off",
    "/looking-for-work-if-disabled",
    "/become-apprentice",
    "/volunteering",
    "/tell-employer-or-college-about-criminal-record",
    "/browse/working/finding-job",
    "/employers-checks-job-applicants",
    "/browse/working/contract-working-hours",
    "/browse/working/redundancies-dismissals",
    "/browse/working/armed-forces",
    "/apply-apprenticeship",
    "/find-a-job",
    "/prove-right-to-work",
    "/job-offers-your-rights",
    "/report-problem-criminal-record-certificate",
    "/criminal-record-check-documents",
    "/criminal-record-checks-apply-role",
    "/moving-from-benefits-to-work",
    "/jobsearch-rights",
    "/find-internship",
    "/employment-rights-for-interns",
    "/topic/further-education-skills/apprenticeships",
    "/find-traineeship",
    "/work-reference",
    "/career-skills-and-training",
    "/access-to-work",
    "/request-copy-criminal-record",
    "/guidance/apply-for-communication-support-at-a-job-interview-if-you-have-a-disability-or-health-condition-access-to-work",
    "/topic/further-education-skills/vocational-qualifications",
    "/register-jobs-international-organisations",
    "/topic/defence-armed-forces/military-recruitment-training-operations",
    "/topic/further-education-skills/administration",
    "/topic/further-education-skills/learning-records-service",
    "/topic/work-careers/government-graduate-schemes",
    "/topic/work-careers/secondments-with-government",
]

## Add node properties related to colour

In [None]:
# Display every node label in the graph as a quick sanity check
list(G.nodes())

In [None]:
# Fill colour for each node category ('none' = page in neither seed list)
wuj_colours = dict(none='white', seed0='red', seed1='blue')

# Highlight colour for each node category
# (not referenced by the other cells visible in this notebook)
wuj_highlight_colours = dict(none='green', seed0='orange', seed1='purple')

# Seed page paths per category, stored as sets for membership tests
wuj_pages = {'seed0': set(seed0), 'seed1': set(seed1)}

# Label every node with its category, fill colour and a hover title.
# Categories are checked in the order they appear in `wuj_pages`, so a page
# listed in both seed lists is labelled 'seed0'.
for page, node_attrs in G.nodes(data=True):
    category = next(
        (name for name, pages in wuj_pages.items() if page in pages),
        'none',
    )
    node_attrs['wuj'] = category
    node_attrs['wuj_colour'] = wuj_colours[category]
    node_attrs['title'] = page

## Add colour to edges

In [166]:
# Default styling applied to every edge for the Bokeh plot:
# line colour, colour when highlighted, and opacity.
default_edge_style = {
    'wuj_edge_colour': 'black',
    'edge_highlight_colour': '#027600',
    'edge_alpha': 0.1,
}

for _source, _target, edge_attrs in G.edges(data=True):
    edge_attrs.update(default_edge_style)

In [167]:
# seed0 graph: the subgraph of G induced by the seed0 pages.
# Nodes have to be selected by name. Iterating `G.nodes(data=True)` yields
# (node, attribute-dict) pairs, and testing the attribute dict for membership
# in the list of page paths never matches, which left this subgraph empty and
# the seed0 edges unstyled.
seed0_pages = set(seed0)
seed0_graph = G.subgraph([page for page in G.nodes if page in seed0_pages])

# A subgraph view shares its edge attribute dicts with G, so updating them
# here restyles the corresponding edges of G itself.
for i, j, data in seed0_graph.edges(data=True):
    data['wuj_edge_colour'] = 'red'
    data['edge_highlight_colour'] = 'orange'
    data['edge_alpha'] = 0.5

In [168]:
# seed1 graph: the subgraph of G induced by the seed1 pages.
# Nodes have to be selected by name. Iterating `G.nodes(data=True)` yields
# (node, attribute-dict) pairs, and testing the attribute dict for membership
# in the list of page paths never matches, which left this subgraph empty and
# the seed1 edges unstyled.
seed1_pages = set(seed1)
seed1_graph = G.subgraph([page for page in G.nodes if page in seed1_pages])

# A subgraph view shares its edge attribute dicts with G, so updating them
# here restyles the corresponding edges of G itself.
for i, j, data in seed1_graph.edges(data=True):
    data['wuj_edge_colour'] = 'blue'
    data['edge_highlight_colour'] = '#E60CEC'
    data['edge_alpha'] = 0.5

## Create neighbourhoods

In [169]:
# Collect the nodes of the radius-1 ego graph around each seed0 page,
# merge them into one set, and take the subgraph of G induced by that set.
seed0_ego_node_sets = [
    set(nx.ego_graph(G, seed_page, radius=1).nodes)
    for seed_page in seed0
]
seed0_neighbourhood = set.union(*seed0_ego_node_sets)
seed0_neighbourhood_graph = G.subgraph(seed0_neighbourhood)

In [None]:
# Collect the nodes of the radius-1 ego graph around each seed1 page,
# merge them into one set, and take the subgraph of G induced by that set.
seed1_ego_node_sets = [
    set(nx.ego_graph(G, seed_page, radius=1).nodes)
    for seed_page in seed1
]
seed1_neighbourhood = set.union(*seed1_ego_node_sets)
seed1_neighbourhood_graph = G.subgraph(seed1_neighbourhood)

## Plot the Bokeh graph

In [None]:
# render Bokeh output inline in the notebook
output_notebook()

# plot title; also used as the stem of the saved HTML filename
title = 'Economic Recovery Neighbour Graph'

# fields shown in the tooltip when hovering over a node
HOVER_TOOLTIPS = [
    ('Seed page', '@wuj'),
    ('Page path', '@title'),
]

# fixed highlight colours
node_highlight_color = 'white'
edge_highlight_color = 'green'  # defined but not referenced again in this cell

# names of the node / edge attributes of the graph that drive the styling
color_by_this_attribute = 'wuj_colour'
edge_color_by_this_attribute = 'wuj_edge_colour'
edge_alpha_by_this_attribute = 'edge_alpha'
edge_highlight_color_by_this_attribute = 'edge_highlight_colour'

# create the figure - dimensions, toolbar, tooltips and title
# NOTE(review): `plot_width` / `plot_height` are believed to be removed in
# Bokeh 3.x in favour of `width` / `height` - confirm against the pinned version.
plot = figure(
    title=title,
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset",
    active_scroll='wheel_zoom',
    plot_width=1400,
    plot_height=800,
)

# build the Bokeh graph renderer from the seed0 neighbourhood subgraph,
# positioned with the Kamada-Kawai layout
network_graph = from_networkx(seed0_neighbourhood_graph, nx.kamada_kawai_layout, center=(0, 0))

node_renderer = network_graph.node_renderer
edge_renderer = network_graph.edge_renderer

# nodes: fill colour comes from the node attribute; hovered and selected
# nodes share one fixed highlight style
node_renderer.glyph = Circle(fill_color=color_by_this_attribute)
highlighted_node_style = dict(fill_color=node_highlight_color, line_width=2)
node_renderer.hover_glyph = Circle(**highlighted_node_style)
node_renderer.selection_glyph = Circle(**highlighted_node_style)

# edges: colour and opacity come from edge attributes; hovered and selected
# edges use the per-edge highlight colour and a slightly thicker line
edge_renderer.glyph = MultiLine(
    line_color=edge_color_by_this_attribute,
    line_alpha=edge_alpha_by_this_attribute,
    line_width=1,
)
highlighted_edge_style = dict(line_color=edge_highlight_color_by_this_attribute, line_width=1.5)
edge_renderer.hover_glyph = MultiLine(**highlighted_edge_style)
edge_renderer.selection_glyph = MultiLine(**highlighted_edge_style)

# selecting or hovering over a node also highlights the edges linked to it
network_graph.selection_policy = NodesAndLinkedEdges()
network_graph.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(network_graph)

# display in the notebook, then write the standalone HTML file
show(plot)
save(plot, filename=f"{title}.html")

## Nodes and their edges

To see if there are any patterns in relation to a node and its edges, e.g. are there any clear segments for whole user journeys?

In [None]:
# map every node to the list of its neighbours
dict_of_nodes_and_edges = {
    node: list(G.neighbors(node))
    for node in G.nodes()
}

# order the (node, neighbours) pairs by number of neighbours, largest first
sorted(
    dict_of_nodes_and_edges.items(),
    key=lambda item: len(item[1]),
    reverse=True,
)