# Citation Network

This notebook visualises the citation network created in `citation_scrape.ipynb`.

## Install

In [None]:
!pip install pandas
!pip install pyvis

## Imports

In [None]:
import json
import pandas as pd
from pyvis.network import Network

## File Paths

_These paths will need to be updated to match the files' locations on your local system. The relevant files can be found in the 'data' directory._

_Default file paths are listed below_

In [None]:
# CSV holding the citation network; it is populated by the 'citation_scrape' notebook
# and is read below into `citation_df`.
CITATION_NETWORK_FILE_PATH = '../data/citation_network.csv'

# JSON file loaded below into `label_colour_dict`, which is indexed by relation
# to pick the colour of each edge.
LABEL_COLOUR_DICT_FILE_PATH = '../data/label_colours.json'


# Directory (not a file) in which the graph HTML file generated by this notebook is saved.
GRAPH_FILE_PATH = '../data'

## Open files

In [None]:
# Citation edges; the 'source', 'target' and 'relation' columns are used later on.
citation_df = pd.read_csv(CITATION_NETWORK_FILE_PATH)

# Read the relation -> colour mapping. The encoding is given explicitly so the
# file is decoded the same way on every platform instead of using the locale default.
with open(LABEL_COLOUR_DICT_FILE_PATH, 'r', encoding='utf-8') as colour_file:
    label_colour_dict = json.load(colour_file)

## Add nodes to graph

In [None]:
def add_citation_relation_network(data, network, root_ids, desired_relations, visible_nodes=None):
    """Add the root nodes and the selected citation edges from ``data`` to ``network``.

    Args:
        data: Object indexable by ``'source'``, ``'target'`` and ``'relation'``
            (e.g. a DataFrame); the three sequences are zipped row by row.
        network: Graph object exposing ``add_node`` and ``add_edge``
            (a pyvis ``Network`` in this notebook). It is modified in place.
        root_ids: Iterable of node ids added up front with colour ``'#9688F2'``.
        desired_relations: Container of relation values; an edge is only added
            when its relation is in this container.
        visible_nodes: Optional container of node ids. When given, an edge is
            only added if both its source and its target are in it. ``None``
            (the default) disables this filter.

    Returns:
        The same ``network`` object that was passed in.

    Raises:
        KeyError: If a selected relation has no entry in the module-level
            ``label_colour_dict``.
    """
    # De-duplicate rows while keeping first-seen order, so the network is built
    # in the same order on every run (a plain set of tuples has no stable order).
    unique_edges = dict.fromkeys(zip(data['source'], data['target'], data['relation']))

    # NOTE(review): roots are added before any edge endpoint; presumably the
    # network ignores a second add of the same id so roots keep this colour - confirm.
    for root in root_ids:
        network.add_node(root, root, title=root, color='#9688F2', size=20)

    # Whether a visibility filter applies does not change per edge, so decide once.
    restrict_to_visible = visible_nodes is not None

    for source, target, relation in unique_edges:
        if relation not in desired_relations:
            continue  # Not a relation we want to explore
        if restrict_to_visible and not (source in visible_nodes and target in visible_nodes):
            continue  # At least one endpoint is outside the visible set

        network.add_node(source, source, title=source, color='#60D7ED', size=20)
        network.add_node(target, target, title=target, color='#60D7ED', size=20)
        network.add_edge(source, target, color=label_colour_dict[relation])

    print('Completed adding citation relations to network')
    return network

## Find nodes to a level from the root(s)

In [None]:
def get_nodes_to_level(root_node, network, max_level=0):
    """Collect the roots plus every node within ``max_level + 1`` hops of them.

    Args:
        root_node: Iterable of root node ids (despite the singular name). Every
            root is included in the result.
        network: Object exposing ``neighbors(node_id)``, which returns an
            iterable of adjacent node ids.
        max_level: Number of hops beyond the direct neighbours to explore.
            ``0`` (the default) yields the roots and their direct neighbours;
            a negative value yields only the roots.

    Returns:
        A set containing the roots and all nodes discovered from them.
    """
    discovered_nodes = set(root_node)
    # Only nodes found in the previous round can lead to anything new, so each
    # round asks for the neighbours of that frontier instead of re-querying
    # every node discovered so far.
    frontier = set(discovered_nodes)

    for _ in range(max_level + 1):  # One round per level until reaching max level
        if not frontier:
            break  # Nothing new was found last round; further rounds cannot add nodes

        reached = set()
        for node in frontier:
            reached.update(network.neighbors(node))

        frontier = reached - discovered_nodes
        discovered_nodes |= frontier

    return discovered_nodes

## Parameters

_These can be changed to develop a different type of queried network_


In [None]:
# Node ids the network is explored from; each one is added to the graph as a root node.
ROOTS = ['ukpga_1984_55']
# Example value: ['ukpga_1984_55']

# Only edges whose relation appears in this list are added to the network.
DESIRED_RELATIONS = ['repealed']
# Candidate values (presumably the relation labels present in the data - confirm):
# ['added', 'amended', 'applied', 'certain functions transferred', 'excluded', 'extended', 'functions transferred and modified', 'functions transferred', 'words inserted', 'inserted', 'modified', 'number repealed', 'omitted', 'powers transferred', 'repealed', 'substituted', 'wholly in force at']

# Passed as `max_level` to `get_nodes_to_level`; 0 keeps the roots and their direct neighbours.
NODE_DISCOVERY_LEVEL = 0

## Create network

Create the network of legislation from the loaded data. Once it has been created, traverse the network to find the
nodes that are discoverable from the root(s).


_This step can often take a long time as the **whole** network is being loaded. This could be improved by querying the
dataframe instead of creating and querying the network._

In [None]:
# First pass: build the network from every edge with a desired relation, so it
# can be traversed to find which nodes are discoverable from the root(s).
n = Network()
n.force_atlas_2based()

n = add_citation_relation_network(citation_df, n, ROOTS, DESIRED_RELATIONS)

visible = get_nodes_to_level(ROOTS, n, NODE_DISCOVERY_LEVEL)
# `visible` always contains the roots themselves, so subtract every distinct
# root (not just one) to report only the nodes found beyond them.
print(f'Found {len(visible) - len(set(ROOTS))} nodes from the root')

## Create network, only showing visible nodes

Using the list of discoverable nodes, recreate the network but this time only add the discoverable nodes into the
network

In [None]:
# Second pass: rebuild the network from scratch, this time passing `visible` so
# that only edges whose two endpoints are both discoverable are added.
n = Network(bgcolor='#222222', font_color='white', height='100%', width='100%')
n.force_atlas_2based()

n = add_citation_relation_network(
    citation_df,
    n,
    ROOTS,
    DESIRED_RELATIONS,
    visible,
)

# The output file name encodes the chosen relations (joined with '_', spaces
# removed) and the discovery level.
relations_slug = "_".join(DESIRED_RELATIONS).replace(" ", "")
graph_html_path = (
    f'{GRAPH_FILE_PATH}/citation_relation_{relations_slug}'
    f'_LEVEL_{NODE_DISCOVERY_LEVEL}_graph.html'
)
n.show(graph_html_path)