# viewing council datasets as a graph


In [27]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
import os

In [28]:
# GET THE DATAFRAME
# source https://standards.esd.org.uk/?uri=list%2Ffunctions&tab=downloads

# Use the local copy of the export when it exists, otherwise read it from the remote URL.
location = '../data/functions_mappedToenglishAndWelshServices.csv'
file = (
    location
    if os.path.exists(location)
    else 'https://standards.esd.org.uk/csv?uri=list/functions&mappedToUri=list/englishAndWelshServices'
)

# Keep only rows whose Status is 'Live', and only the four columns used below.
df = (
    pd.read_csv(file, quotechar='"')
    .loc[lambda frame: frame['Status'] == 'Live',
         ['Level 1', 'Level 2', 'Label', 'Mapped label']]
)

## create a Logseq graph

In [29]:
%%bash
#  PREVIEW THE LOGSEQ PAGES DIRECTORY: print the first and last three entries of ../pages/
#  (this cell only lists files; it does not delete or clear anything)
ls ../pages/ | head -n 3
echo ...
ls ../pages/ | tail -n 3

16 to 19 bursary fund.md
Abandoned shopping trolleys.md
Abandoned vehicles.md
...
Zoning.md
Zoo licence.md
Zoos and farm parks.md


In [None]:
#  CREATE INTERLINKED FILE NETWORK FOR USE IN LOGSEQ GRAPH CONTINUED
# Writes one markdown page per node into ../pages/. Each page holds one
# `- [[child]]` bullet per child, following two chains:
#   index -> Level 1 -> Level 2 -> Label -> Mapped label
#   Level 1 -> Label -> Mapped label   (rows that have no Level 2)
# Pages are opened in 'w' mode, so an existing page of the same name is overwritten.
pages_dir = '../pages/'  # deliberately not named `dir`, which would shadow the builtin


def _page_path(page_name):
    """Return the path of the markdown page called `page_name` inside `pages_dir`."""
    return os.path.join(pages_dir, f'{page_name}.md')


# Presence masks over the full frame. Conditions are combined with `&` in a single
# selection instead of chaining df[mask_a][mask_b], which applies a full-length mask
# to an already filtered frame and makes pandas warn about reindexing the key.
has_l2 = df['Level 2'].notna()
has_mapped = df['Mapped label'].notna()

with open(_page_path('index'), 'w', encoding='utf-8') as ftop:
    for l1 in df['Level 1'].dropna().unique():
        ftop.write(f'- [[{l1}]]\n')
        print(f'-1 [[{l1}]]')
        in_l1 = df['Level 1'] == l1
        with open(_page_path(l1), 'w', encoding='utf-8') as fl1:
            # Level 1 -> Level 2 -> Label -> Mapped label
            for l2 in df.loc[in_l1 & has_l2, 'Level 2'].unique():
                print(f'--2 [[{l2}]]')
                fl1.write(f'- [[{l2}]]\n')
                with open(_page_path(l2), 'w', encoding='utf-8') as fl2:
                    for l2l in df.loc[df['Level 2'] == l2, 'Label'].unique():
                        print(f'---2l [[{l2l}]]')
                        fl2.write(f'- [[{l2l}]]\n')
                        with open(_page_path(l2l), 'w', encoding='utf-8') as fl2l:
                            mapped_rows = has_l2 & has_mapped & (df['Label'] == l2l)
                            for l2lm in df.loc[mapped_rows, 'Mapped label'].unique():
                                print(f'---2lm [[{l2lm}]]')
                                fl2l.write(f'- [[{l2lm}]]\n')
                                # Leaf page: create (or truncate to) an empty file.
                                # '/' cannot appear in a file name, so it becomes ' and '.
                                with open(_page_path(l2lm.replace("/", " and ")), 'w', encoding='utf-8'):
                                    pass
            # Level 1 -> Label -> Mapped label, for rows without a Level 2
            for label in df.loc[in_l1 & ~has_l2 & has_mapped, 'Label'].unique():
                # The page name (and the link to it) uses '_' in place of '/'.
                l1l = label.replace('/', '_')
                print(f'--1l [[{l1l}]]')
                fl1.write(f'- [[{l1l}]]\n')
                with open(_page_path(l1l), 'w', encoding='utf-8') as fl1l:
                    # Look rows up by the ORIGINAL label. Filtering on the sanitised
                    # name never matches a label containing '/', which silently
                    # dropped every mapped-label link for those pages.
                    mapped_rows = ~has_l2 & has_mapped & (df['Label'] == label)
                    for l1lm in df.loc[mapped_rows, 'Mapped label'].unique():
                        print(f'---1lm [[{l1lm}]]')
                        fl1l.write(f'- [[{l1lm}]]\n')
                        # Leaf page: create (or truncate to) an empty file.
                        with open(_page_path(l1lm.replace("/", " and ")), 'w', encoding='utf-8'):
                            pass


## create a NetworkX graph

In [30]:
# GENERATE AN EDGELIST OF ALL THE CONNECTIONS
# Presence masks for the columns that make up the hierarchy.
has_l1 = df['Level 1'].notna()
has_l2 = df['Level 2'].notna()
has_label = df['Label'].notna()
has_mapped = df['Mapped label'].notna()

# One entry per edge family: (row mask, column for 'a', column for 'b', drop duplicate pairs?)
edge_specs = [
    (has_l1 & ~has_l2 & has_mapped, 'Level 1', 'Label', True),            # l1l
    (has_l1 & has_l2, 'Level 1', 'Level 2', True),                        # l1
    (has_l2 & has_label, 'Level 2', 'Label', True),                       # l2
    (has_l2 & has_label & has_mapped, 'Level 2', 'Label', True),          # l2l
    (~has_l2 & has_label & has_mapped, 'Label', 'Mapped label', False),   # l3
    (has_l2 & has_label & has_mapped, 'Label', 'Mapped label', False),    # l4
]

edge_frames = []
for row_mask, a_col, b_col, dedupe in edge_specs:
    pairs = df.loc[row_mask, [a_col, b_col]]
    if dedupe:
        pairs = pairs.drop_duplicates()
    edge_frames.append(pairs.rename(columns={a_col: 'a', b_col: 'b'}))

# Stack every family into a single two-column ('a', 'b') frame.
edgelist = pd.concat(edge_frames)


In [31]:
# TURN EDGELIST INTO GRAPH
# Columns "a" and "b" of `edgelist` supply the two endpoints of each edge.
G = nx.from_pandas_edgelist(edgelist, source="a", target="b")

## prepare to visualise

In [32]:
# PREP GRAPH FOR PYVIS
# Dark-themed pyvis network with the select and filter menus switched off.
nt = Network(
    notebook=True,
    cdn_resources='remote',
    bgcolor="#222222",
    font_color="white",
    select_menu=False,
    filter_menu=False,
)
# Copy the nodes and edges of the NetworkX graph into the pyvis network.
nt.from_nx(G)

In [41]:
# RENDER TO HTML
# Writes the full pyvis network `nt` to data-network.html (a relative path).
nt.write_html('data-network.html')

In [35]:
# CREATE GML THAT CAN BE USED IN E.G. GEPHI
# Exports the NetworkX graph `G` (not the pyvis network) to data-network.gml (a relative path).
nx.write_gml(G, "data-network.gml")

In [36]:
# RENDER HTML INSIDE NOTEBOOK
# ..TAKES AGES
# nt.show('../docs/data-network.html')

In [43]:
# CREATE GRAPH NARROWED BY KEYWORD
def topic_graph(topic: str, G):
    """Build a pyvis network of the nodes reachable from nodes matching `topic`.

    Side effect: writes the network to ``data-network-<topic lowercased>.html``
    (a relative path).

    Args:
        topic: Keyword to search for. A node matches when `topic` is a
            case-sensitive substring of the node's name.
        G: NetworkX graph to search.

    Returns:
        The pyvis ``Network`` built from the matching subgraph.
    """
    # Nodes whose name contains the keyword. Non-string nodes (e.g. a NaN label)
    # cannot contain a substring and are skipped instead of raising TypeError.
    matching_nodes = [
        node for node in G.nodes()
        if isinstance(node, str) and topic in node
    ]

    # Union of everything reachable from each matching node; a set keeps it unique.
    # NOTE(review): nx.descendants() excludes the start node itself, so a matching
    # node only appears in the result if it is reachable from ANOTHER matching
    # node - confirm whether matching nodes should always be included.
    reachable = set()
    for node in matching_nodes:
        reachable.update(nx.descendants(G, node))

    # plot the descendants
    s = G.subgraph(reachable)
    ntsub = Network(notebook=True, cdn_resources='remote', bgcolor="#222222", font_color="white",
                    select_menu=True, filter_menu=False)
    ntsub.from_nx(s)
    # Uses the `topic` argument; the body previously read an undefined name
    # `search`, which fails with NameError or silently picks up a stale global.
    ntsub.write_html(f'data-network-{topic.lower()}.html')

    return ntsub


In [44]:
def make_topic_map(search: str, G):
    """Build the keyword-filtered network for `search` and show it in the notebook.

    Delegates to `topic_graph(search, G)` and then calls `.show()` on the
    returned network with the file name ``data-network-<search lowercased>.html``.
    Returns None.
    """
    topic_net = topic_graph(search, G)
    html_name = f'data-network-{search.lower()}.html'
    # CREATE FILE AND RENDER HTML INSIDE NOTEBOOK
    topic_net.show(html_name)

In [34]:
# Keywords to build a separate topic map for.
topics = ['Building', 'Planning', 'Housing']

In [45]:
# Build and show one map per topic. A plain loop is used because the calls are
# made only for their side effects; a list comprehension would build and echo
# a throwaway [None, None, None].
for topic_name in topics:
    make_topic_map(topic_name, G)

data-network-bulding.html
data-network-planning.html
data-network-housing.html


[None, None, None]

In [46]:
%%bash
# Move every .html file in the current working directory into ../docs/.
# The glob is not limited to data-network*.html, so any other .html file here is moved too.
mv *.html ../docs/