<a href="https://colab.research.google.com/github/iamvarol/blogposts/blob/main/medium/europe_gas_network/european_gas_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Neo4j Sandbox Setup

In [None]:
!pip install -q neo4j

[?25l[K     |███▋                            | 10 kB 21.3 MB/s eta 0:00:01[K     |███████▎                        | 20 kB 10.2 MB/s eta 0:00:01[K     |██████████▉                     | 30 kB 6.2 MB/s eta 0:00:01[K     |██████████████▌                 | 40 kB 5.7 MB/s eta 0:00:01[K     |██████████████████▏             | 51 kB 3.0 MB/s eta 0:00:01[K     |█████████████████████▊          | 61 kB 3.5 MB/s eta 0:00:01[K     |█████████████████████████▍      | 71 kB 4.0 MB/s eta 0:00:01[K     |█████████████████████████████   | 81 kB 4.5 MB/s eta 0:00:01[K     |████████████████████████████████| 90 kB 1.0 MB/s 
[?25h  Building wheel for neo4j (setup.py) ... [?25l[?25hdone


In [None]:
import os
import time

import numpy as np
import pandas as pd
from neo4j import GraphDatabase
# Keep long cell values (e.g. the path and community lists shown further down)
# from being cut off when DataFrames are displayed.
# NOTE(review): 0 is assumed to disable truncation here; pandas documents None
# as the "unlimited" value for this option — confirm.
pd.set_option('display.max_colwidth', 0)

In [None]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created eagerly in ``__init__``. A failure to create it is
    printed rather than raised, which leaves the connection unusable: a later
    ``query`` call then fails its "Driver not initialized!" assertion.

    Instances can be used as context managers
    (``with Neo4jConnection(...) as conn:``); the driver is closed on exit.
    """

    def __init__(self, uri, user, pwd):
        """Remember the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name, first element of the ``auth`` tuple.
            pwd: Password, second element of the ``auth`` tuple.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the problem and leave the driver unset.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver; exceptions raised inside the block still propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run one Cypher statement and return all of its records.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters.
            db: Optional database name; when omitted the session is opened
                without a ``database`` argument.

        Returns:
            A list with every record of the result, or ``None`` when opening
            the session or running the statement raised. The error is printed
            as "Query failed: ..." in that case.

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        # Only pass `database` when the caller asked for a specific one.
        session_kwargs = {} if db is None else {"database": db}
        try:
            # The context manager closes the session on success and on failure.
            with self.__driver.session(**session_kwargs) as session:
                # Materialise the result while the session is still open.
                return list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
            return None

In [None]:
# Connection settings. Every value can be overridden through an environment
# variable (NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD) so that credentials for a
# real instance never have to be written into the notebook.
# NOTE(review): the fallbacks are the sandbox values this notebook was written
# with — presumably a throwaway, long-expired Neo4j Sandbox; confirm, and never
# commit real credentials here.
uri  = os.environ.get('NEO4J_URI', 'bolt://3.86.237.233:7687')
pwd  = os.environ.get('NEO4J_PASSWORD', 'arrow-briefing-crashes')
user = os.environ.get('NEO4J_USER', 'neo4j')
conn = Neo4jConnection(uri=uri, user=user, pwd=pwd)

In [None]:
# Create a uniqueness constraint on `id` for every node label used in this
# notebook. `IF NOT EXISTS` keeps the cell safe to re-run.
# NOTE(review): `ON ... ASSERT` is the older constraint syntax; newer Neo4j
# releases expect `FOR ... REQUIRE` — confirm against the target server version.
CONSTRAINED_LABELS = [
    "BorderPoint",
    "Compressor",
    "Consumer",
    "LNG",
    "Node",
    "PowerPlant",
    "Production",
    "Storage",
]

for label in CONSTRAINED_LABELS:
    # Each constraint is named after the pluralised label (e.g. `BorderPoints`).
    conn.query(
        f"CREATE CONSTRAINT {label}s IF NOT EXISTS ON (n:{label}) ASSERT n.id IS UNIQUE;"
    )

[]

## Component Creation

In [None]:
# Border points: read IGGIELGNC3_BorderPoints.csv (';'-separated, with headers)
# and MERGE one :BorderPoint node per row, keyed on `id`.
#   * double quotes are stripped from `name`
#   * `source_id` and `node_id` are parsed as JSON lists; only the first
#     element of each is stored on the node
#   * `lat` / `long` are converted to floats and stored as the point `loc`
#   * `param` is parsed as a JSON map after every 'None' in it is rewritten to
#     'null' (presumably Python-style dumps — confirm); its entries are added
#     as node properties
# Properties are written only ON CREATE, so re-running the cell leaves nodes
# that already exist unchanged.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_BorderPoints.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(b:BorderPoint {id:id})
ON CREATE SET b.name          = name,
              b.country_code  = country_code,
              b.source_id     = source_ids[0],
              b.node_id       = node_ids[0],
              b.loc           = loc,
              b              += params
""")

[]

In [None]:
# Compressors: read IGGIELGNC3_Compressors.csv (';'-separated, with headers)
# and MERGE one :Compressor node per row, keyed on `id`.
# `name` has its double quotes stripped, `source_id` / `node_id` keep the first
# element of their parsed JSON lists, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Compressors.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(c:Compressor {id:id})
ON CREATE SET c.name          = name,
              c.country_code  = country_code,
              c.source_id     = source_ids[0],
              c.node_id       = node_ids[0],
              c.loc           = loc,
              c              += params
""")

[]

In [None]:
# Consumers: read IGGIELGNC3_Consumers.csv (';'-separated, with headers) and
# MERGE one :Consumer node per row, keyed on `id`.
# `name` has its double quotes stripped, `source_id` / `node_id` keep the first
# element of their parsed JSON lists, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Consumers.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(c:Consumer {id:id})
ON CREATE SET c.name          = name,
              c.country_code  = country_code,
              c.source_id     = source_ids[0],
              c.node_id       = node_ids[0],
              c.loc           = loc,
              c              += params
""")

[]

In [None]:
# LNG terminals: read IGGIELGNC3_LNGs.csv (';'-separated, with headers) and
# MERGE one :LNG node per row, keyed on `id`.
# `name` has its double quotes stripped, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.
# `source_id` stores the whole parsed list here, while `node_id` keeps only the
# first element.
# NOTE(review): the border-point, compressor, consumer, production and node
# cells store `source_ids[0]` instead — confirm the difference is intentional.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_LNGs.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(l:LNG {id:id})
ON CREATE SET l.name          = name,
              l.country_code  = country_code,
              l.source_id     = source_ids,
              l.node_id       = node_ids[0],
              l.loc           = loc,
              l              += params
""")

[]

In [None]:
# Storages: read IGGIELGNC3_Storages.csv (';'-separated, with headers) and
# MERGE one :Storage node per row, keyed on `id`.
# `name` has its double quotes stripped, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.
# `source_id` stores the whole parsed list here, while `node_id` keeps only the
# first element.
# NOTE(review): the border-point, compressor, consumer, production and node
# cells store `source_ids[0]` instead — confirm the difference is intentional.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Storages.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(s:Storage {id:id})
ON CREATE SET s.name          = name,
              s.country_code  = country_code,
              s.source_id     = source_ids,
              s.node_id       = node_ids[0],
              s.loc           = loc,
              s              += params
""")

[]

In [None]:
# Production sites: read IGGIELGNC3_Productions.csv (';'-separated, with
# headers) and MERGE one :Production node per row, keyed on `id`.
# `name` has its double quotes stripped, `source_id` / `node_id` keep the first
# element of their parsed JSON lists, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Productions.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(p:Production {id:id})
ON CREATE SET p.name          = name,
              p.country_code  = country_code,
              p.source_id     = source_ids[0],
              p.node_id       = node_ids[0],
              p.loc           = loc,
              p              += params
""")

[]

In [None]:
# Power plants: read IGGIELGNC3_PowerPlants.csv (';'-separated, with headers)
# and MERGE one :PowerPlant node per row, keyed on `id`.
# `name` has its double quotes stripped, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.
# `source_id` stores the whole parsed list here, while `node_id` keeps only the
# first element.
# NOTE(review): the border-point, compressor, consumer, production and node
# cells store `source_ids[0]` instead — confirm the difference is intentional.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_PowerPlants.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(pp:PowerPlant {id:id})
ON CREATE SET pp.name          = name,
              pp.country_code  = country_code,
              pp.source_id     = source_ids,
              pp.node_id       = node_ids[0],
              pp.loc           = loc,
              pp              += params
""")

[]

In [None]:
# Network nodes: read IGGIELGNC3_Nodes.csv (';'-separated, with headers) and
# MERGE one :Node node per row, keyed on `id`. These are the nodes that the
# pipe-segment cell connects with PIPE relationships.
# `name` has its double quotes stripped, `source_id` / `node_id` keep the first
# element of their parsed JSON lists, `lat` / `long` become the point `loc`,
# and the entries of the parsed `param` map ('None' rewritten to 'null') are
# added as properties. Everything is set only ON CREATE.

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Nodes.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(n:Node {id:id})
ON CREATE SET n.name          = name,
              n.country_code  = country_code,
              n.source_id     = source_ids[0],
              n.node_id       = node_ids[0],
              n.loc           = loc,
              n              += params
""")

[]

In [None]:
# Pipe segments: create one (:Node)-[:PIPE]->(:Node) relationship per row of
# IGGIELGNC3_PipeSegments.csv, in batches, via apoc.periodic.iterate.
#   * The first statement reads the CSV, parses `node_id` as a JSON list and
#     MATCHes the start node (nodes[0]) and end node (nodes[1]); a row whose
#     two endpoints are not both present as :Node produces no output row.
#   * The second statement MERGEs the relationship keyed on `id` and, on
#     creation, sets `name` plus every entry of the parsed `param` map.
#     (`countries` is returned by the first statement but not used here.)
# Batches run sequentially (parallel:false). Creating relationships takes locks
# on both endpoint nodes, so parallel batches that touch the same node can
# contend for locks and fail; failed batches are only reported in the
# procedure's result, not raised, which would silently drop pipes.

conn.query("""
CALL apoc.periodic.iterate("
      LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_PipeSegments.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     row.name as name,
     apoc.convert.fromJsonList(row.country_code) as countries,
     apoc.convert.fromJsonList(row.node_id) as nodes,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MATCH (start_p:Node {id:nodes[0]})
MATCH (end_p:Node {id:nodes[1]})
RETURN start_p, end_p, id, name, params, countries
","   
      MERGE (start_p)-[pipe:PIPE {id:id}]->(end_p)
      ON CREATE SET pipe.name = name, 
                    pipe += params
", {batchSize:5, parallel:false}
)
""")

## Exploratory Data Analysis (EDA)

In [None]:
# Number of nodes per label, most frequent label first. For every label
# reported by db.labels(), a count query is assembled and run through
# apoc.cypher.run.
q = """
CALL db.labels() YIELD label
CALL apoc.cypher.run('MATCH (:`'+label+'`) RETURN count(*) as count',{}) YIELD value
RETURN label as nodes, value.count as nodeCount
ORDER BY nodeCount DESC
"""

# Turn each returned record into a plain dict so pandas can build the frame.
df = pd.DataFrame(list(map(dict, conn.query(q))))
df

Unnamed: 0,nodes,nodeCount
0,Node,5009
1,Consumer,1357
2,PowerPlant,310
3,Storage,297
4,Compressor,248
5,BorderPoint,109
6,Production,104
7,LNG,32


In [None]:
# Number of relationships per relationship type, using the same
# apoc.cypher.run approach as the per-label node count.
q = '''
CALL db.relationshipTypes() YIELD relationshipType as type
CALL apoc.cypher.run('MATCH ()-[:`'+type+'`]->() RETURN count(*) as count',{}) YIELD value
RETURN type as relationship, value.count as relationshipCount
'''
# One dict per record -> one DataFrame row per relationship type.
df = pd.DataFrame(list(map(dict, conn.query(q))))
df

Unnamed: 0,relationship,relationshipCount
0,PIPE,6526


In [None]:
# Database-wide summary as reported by apoc.meta.stats(), shown as a
# single-row DataFrame.
q = '''
CALL apoc.meta.stats()
'''
df = pd.DataFrame(list(map(dict, conn.query(q))))
df

Unnamed: 0,labelCount,relTypeCount,propertyKeyCount,nodeCount,relCount,labels,relTypes,relTypesCount,stats
0,8,1,83,7466,6526,"{'BorderPoint': 109, 'Storage': 297, 'Consumer': 1357, 'LNG': 32, 'PowerPlant': 310, 'Node': 5009, 'Production': 104, 'Compressor': 248}","{'()-[:PIPE]->(:Node)': 6526, '()-[:PIPE]->()': 6526, '(:Node)-[:PIPE]->()': 6526}",{'PIPE': 6526},"{'relTypeCount': 1, 'propertyKeyCount': 83, 'labelCount': 8, 'nodeCount': 7466, 'relCount': 6526, 'labels': {'BorderPoint': 109, 'Storage': 297, 'Consumer': 1357, 'LNG': 32, 'PowerPlant': 310, 'Node': 5009, 'Production': 104, 'Compressor': 248}, 'relTypes': {'()-[:PIPE]->(:Node)': 6526, '()-[:PIPE]->()': 6526, '(:Node)-[:PIPE]->()': 6526}}"


In [None]:
# Paths of exactly 20 PIPE hops that start at node "INET_N_856" and end at a
# node without any outgoing PIPE. For every distinct path, return the node ids
# and the country codes along the way.
query="""
MATCH p=(start:Node)-[:PIPE*20]->(next) 
WHERE not ((next)-[:PIPE]->()) AND start.id="INET_N_856"
RETURN DISTINCT [t in nodes(p) | t.id] as nodesPath, [t in nodes(p) | t.country_code] as countryPath
"""
df = pd.DataFrame(list(map(dict, conn.query(query))))
df

Unnamed: 0,nodesPath,countryPath
0,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, SEQ_16065_p, SEQ_7733__L_LMGN, N_688_M_LMGN, SEQ_7739__L_LMGN, SEQ_7743__L_LMGN, SEQ_7747__L_LMGN, SEQ_7749__L_LMGN, NutsCons_1003]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
1,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, SEQ_16065_p, SEQ_7733__L_LMGN, SEQ_7731__L_LMGN, SEQ_7733__L_LMGN, N_688_M_LMGN, SEQ_7739__L_LMGN, SEQ_10595__M_LMGN, N_902_M_LMGN]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
2,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, SEQ_16065_p, SEQ_7733__L_LMGN, SEQ_7731__L_LMGN, SEQ_7727__L_LMGN, SEQ_7731__L_LMGN, SEQ_7733__L_LMGN, N_688_M_LMGN, N_902_M_LMGN]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
3,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, N_688_M_LMGN, SEQ_7739__L_LMGN, SEQ_10595__M_LMGN, N_902_M_LMGN]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
4,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, SEQ_16065_p, SEQ_7733__L_LMGN, N_688_M_LMGN, N_902_M_LMGN]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
5,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_5209__M_LMGN, SEQ_6901_p, SEQ_5217__M_LMGN, SEQ_5231__M_LMGN, SEQ_5233__M_LMGN, SEQ_6861__S_LMGN, NutsCons_1165]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
6,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_8901_p, N_80_L_LMGN, SEQ_1693__L_LMGN, SEQ_1699__L_LMGN, SEQ_1703__L_LMGN, SEQ_1717__L_LMGN, N_81_L_LMGN, SEQ_7667__L_LMGN, N_417_S_LMGN, N_989_S_LMGN, SEQ_15267_p, NutsCons_1125]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
7,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_8901_p, N_80_L_LMGN, SEQ_1693__L_LMGN, SEQ_1699__L_LMGN, SEQ_1703__L_LMGN, SEQ_1717__L_LMGN, N_81_L_LMGN, SEQ_7667__L_LMGN, SEQ_7673__L_LMGN, SEQ_7675__L_LMGN, SEQ_7677__L_LMGN, INET_N_256]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
8,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_8901_p, N_80_L_LMGN, SEQ_1693__L_LMGN, SEQ_1699__L_LMGN, SEQ_1703__L_LMGN, SEQ_1717__L_LMGN, N_416_S_LMGN, SEQ_7667__L_LMGN, N_417_S_LMGN, N_989_S_LMGN, SEQ_15267_p, NutsCons_1125]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"
9,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, N_1587_S_LMGN, SEQ_9693__M_LMGN, SEQ_9695__M_LMGN, SEQ_8901_p, N_80_L_LMGN, SEQ_1693__L_LMGN, SEQ_1699__L_LMGN, SEQ_1703__L_LMGN, SEQ_1717__L_LMGN, N_416_S_LMGN, SEQ_7667__L_LMGN, SEQ_7673__L_LMGN, SEQ_7675__L_LMGN, SEQ_7677__L_LMGN, INET_N_256]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL]"


## Graph Data Science

In [None]:
!pip install -q graphdatascience

In [None]:
import pandas as pd
from graphdatascience import GraphDataScience

# Graph Data Science client. The settings can be overridden through
# NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD so that credentials for a real
# instance never have to be written into the notebook.
# NOTE(review): the fallbacks are the sandbox values this notebook was written
# with — presumably a throwaway, long-expired Neo4j Sandbox; confirm.
uri  = os.environ.get('NEO4J_URI', 'bolt://3.86.237.233:7687')
pwd  = os.environ.get('NEO4J_PASSWORD', 'arrow-briefing-crashes')
user = os.environ.get('NEO4J_USER', 'neo4j')
# Authenticate with `user` instead of a second hard-coded "neo4j" literal, so
# the user name is defined in exactly one place.
gds = GraphDataScience(uri, auth=(user, pwd))

In [None]:
# Drop a previously projected 'pipelines' graph so the projection cell that
# follows can be re-run. The second argument (failIfMissing = false) makes the
# call return an empty result instead of raising when no such graph exists,
# e.g. on a fresh database or after a kernel/database restart.
query = "CALL gds.graph.drop('pipelines', false) YIELD graphName;"
gds.run_cypher(query)

Unnamed: 0,graphName
0,pipelines


In [None]:
# Project the in-memory graph 'pipelines' from all :Node nodes and all PIPE
# relationships, calling the GDS procedure through plain Cypher, and return the
# projection summary (graph name, projections, node and relationship counts).
query = """CALL gds.graph.project(
                'pipelines',
                'Node',
                'PIPE'
            )
            YIELD
                graphName, nodeProjection, nodeCount, relationshipProjection, relationshipCount
"""

gds.run_cypher(query)

Unnamed: 0,graphName,nodeProjection,nodeCount,relationshipProjection,relationshipCount
0,pipelines,"{'Node': {'label': 'Node', 'properties': {}}}",5009,"{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",6526


In [None]:
# Same projection (:Node nodes, PIPE relationships) made through the Python
# client under the name 'pipes'. The call yields a graph handle `G` and the
# projection result `res`, whose type is shown.
G, res = gds.graph.project('pipes', 'Node', 'PIPE')
type(res)

pandas.core.series.Series

In [None]:
# Print the projection result returned alongside `G` (a pandas Series).
print(res)

nodeProjection            {'Node': {'label': 'Node', 'properties': {}}}                                                   
relationshipProjection    {'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}
graphName                 pipes                                                                                           
nodeCount                 5009                                                                                            
relationshipCount         6526                                                                                            
projectMillis             287                                                                                             
Name: 0, dtype: object


In [None]:
# Node and relationship counts of the projected graph, as a tuple.
G.node_count(), G.relationship_count()

(5009, 6526)

In [None]:
# Summary statistics (min, max, mean, percentiles) of node degrees in the projected graph.
G.degree_distribution()

p99     5.000000 
min     0.000000 
max     12.000000
mean    1.302855 
p90     3.000000 
p50     1.000000 
p999    7.000000 
p95     3.000000 
p75     2.000000 
dtype: float64

In [None]:
# Run PageRank on `G` in stream mode. Note that `res` is rebound here: it held
# the projection result before and holds the PageRank frame (one row per node:
# internal `nodeId` and `score`) from this cell on.
res = gds.pageRank.stream(G)
res

Unnamed: 0,nodeId,score
0,2457,0.331688
1,2458,0.431934
2,2459,0.333572
3,2460,0.597695
4,2461,0.515261
...,...,...
5004,7461,0.411465
5005,7462,0.319784
5006,7463,0.267938
5007,7464,0.213750


In [None]:
# The ten rows of the PageRank result with the highest score.
res.sort_values(by='score', ascending=False).head(10)

Unnamed: 0,nodeId,score
2848,5305,4.861119
2852,5309,4.331437
514,2971,4.225036
1813,4270,3.832889
1708,4165,3.320917
1769,4226,3.236406
1776,4233,3.128835
1709,4166,2.935067
2779,5236,2.765317
2112,4569,2.691821


In [None]:
# Look up the `id` property of the node with the highest PageRank score.
# The internal node id is read from the streamed PageRank result (`res`)
# instead of being hard-coded: internal ids are assigned by the database and
# differ from one load of the data to the next.
# Cast to a plain int — numpy integers are not guaranteed to be accepted as
# driver query parameters.
top_node_id = int(res.loc[res['score'].idxmax(), 'nodeId'])

query = """
MATCH (n:Node)
WHERE ID(n) = $node_id
RETURN n.id
"""
df = pd.DataFrame([dict(record) for record in conn.query(query, parameters={"node_id": top_node_id})])
df

Unnamed: 0,n.id
0,LKD_N_40


In [None]:
# Centrality determined by node degree, computed directly in Cypher: for every
# :Node, count its PIPE relationships to other :Node nodes in either direction.
# The count uses a pattern comprehension, `size([pattern | 1])`, rather than
# `size(pattern)`: the latter is deprecated and no longer accepted by Neo4j 5,
# while the comprehension yields the same number on 4.x and 5.x.
query = """
MATCH (n:Node)
WITH n, SIZE([(n)-[:PIPE]-(:Node) | 1]) AS degree
RETURN n.id AS id, n.country_code AS node_code, degree
ORDER BY degree DESC
"""
df = pd.DataFrame([dict(record) for record in conn.query(query)])
df

Unnamed: 0,id,node_code,degree
0,LKD_N_38,DE,14
1,N_144_L_LMGN,GB,13
2,INET_N_1679,UA,11
3,N_72_L_LMGN,RU,11
4,LKD_N_147,DE,11
...,...,...,...
5004,NutsCons_307,CH,1
5005,NutsCons_200,ES,1
5006,NutsCons_235,CH,1
5007,NutsCons_588,GR,1


In [None]:
# Rank the nodes by indegree: for every :Node, count the PIPE relationships
# arriving from other :Node nodes.
# The count uses a pattern comprehension, `size([pattern | 1])`, rather than
# `size(pattern)`: the latter is deprecated and no longer accepted by Neo4j 5,
# while the comprehension yields the same number on 4.x and 5.x.
query = """MATCH (n:Node)
WITH n, SIZE([(n)<-[:PIPE]-(:Node) | 1]) AS indegree
RETURN n.id AS id, n.country_code AS node_code, indegree
ORDER BY indegree DESC"""

df = pd.DataFrame([dict(record) for record in conn.query(query)])
df

Unnamed: 0,id,node_code,indegree
0,INET_N_1071,NL,7
1,INET_N_883,UA,7
2,LKD_N_40,DE,7
3,INET_N_408,UA,6
4,INET_N_1684,UA,6
...,...,...,...
5004,NutsCons_142,AL,0
5005,INET_N_1037,LT,0
5006,Prod_9,XX,0
5007,NutsCons_1261,GB,0


In [None]:
# Run PageRank on the 'pipes' projection in write mode: each node's score is
# stored in the database under the property `pagerank`, which the next query
# reads as `n.pagerank`. The call reports how many properties were written and
# how many iterations ran.
q= """CALL gds.pageRank.write('pipes',
    {
        writeProperty: 'pagerank'
    }
)
YIELD nodePropertiesWritten, ranIterations"""
gds.run_cypher(q)

Unnamed: 0,nodePropertiesWritten,ranIterations
0,5009,20


In [None]:
# Ten nodes with the highest stored PageRank, together with the summed
# `max_cap_M_m3_per_d` of their incoming PIPE relationships. Only nodes with at
# least one incoming PIPE from another :Node can appear, because of the MATCH.
# NOTE(review): the summed property is named "..._per_d" while the column alias
# says "Annual" — confirm which unit is intended.
q = """MATCH (n:Node)<-[r:PIPE]-(:Node)
RETURN n.id AS id, n.country_code AS countryCode, round(n.pagerank,2) AS pagerank, round(sum(r.max_cap_M_m3_per_d),2) as maximumAnnualGasVolume
ORDER BY pagerank DESC, id ASC
LIMIT 10"""
df = pd.DataFrame(list(map(dict, conn.query(q))))
df

Unnamed: 0,id,countryCode,pagerank,maximumAnnualGasVolume
0,LKD_N_40,DE,4.86,195.52
1,LKD_N_44,DE,4.33,54.79
2,N_183_M_LMGN,MD,4.23,82.19
3,SEQ_1813__M_LMGN,MD,3.83,54.79
4,SEQ_7563__L_LMGN,XX,3.32,82.19
5,N_32_M_LMGN,RU,3.24,54.79
6,N_85_M_LMGN,RU,3.13,54.79
7,SEQ_7577__L_LMGN,XX,2.94,27.4
8,SEQ_2409_p,AZ,2.77,54.79
9,INET_N_408,UA,2.69,164.38


## Degree Centrality

In [None]:
# Project a second in-memory graph, 'degreeCentralityPipes', over the same
# :Node nodes but with PIPE relationships treated as UNDIRECTED, so that the
# degree computed on it counts pipes in both directions.
query = """
CALL gds.graph.project(
  'degreeCentralityPipes',
  'Node',
  {
    PIPE: {
      orientation: "UNDIRECTED"
    }
  }
)
"""
gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'UNDIRECTED', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",degreeCentralityPipes,5009,13052,116


In [None]:
# Stream degree centrality on the undirected projection and list the ten
# highest-degree nodes (ties broken by id). gds.util.asNode maps the internal
# nodeId back to the node so that its `id` property can be returned.
query = """
CALL gds.degree.stream('degreeCentralityPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, score AS degree
ORDER BY degree DESC, id ASC
LIMIT 10
"""
gds.run_cypher(query)

Unnamed: 0,id,degree
0,LKD_N_38,14.0
1,N_144_L_LMGN,13.0
2,INET_N_1679,11.0
3,INET_N_408,11.0
4,LKD_N_147,11.0
5,LKD_N_363,11.0
6,N_72_L_LMGN,11.0
7,INET_N_1071,10.0
8,INET_N_1118,10.0
9,INET_N_1579,10.0


## Betweenness Centrality

In [None]:
# Project the graph 'betweennessPipes' (:Node nodes, PIPE relationships) that
# the betweenness-centrality cells run on. It uses the same node label and
# relationship type as the earlier 'pipes' projection.
query = "CALL gds.graph.project('betweennessPipes', 'Node', 'PIPE')"
gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",betweennessPipes,5009,6526,249


In [None]:
# Ask GDS for the memory estimate of running betweenness centrality in write
# mode (writeProperty 'betweenness') on the 'betweennessPipes' projection.
query = """
CALL gds.betweenness.write.estimate('betweennessPipes', { writeProperty: 'betweenness' })
YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory
"""

gds.run_cypher(query)

Unnamed: 0,nodeCount,relationshipCount,bytesMin,bytesMax,requiredMemory
0,5009,6526,1404224,1404224,1371 KiB


In [None]:
# Betweenness centrality in stats mode: return only the minimum, mean and
# maximum of the score distribution.
query = """
CALL gds.betweenness.stats('betweennessPipes')
YIELD centralityDistribution
RETURN centralityDistribution.min AS minimumScore, centralityDistribution.mean AS meanScore, centralityDistribution.max AS maxScore
"""
gds.run_cypher(query)

Unnamed: 0,minimumScore,meanScore,maxScore
0,0.0,17763.981371,864215.999992


In [None]:
# Stream betweenness centrality and list the ten nodes with the highest score
# (rounded to two decimals, ties broken by id).
query = """
CALL gds.betweenness.stream('betweennessPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, round(score,2) as score
ORDER BY score DESC, id ASC
LIMIT 10
"""

gds.run_cypher(query)

Unnamed: 0,id,score
0,INET_N_984,864212.91
1,LKD_N_38,607789.7
2,LKD_N_492,601340.02
3,INET_N_229,598424.32
4,SEQ_19574_p,598029.02
5,SEQ_19575_p,597328.02
6,LKD_N_491,597240.49
7,LKD_N_493,596627.02
8,SEQ_719__L_LMGN,595311.72
9,LKD_N_165,594963.55


In [None]:
# Same betweenness stream as the top-ten query, but without the LIMIT: the full
# ranking is kept in `betweenness_df` for later inspection.
query = """
CALL gds.betweenness.stream('betweennessPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, round(score,2) as score
ORDER BY score DESC, id ASC
"""

betweenness_df = gds.run_cypher(query)

In [None]:
# First twenty rows of the full betweenness ranking (already sorted by score, descending).
betweenness_df.head(20)

Unnamed: 0,id,score
0,INET_N_984,864212.91
1,LKD_N_38,607789.7
2,LKD_N_492,601340.02
3,INET_N_229,598424.32
4,SEQ_19574_p,598029.02
5,SEQ_19575_p,597328.02
6,LKD_N_491,597240.49
7,LKD_N_493,596627.02
8,SEQ_719__L_LMGN,595311.72
9,LKD_N_165,594963.55


## Louvain Community Detection

In [None]:
# Stream Louvain communities on the 'pipes' projection and aggregate per
# community: number of member nodes, their ids and their country codes.
# Communities are ordered by size (largest first), then by communityId; the ten
# largest are displayed.
query = """
CALL gds.louvain.stream('pipes')
YIELD nodeId, communityId
RETURN
	communityId,
    SIZE(COLLECT(gds.util.asNode(nodeId).id)) AS number_of_nodes,
	COLLECT(gds.util.asNode(nodeId).id) AS ids,
    COLLECT(gds.util.asNode(nodeId).country_code) AS country_codes
ORDER BY number_of_nodes DESC, communityId;
"""
clusters_df = gds.run_cypher(query)
clusters_df.head(10)

Unnamed: 0,communityId,number_of_nodes,ids,country_codes
0,2691,211,"[SEQ_3791__L_LMGN, SEQ_3793__L_LMGN, N_1604_S_LMGN, SEQ_1229__L_LMGN, SEQ_9097__M_LMGN, N_2585_M_LMGN, SEQ_1235__L_LMGN, SEQ_5169__M_LMGN, SEQ_1237__L_LMGN, N_472_M_LMGN, N_594_M_LMGN, SEQ_201__L_LMGN, SEQ_199__L_LMGN, SEQ_145__L_LMGN, SEQ_195__L_LMGN, N_388_M_LMGN, SEQ_309__L_LMGN, SEQ_325__L_LMGN, SEQ_331__L_LMGN, N_390_M_LMGN, N_16_L_LMGN, SEQ_369__L_LMGN, SEQ_373__L_LMGN, SEQ_403__L_LMGN, SEQ_437__L_LMGN, SEQ_449__L_LMGN, SEQ_457__L_LMGN, SEQ_467__L_LMGN, N_17_L_LMGN, N_18_L_LMGN, SEQ_475__L_LMGN, SEQ_511__L_LMGN, SEQ_525__L_LMGN, SEQ_527__L_LMGN, SEQ_537__L_LMGN, SEQ_583__L_LMGN, SEQ_591__L_LMGN, N_19_L_LMGN, N_20_L_LMGN, N_21_L_LMGN, N_22_L_LMGN, SEQ_623__L_LMGN, SEQ_639__L_LMGN, N_24_L_LMGN, N_25_L_LMGN, N_26_L_LMGN, SEQ_805__L_LMGN, SEQ_1255__L_LMGN, N_82_L_LMGN, SEQ_1903__M_LMGN, SEQ_1757__L_LMGN, SEQ_1765__L_LMGN, SEQ_1771__L_LMGN, SEQ_1773__L_LMGN, SEQ_1797__L_LMGN, N_84_L_LMGN, SEQ_1829__L_LMGN, SEQ_1831__L_LMGN, SEQ_1833__L_LMGN, SEQ_1835__L_LMGN, SEQ_1839__L_LMGN, SEQ_1845__L_LMGN, N_86_L_LMGN, Stor_NonEU_21, N_34_M_LMGN, SEQ_3103__L_LMGN, SEQ_9386_p, SEQ_3813__L_LMGN, SEQ_3817__L_LMGN, N_182_L_LMGN, N_183_L_LMGN, N_258_M_LMGN, SEQ_1179__M_LMGN, SEQ_5955__M_LMGN, SEQ_5151__M_LMGN, SEQ_4003__M_LMGN, SEQ_5979__M_LMGN, SEQ_2383__M_LMGN, SEQ_7527__M_LMGN, SEQ_2715__M_LMGN, SEQ_5971__M_LMGN, SEQ_353__M_LMGN, SEQ_9501__M_LMGN, SEQ_5963__M_LMGN, SEQ_5989__M_LMGN, N_123_M_LMGN, SEQ_6069__M_LMGN, N_32_M_LMGN, N_53_M_LMGN, N_54_M_LMGN, N_83_M_LMGN, N_85_M_LMGN, N_118_M_LMGN, N_53_L_LMGN, SEQ_1183__M_LMGN, N_122_M_LMGN, N_125_M_LMGN, SEQ_1857__M_LMGN, SEQ_1895__M_LMGN, SEQ_1897__M_LMGN, ...]","[BY, BY, BY, BY, UA, UA, BY, BY, BY, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, BY, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, BY, BY, BY, RU, RU, RU, RU, RU, BY, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, 
RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, ...]"
1,763,183,"[N_735_M_LMGN, N_736_M_LMGN, N_1843_S_LMGN, N_1166_S_LMGN, N_1167_S_LMGN, SEQ_13739__S_LMGN, SEQ_5193__L_LMGN, N_160_L_LMGN, N_815_M_LMGN, N_816_M_LMGN, N_553_M_LMGN, SEQ_13823__S_LMGN, N_822_M_LMGN, N_1494_S_LMGN, N_1848_S_LMGN, SEQ_13927__S_LMGN, N_555_M_LMGN, SEQ_1573_p, N_2106_S_LMGN, N_2107_S_LMGN, SEQ_5149__L_LMGN, SEQ_5153__L_LMGN, SEQ_5165__L_LMGN, N_896_M_LMGN, SEQ_10731__M_LMGN, SEQ_10735__M_LMGN, INET_N_1481, SEQ_8163__M_LMGN, NutsCons_1073, SEQ_8159__M_LMGN, N_737_M_LMGN, N_556_M_LMGN, N_823_M_LMGN, N_897_M_LMGN, NutsCons_949, N_551_M_LMGN, SEQ_6581__M_LMGN, N_115_L_LMGN, N_113_L_LMGN, SEQ_2405_p, SEQ_16101__S_LMGN, SEQ_2557__L_LMGN, SEQ_2553_p, N_114_L_LMGN, SEQ_6681__L_LMGN, SEQ_5079__L_LMGN, SEQ_5077__L_LMGN, N_208_L_LMGN, SEQ_5129__L_LMGN, N_777_M_LMGN, SEQ_8955__M_LMGN, SEQ_3488_p, N_825_M_LMGN, N_1479_S_LMGN, SEQ_16009__S_LMGN, SEQ_3534_p, N_1828_S_LMGN, N_624_L_LMGN, SEQ_5179__L_LMGN, SEQ_5175__L_LMGN, SEQ_9273__M_LMGN, N_2108_S_LMGN, SEQ_17879__S_LMGN, N_1505_S_LMGN, SEQ_11007__S_LMGN, N_1169_S_LMGN, SEQ_5205__L_LMGN, SEQ_5207__L_LMGN, NutsCons_976, INET_N_219, N_914_S_LMGN, N_513_S_LMGN, N_1474_S_LMGN, N_1168_S_LMGN, N_1829_S_LMGN, NutsCons_1121, N_199_M_LMGN, N_200_M_LMGN, SEQ_7529_p, SEQ_8927__M_LMGN, SEQ_8933__M_LMGN, NutsCons_1081, SEQ_5183__L_LMGN, N_1478_S_LMGN, SEQ_7616_p, N_702_M_LMGN, N_850_M_LMGN, N_512_S_LMGN, N_923_S_LMGN, SEQ_5849__M_LMGN, N_510_S_LMGN, SEQ_8329_p, N_66_L_LMGN, SEQ_1493__L_LMGN, SEQ_1513__L_LMGN, SEQ_8865_p, SEQ_9678_p, SEQ_11107__S_LMGN, N_432_M_LMGN, N_687_S_LMGN, ...]","[IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, CH, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, IT, ...]"
2,961,153,"[N_61_L_LMGN, N_163_L_LMGN, N_150_L_LMGN, SEQ_3245__L_LMGN, N_98_L_LMGN, SEQ_2157__L_LMGN, SEQ_8233__M_LMGN, N_1302_S_LMGN, SEQ_9997__M_LMGN, SEQ_1900_p, N_811_M_LMGN, SEQ_9973__M_LMGN, SEQ_9971__M_LMGN, SEQ_9963__M_LMGN, N_2320_S_LMGN, SEQ_12551__S_LMGN, SEQ_8005__L_LMGN, SEQ_7999__L_LMGN, N_117_L_LMGN, SEQ_2599__L_LMGN, SEQ_2366_p, SEQ_2597__L_LMGN, Stor_EU_33, SEQ_2153__L_LMGN, N_97_L_LMGN, N_434_M_LMGN, SEQ_3713__L_LMGN, SEQ_3707__L_LMGN, SEQ_3705__L_LMGN, N_145_L_LMGN, SEQ_3189__L_LMGN, N_146_L_LMGN, SEQ_4207__L_LMGN, SEQ_4209__L_LMGN, SEQ_4215__L_LMGN, SEQ_4219__L_LMGN, SEQ_4223__L_LMGN, N_56_L_LMGN, SEQ_1295__L_LMGN, SEQ_9995__M_LMGN, SEQ_1365__L_LMGN, SEQ_4975_p, SEQ_7017__M_LMGN, N_118_L_LMGN, N_1348_S_LMGN, N_2238_S_LMGN, N_2237_S_LMGN, N_2239_S_LMGN, SEQ_19623__S_LMGN, N_186_L_LMGN, N_57_L_LMGN, SEQ_1317__L_LMGN, SEQ_1311__L_LMGN, N_147_L_LMGN, SEQ_6599__L_LMGN, SEQ_5273__L_LMGN, SEQ_5285__L_LMGN, SEQ_1291__L_LMGN, SEQ_1387__L_LMGN, N_1366_S_LMGN, N_1352_S_LMGN, N_1354_S_LMGN, SEQ_1361__L_LMGN, N_60_L_LMGN, SEQ_6514_p, N_210_L_LMGN, SEQ_7497__L_LMGN, SEQ_7493__L_LMGN, SEQ_6524_p, SEQ_15277__S_LMGN, N_1729_S_LMGN, N_2000_S_LMGN, N_2001_S_LMGN, SEQ_4371__L_LMGN, SEQ_4367__L_LMGN, SEQ_1371__L_LMGN, SEQ_1375__L_LMGN, SEQ_6895_p, SEQ_10825__M_LMGN, N_1158_S_LMGN, N_1356_S_LMGN, N_1855_S_LMGN, SEQ_8134_p, SEQ_8140_p, SEQ_1307__L_LMGN, SEQ_8334_p, SEQ_8225__M_LMGN, N_1954_S_LMGN, SEQ_4521__M_LMGN, SEQ_8229__M_LMGN, SEQ_9247__M_LMGN, NutsCons_820, SEQ_14256_p, SEQ_10819__M_LMGN, SEQ_14261_p, SEQ_9863__S_LMGN, N_630_S_LMGN, NutsCons_866, NutsCons_778, SEQ_15392_p, ...]","[FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, 
FR, FR, FR, ...]"
3,4621,149,"[INET_N_753, INET_N_1731, INET_N_1131, INET_N_750, INET_N_853, INET_N_1302, INET_N_1819, INET_N_1760, SEQ_7164_p, INET_N_408, SEQ_592_p, INET_N_995, INET_N_1761, SEQ_7475_p, INET_N_874, SEQ_7649_p, INET_N_679, INET_N_1626, N_81_L_LMGN, SEQ_7667__L_LMGN, N_417_S_LMGN, SEQ_6673__M_LMGN, INET_N_640, SEQ_7701__L_LMGN, SEQ_7035__S_LMGN, SEQ_7031__S_LMGN, SEQ_7687__L_LMGN, SEQ_7681__L_LMGN, SEQ_7677__L_LMGN, SEQ_7675__L_LMGN, SEQ_7673__L_LMGN, SEQ_9695__M_LMGN, SEQ_9693__M_LMGN, SEQ_9691__M_LMGN, SEQ_9683__M_LMGN, SEQ_9671__M_LMGN, N_395_M_LMGN, SEQ_7905__M_LMGN, SEQ_7897__M_LMGN, SEQ_1717__L_LMGN, SEQ_1703__L_LMGN, SEQ_1699__L_LMGN, SEQ_11659__S_LMGN, INET_N_971, SEQ_5829__L_LMGN, N_1584_S_LMGN, SEQ_20333__S_LMGN, N_1573_S_LMGN, SEQ_1693__L_LMGN, N_80_L_LMGN, SEQ_5901__L_LMGN, N_141_L_LMGN, INET_N_863, SEQ_5151_p, SEQ_3901__S_LMGN, N_2689_M_LMGN, N_617_M_LMGN, N_78_L_LMGN, N_180_L_LMGN, N_247_L_LMGN, SEQ_589__M_LMGN, SEQ_5861__L_LMGN, SEQ_14521__S_LMGN, N_746_S_LMGN, INET_N_1567, N_989_S_LMGN, SEQ_5727__L_LMGN, SEQ_5735__L_LMGN, SEQ_7292_p, SEQ_5741__L_LMGN, INET_N_168, Stor_NonEU_37, N_231_L_LMGN, SEQ_4087__M_LMGN, SEQ_9753__M_LMGN, N_230_L_LMGN, SEQ_6369__L_LMGN, SEQ_3041__L_LMGN, N_77_L_LMGN, N_139_L_LMGN, SEQ_8901_p, SEQ_10180_p, NutsCons_1006, SEQ_9659__M_LMGN, SEQ_14094_p, N_1923_S_LMGN, N_1591_S_LMGN, INET_N_1817, N_415_S_LMGN, SEQ_14814_p, N_416_S_LMGN, NutsCons_1176, N_755_S_LMGN, N_756_S_LMGN, SEQ_15255_p, SEQ_15267_p, NutsCons_1065, N_1583_S_LMGN, N_1587_S_LMGN, SEQ_15741_p, ...]","[SK, SK, BY, BY, BY, BY, BY, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, PL, PL, PL, HU, HU, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, CZ, CZ, UA, PL, CZ, CZ, PL, PL, PL, BY, UA, UA, HU, HU, HU, UA, UA, UA, UA, PL, PL, PL, PL, PL, HU, HU, HU, HU, HU, BY, UA, PL, HU, UA, UA, UA, UA, UA, PL, UA, PL, PL, PL, PL, HU, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, PL, ...]"
4,4028,147,"[SEQ_6715__M_LMGN, SEQ_21469__S_LMGN, N_620_M_LMGN, SEQ_6717__M_LMGN, SEQ_4067__M_LMGN, INET_N_149, SEQ_4061__M_LMGN, SEQ_4059__M_LMGN, SEQ_4053__M_LMGN, SEQ_1989_p, NutsCons_1029, SEQ_1991_p, N_398_M_LMGN, SEQ_6613__M_LMGN, SEQ_6615__M_LMGN, N_446_S_LMGN, N_208_S_LMGN, N_445_S_LMGN, SEQ_7445__S_LMGN, SEQ_7865__M_LMGN, SEQ_7869__M_LMGN, SEQ_5116_p, SEQ_1305__M_LMGN, SEQ_1311__M_LMGN, NutsCons_1057, N_2501_S_LMGN, SEQ_6733__M_LMGN, N_502_M_LMGN, SEQ_1315__M_LMGN, N_131_M_LMGN, SEQ_5303__M_LMGN, N_498_M_LMGN, SEQ_5821__S_LMGN, N_209_S_LMGN, SEQ_7371__S_LMGN, N_421_M_LMGN, SEQ_4463__M_LMGN, SEQ_4467__M_LMGN, SEQ_4471__M_LMGN, SEQ_7231_p, SEQ_4477__M_LMGN, SEQ_3173__M_LMGN, SEQ_3171__M_LMGN, SEQ_9569__S_LMGN, SEQ_5323__M_LMGN, SEQ_5315__M_LMGN, SEQ_5313__M_LMGN, N_149_M_LMGN, SEQ_11559_p, SEQ_11562_p, N_150_M_LMGN, N_403_M_LMGN, SEQ_4483__M_LMGN, N_423_M_LMGN, SEQ_5273__M_LMGN, SEQ_12820_p, N_494_M_LMGN, SEQ_5287__M_LMGN, N_495_M_LMGN, N_438_S_LMGN, Stor_EU_14, SEQ_5643__M_LMGN, SEQ_5647__M_LMGN, SEQ_5651__M_LMGN, SEQ_6633__M_LMGN, SEQ_6639__M_LMGN, SEQ_6703__M_LMGN, SEQ_14404_p, N_589_S_LMGN, SEQ_14412_p, N_2680_S_LMGN, SEQ_14641_p, NutsCons_1022, NutsCons_1010, NutsCons_1051, SEQ_3033__S_LMGN, NutsCons_994, SEQ_15141_p, NutsCons_1421, SEQ_15309_p, N_1032_S_LMGN, NutsCons_1360, SEQ_15318_p, N_1313_S_LMGN, N_1323_S_LMGN, N_1330_S_LMGN, N_1580_S_LMGN, NutsCons_1017, N_1956_S_LMGN, N_1961_S_LMGN, NutsCons_48, NutsCons_988, NutsCons_157, N_2503_S_LMGN, N_2504_S_LMGN, N_2585_S_LMGN, NutsCons_1173, N_2599_S_LMGN, SEQ_16696_p, SEQ_16697_p, ...]","[HU, HU, HU, HU, RS, RS, RS, RS, RS, RS, RS, RS, RO, RO, RO, RO, RO, RO, RO, RS, RS, RO, HU, HU, RS, RS, RS, RS, HU, HU, HU, HU, RO, RO, RO, RO, RO, RO, RO, RO, RO, BG, BG, RS, RS, HU, HU, RO, RO, RO, RO, RO, RO, RO, RO, RO, RO, RO, RO, RO, HU, RO, RO, RO, RO, RO, RO, RS, RS, RS, RS, RO, RS, RS, RO, RO, RS, RO, RO, BG, RO, RO, RO, BG, RO, RO, RS, RS, RS, BG, BG, RS, BG, RS, RO, HU, HU, HU, RO, RO, ...]"
5,4113,143,"[SEQ_6881__L_LMGN, N_2631_S_LMGN, Stor_EU_108, SEQ_932_p, LKD_N_88, SEQ_18223_p, LKD_N_121, LKD_N_57, LKD_N_58, LKD_N_63, LKD_N_64, NutsCons_393, SEQ_18593_p, SEQ_18594_p, LKD_N_66, LKD_N_71, LKD_N_73, SEQ_18610_p, LKD_N_74, LKD_N_75, LKD_N_76, LKD_N_77, SEQ_18618_p, SEQ_18621_p, SEQ_18622_p, SEQ_18623_p, SEQ_18626_p, SEQ_18627_p, SEQ_18628_p, LKD_N_79, SEQ_18632_p, LKD_N_80, LKD_N_81, LKD_N_82, LKD_N_84, LKD_N_85, LKD_N_86, SEQ_18643_p, SEQ_18645_p, SEQ_18646_p, SEQ_18727_p, SEQ_18729_p, LKD_N_102, SEQ_18749_p, SEQ_18751_p, LKD_N_110, LKD_N_122, LKD_N_123, LKD_N_124, LKD_N_125, LKD_N_126, SEQ_18793_p, SEQ_18794_p, LKD_N_128, SEQ_18796_p, LKD_N_129, LKD_N_130, LKD_N_131, SEQ_18805_p, SEQ_18813_p, SEQ_18817_p, SEQ_19039_p, LKD_N_228, LKD_N_229, SEQ_19054_p, NutsCons_252, SEQ_19727_p, SEQ_19730_p, SEQ_19731_p, SEQ_19736_p, SEQ_19738_p, SEQ_19739_p, LKD_N_604, SEQ_19772_p, INET_N_1183, INET_N_732, INET_N_1809, Stor_EU_96, NutsCons_242, NutsCons_272, NutsCons_211, INET_N_1818, NutsCons_218, NutsCons_227, NutsCons_476, NutsCons_188, NutsCons_243, NutsCons_192, NutsCons_457, NutsCons_194, NutsCons_221, NutsCons_247, NutsCons_323, NutsCons_317, NutsCons_241, NutsCons_77, NutsCons_526, NutsCons_255, NutsCons_321, NutsCons_254, ...]","[AT, AT, AT, AT, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, AT, DE, DE, DE, DE, ...]"
6,4102,135,"[SEQ_6833__L_LMGN, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, SEQ_3460_p, INET_N_933, LKD_N_96, LKD_N_325, LKD_N_327, LKD_N_329, LKD_N_330, LKD_N_332, LKD_N_333, SEQ_19312_p, LKD_N_343, SEQ_19317_p, SEQ_19321_p, LKD_N_347, LKD_N_349, LKD_N_350, LKD_N_352, LKD_N_355, SEQ_19330_p, LKD_N_357, LKD_N_360, LKD_N_362, LKD_N_363, LKD_N_364, LKD_N_366, LKD_N_367, LKD_N_368, LKD_N_369, LKD_N_371, LKD_N_372, LKD_N_373, LKD_N_378, LKD_N_379, LKD_N_383, SEQ_19374_p, LKD_N_385, LKD_N_386, SEQ_19381_p, LKD_N_387, LKD_N_393, LKD_N_394, LKD_N_397, SEQ_19392_p, LKD_N_399, LKD_N_400, LKD_N_401, LKD_N_402, LKD_N_403, LKD_N_404, SEQ_19406_p, LKD_N_409, LKD_N_411, LKD_N_412, LKD_N_415, LKD_N_418, LKD_N_419, SEQ_19430_p, SEQ_19432_p, LKD_N_421, LKD_N_423, LKD_N_424, LKD_N_426, LKD_N_427, LKD_N_428, LKD_N_430, LKD_N_432, LKD_N_434, LKD_N_442, LKD_N_508, LKD_N_509, SEQ_19599_p, LKD_N_512, LKD_N_515, LKD_N_516, LKD_N_517, SEQ_19610_p, SEQ_19611_p, LKD_N_518, LKD_N_521, LKD_N_524, LKD_N_525, SEQ_19620_p, LKD_N_527, LKD_N_538, SEQ_19649_p, LKD_N_539, LKD_N_540, LKD_N_541, LKD_N_542, LKD_N_544, LKD_N_546, LKD_N_547, LKD_N_549, LKD_N_555, LKD_N_568, LKD_N_570, ...]","[PL, PL, PL, PL, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, ...]"
7,4182,131,"[N_271_NS_LMGN, N_281_NS_LMGN, N_53_NS_LMGN, N_52_NS_LMGN, N_541_M_LMGN, SEQ_4351__L_LMGN, N_278_NS_LMGN, N_279_NS_LMGN, N_238_NS_LMGN, N_187_NS_LMGN, N_2_NS_LMGN, N_10_NS_LMGN, SEQ_7797__M_LMGN, N_1624_S_LMGN, N_1623_S_LMGN, N_3_NS_LMGN, N_33_NS_LMGN, N_34_NS_LMGN, N_35_NS_LMGN, N_47_NS_LMGN, N_51_NS_LMGN, NO_N_62, N_73_NS_LMGN, N_77_NS_LMGN, N_82_NS_LMGN, N_83_NS_LMGN, N_84_NS_LMGN, N_87_NS_LMGN, SEQ_39355_p, N_88_NS_LMGN, N_92_NS_LMGN, N_136_NS_LMGN, N_125_NS_LMGN, N_128_NS_LMGN, N_137_NS_LMGN, N_147_NS_LMGN, N_22_NS_LMGN, N_181_NS_LMGN, N_250_NS_LMGN, Prod_49, N_218_NS_LMGN, N_315_NS_LMGN, N_249_NS_LMGN, N_256_NS_LMGN, N_220_NS_LMGN, N_214_NS_LMGN, N_277_NS_LMGN, N_280_NS_LMGN, N_244_NS_LMGN, N_42_NS_LMGN, N_297_NS_LMGN, N_298_NS_LMGN, N_305_NS_LMGN, N_317_NS_LMGN, N_312_NS_LMGN, N_326_NS_LMGN, Prod_46, N_196_NS_LMGN, Prod_47, Prod_40, Prod_95, Prod_58, Prod_45, Prod_59, Prod_55, Prod_39, Prod_48, Prod_68, N_157_NS_LMGN, Prod_8, Prod_56, Prod_73, Prod_86, Prod_79, Prod_87, Prod_84, Prod_54, Prod_70, Prod_66, Stor_EU_35, SEQ_1096_p, SEQ_1106_p, SEQ_3994_p, LKD_N_163, SEQ_18889_p, LKD_N_164, NO_N_17, N_55_NS_LMGN, NO_N_29, NO_N_12, NO_N_18, SEQ_20101_p, SEQ_20144_p, SEQ_20151_p, SEQ_20152_p, SEQ_20162_p, N_37_NS_LMGN, SEQ_20195_p, SEQ_20314_p, SEQ_20316_p, ...]","[XX, XX, XX, XX, FR, BE, XX, XX, XX, XX, XX, XX, BE, BE, BE, XX, XX, XX, XX, NO, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, BE, BE, GB, DE, DE, DE, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, ...]"
8,4683,131,"[INET_N_1071, INET_N_178, N_602_M_LMGN, SEQ_10253__M_LMGN, N_774_M_LMGN, N_2493_S_LMGN, SEQ_719__L_LMGN, N_1609_S_LMGN, SEQ_715__L_LMGN, SEQ_2200_p, SEQ_8093__L_LMGN, SEQ_8895__M_LMGN, SEQ_12953__S_LMGN, N_1759_S_LMGN, SEQ_8873__M_LMGN, SEQ_6169__S_LMGN, SEQ_709__L_LMGN, NutsCons_807, SEQ_7800_p, SEQ_7801_p, NutsCons_754, NutsCons_810, SEQ_6523__M_LMGN, SEQ_14154_p, SEQ_1132_p, SEQ_1625_p, SEQ_1885_p, LKD_N_0, LKD_N_1, LKD_N_2, LKD_N_4, LKD_N_5, LKD_N_6, LKD_N_8, LKD_N_9, LKD_N_10, LKD_N_11, LKD_N_12, LKD_N_13, LKD_N_14, LKD_N_15, LKD_N_16, SEQ_18486_p, LKD_N_40, LKD_N_41, LKD_N_42, LKD_N_43, LKD_N_44, SEQ_18569_p, LKD_N_116, LKD_N_117, LKD_N_201, LKD_N_233, LKD_N_234, LKD_N_235, LKD_N_237, LKD_N_238, LKD_N_256, LKD_N_257, LKD_N_258, LKD_N_259, LKD_N_260, LKD_N_261, SEQ_19132_p, LKD_N_262, LKD_N_263, LKD_N_265, LKD_N_267, LKD_N_268, NutsCons_537, SEQ_19147_p, LKD_N_271, NutsCons_438, LKD_N_273, LKD_N_274, LKD_N_282, SEQ_19163_p, LKD_N_284, LKD_N_285, LKD_N_286, LKD_N_287, SEQ_19168_p, LKD_N_292, LKD_N_293, LKD_N_294, LKD_N_296, LKD_N_297, LKD_N_298, INET_N_1847, LKD_N_302, LKD_N_303, NutsCons_534, LKD_N_305, LKD_N_308, SEQ_19856_p, INET_N_481, INET_N_1821, LKD_N_309, SEQ_18333_p, INET_N_331, ...]","[NL, BE, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, BE, NL, NL, NL, NL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, NL, NL, DE, DE, NL, NL, ...]"
9,4971,120,"[N_39_NS_LMGN, N_152_NS_LMGN, N_205_NS_LMGN, N_202_NS_LMGN, N_299_NS_LMGN, SEQ_459__S_LMGN, SEQ_10293__M_LMGN, N_1757_S_LMGN, SEQ_10865__M_LMGN, SEQ_10867__M_LMGN, SEQ_12931__S_LMGN, SEQ_707__L_LMGN, SEQ_4119__S_LMGN, N_731_M_LMGN, SEQ_4131__S_LMGN, SEQ_5823__M_LMGN, SEQ_2054_p, SEQ_689__L_LMGN, SEQ_691__L_LMGN, SEQ_697__L_LMGN, SEQ_10291__M_LMGN, N_881_M_LMGN, SEQ_2348_p, N_730_M_LMGN, N_543_M_LMGN, N_2533_S_LMGN, SEQ_10869__M_LMGN, N_522_M_LMGN, SEQ_5571__M_LMGN, N_1377_S_LMGN, SEQ_6172_p, N_299_S_LMGN, Stor_EU_117, SEQ_4115__S_LMGN, SEQ_10271__M_LMGN, N_80_S_LMGN, SEQ_12852_p, SEQ_10277__M_LMGN, SEQ_14156_p, SEQ_14157_p, N_30_S_LMGN, N_32_S_LMGN, N_33_S_LMGN, NutsCons_740, NutsCons_835, N_153_NS_LMGN, N_151_NS_LMGN, N_14_NS_LMGN, N_40_NS_LMGN, N_204_NS_LMGN, N_126_NS_LMGN, N_150_NS_LMGN, NutsCons_769, SEQ_18573_p, LKD_N_103, LKD_N_105, LKD_N_167, LKD_N_168, LKD_N_169, LKD_N_170, SEQ_18973_p, LKD_N_199, LKD_N_200, Stor_EU_88, LKD_N_207, LKD_N_208, SEQ_19027_p, LKD_N_224, SEQ_19031_p, SEQ_19036_p, NutsCons_18, LKD_N_314, LKD_N_315, SEQ_19247_p, LKD_N_317, SEQ_19293_p, LKD_N_322, LKD_N_680, LKD_N_681, LKD_N_683, LKD_N_685, LKD_N_687, LKD_N_688, LKD_N_705, SEQ_20381_p, N_27_L_LMGN, INET_N_37, INET_N_761, INET_N_1344, Stor_EU_101, NutsCons_824, NutsCons_63, NutsCons_31, NutsCons_806, NutsCons_801, SEQ_7135_p, NutsCons_805, NutsCons_75, NutsCons_739, INET_N_1112, ...]","[XX, XX, XX, XX, XX, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, XX, NL, NL, NL, NL, NL, NL, NL, NL, NL, XX, XX, NL, NL, XX, XX, XX, XX, XX, XX, XX, NL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, XX, XX, NL, DE, DE, DE, NL, DE, DE, NL, NL, NL, NL, DE, NL, DE, ...]"


In [None]:
# Show the last 10 rows of the clusters table.
# NOTE(review): presumably clusters_df is sorted by number_of_nodes in descending
# order, so the tail lists the smallest communities — confirm where clusters_df is built.
clusters_df.tail(10)

Unnamed: 0,communityId,number_of_nodes,ids,country_codes
224,4662,1,[INET_N_118],[DE]
225,4675,1,[INET_N_1287],[IT]
226,4685,1,[INET_N_214],[DE]
227,4701,1,[INET_N_636],[DE]
228,4710,1,[INET_N_947],[DE]
229,4740,1,[INET_N_270],[GB]
230,4922,1,[INET_N_900],[SK]
231,4929,1,[NutsCons_1515],[XX]
232,4956,1,[NutsCons_597],[XX]
233,4973,1,[INET_N_522],[XX]


## Path Finding

In [None]:
# Make this cell re-runnable: the GDS graph catalog lives on the Neo4j server,
# so a projection created by an earlier run is still there after a kernel
# restart, and `gds.graph.project` fails if the graph name is already taken.
# The second argument (failIfMissing = false) turns the drop into a no-op when
# no graph named 'routes-weighted' exists yet.
gds.run_cypher("CALL gds.graph.drop('routes-weighted', false)")

# Project all `Node` nodes and `PIPE` relationships into the in-memory graph
# 'routes-weighted', carrying the `length_km` relationship property along so
# that it can be used as the relationship weight by path-finding algorithms.
query = """
CALL gds.graph.project(
    'routes-weighted',
    'Node',
    'PIPE',
        {
            relationshipProperties: 'length_km'
        }
)
"""

# Last expression of the cell: displays the projection summary
# (graph name, node/relationship counts, projection time).
gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {'length_km': {'defaultValue': None, 'property': 'length_km', 'aggregation': 'DEFAULT'}}}}",routes-weighted,5009,6526,495


In [None]:
# Weighted shortest path between two hard-coded nodes.
# The Cypher query below:
#   * looks up the source ('INET_N_856') and target ('NutsCons_1003') nodes by
#     their `id` property,
#   * streams Dijkstra over the in-memory graph 'routes-weighted', using the
#     `length_km` relationship property as the weight (presumably kilometres,
#     going by the property name),
#   * maps the internal node ids back to each node's `id` property through
#     gds.util.asNode,
#   * returns the total cost as `totalDistance` and the per-node costs as
#     `distances`.
# Requires the 'routes-weighted' projection created in an earlier cell.
# NOTE(review): `path` is yielded but never referenced in RETURN — it can
# probably be removed from the YIELD clause; confirm before changing.
query = """
MATCH (source:Node {id: 'INET_N_856'}), (target:Node {id: 'NutsCons_1003'})
CALL gds.shortestPath.dijkstra.stream('routes-weighted', {
    sourceNode: source,
    targetNode: target,
    relationshipWeightProperty: 'length_km'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).id AS sourceNodeName,
    gds.util.asNode(targetNode).id AS targetNodeName,
    totalCost as totalDistance,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).id] AS nodeIDs,
    costs as distances
ORDER BY index
"""
gds.run_cypher(query)

Unnamed: 0,index,sourceNodeName,targetNodeName,totalDistance,nodeIDs,distances
0,0,INET_N_856,NutsCons_1003,649.707706,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, N_688_M_LMGN, SEQ_7739__L_LMGN, SEQ_7743__L_LMGN, SEQ_7747__L_LMGN, SEQ_7749__L_LMGN, NutsCons_1003]","[0.0, 55.420951, 110.841205, 220.16349100000002, 253.03525000000002, 260.771973, 289.15599, 317.347467, 352.397237, 372.523689, 398.64748099999997, 448.01613399999997, 487.93127499999997, 540.4601309999999, 569.220877, 597.2742549999999, 630.323322, 638.421699, 649.707706]"
