## European Gas Pipeline Knowledge Graph

In [1]:
!pip install -q neo4j

In [2]:
from neo4j import GraphDatabase
import time
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', 0)

In [3]:
class Neo4jConnection:
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [4]:
uri  = 'bolt://localhost:7687'
pwd  = 'graph4va'
user = 'neo4j'
conn = Neo4jConnection(uri=uri, user=user , pwd=pwd)

In [5]:
# creating constraints on graph

conn.query("CREATE CONSTRAINT BorderPoints IF NOT EXISTS ON (n:BorderPoint) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT Compressors IF NOT EXISTS ON (n:Compressor) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT Consumers IF NOT EXISTS ON (n:Consumer) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT LNGs IF NOT EXISTS ON (n:LNG) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT Nodes IF NOT EXISTS ON (n:Node) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT PowerPlants IF NOT EXISTS ON (n:PowerPlant) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT Productions IF NOT EXISTS ON (n:Production) ASSERT n.id IS UNIQUE;")
conn.query("CREATE CONSTRAINT Storages IF NOT EXISTS ON (n:Storage) ASSERT n.id IS UNIQUE;")

[]

## Component Creation

In [6]:
# border points

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_BorderPoints.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(b:BorderPoint {id:id})
ON CREATE SET b.name          = name,
              b.country_code  = country_code,
              b.source_id     = source_ids[0],
              b.node_id       = node_ids[0],
              b.loc           = loc,
              b              += params
""")

[]

In [7]:
# compressors

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Compressors.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(c:Compressor {id:id})
ON CREATE SET c.name          = name,
              c.country_code  = country_code,
              c.source_id     = source_ids[0],
              c.node_id       = node_ids[0],
              c.loc           = loc,
              c              += params
""")

[]

In [8]:
# consumers

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Consumers.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(c:Consumer {id:id})
ON CREATE SET c.name          = name,
              c.country_code  = country_code,
              c.source_id     = source_ids[0],
              c.node_id       = node_ids[0],
              c.loc           = loc,
              c              += params
""")

[]

In [9]:
# LNGs

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_LNGs.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(l:LNG {id:id})
ON CREATE SET l.name          = name,
              l.country_code  = country_code,
              l.source_id     = source_ids,
              l.node_id       = node_ids[0],
              l.loc           = loc,
              l              += params
""")

[]

In [10]:
# storages

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Storages.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(s:Storage {id:id})
ON CREATE SET s.name          = name,
              s.country_code  = country_code,
              s.source_id     = source_ids,
              s.node_id       = node_ids[0],
              s.loc           = loc,
              s              += params
""")

[]

In [11]:
# productions

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Productions.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(p:Production {id:id})
ON CREATE SET p.name          = name,
              p.country_code  = country_code,
              p.source_id     = source_ids[0],
              p.node_id       = node_ids[0],
              p.loc           = loc,
              p              += params
""")

[]

In [12]:
# power plants

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_PowerPlants.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(pp:PowerPlant {id:id})
ON CREATE SET pp.name          = name,
              pp.country_code  = country_code,
              pp.source_id     = source_ids,
              pp.node_id       = node_ids[0],
              pp.loc           = loc,
              pp              += params
""")

[]

In [13]:
# nodes

conn.query("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_Nodes.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     replace(row.name, '"','') as name,
     row.country_code as country_code,
     apoc.convert.fromJsonList(row.source_id) as source_ids,
     apoc.convert.fromJsonList(row.node_id) as node_ids,
     point({latitude:toFloat(row.lat), longitude:toFloat(row.long)}) as loc,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MERGE(n:Node {id:id})
ON CREATE SET n.name          = name,
              n.country_code  = country_code,
              n.source_id     = source_ids[0],
              n.node_id       = node_ids[0],
              n.loc           = loc,
              n              += params
""")

[]

In [14]:
# pipe segments

conn.query("""
CALL apoc.periodic.iterate("
      LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/iamvarol/blogposts/main/medium/europe_gas_network/data/IGGIELGNC3_PipeSegments.csv' as row
FIELDTERMINATOR ';'
WITH row.id as id,
     row.name as name,
     apoc.convert.fromJsonList(row.country_code) as countries,
     apoc.convert.fromJsonList(row.node_id) as nodes,
     apoc.convert.fromJsonMap(replace(row.param, 'None', 'null')) as params
MATCH (start_p:Node {id:nodes[0]})
MATCH (end_p:Node {id:nodes[1]})
RETURN start_p, end_p, id, name, params, countries
","
      MERGE (start_p)-[pipe:PIPE {id:id}]->(end_p)
      ON CREATE SET pipe.name = name,
                    pipe += params
", {batchSize:5, parallel:true}
)
""")

[<Record batches=1306 total=6526 timeTaken=0 committedOperations=6516 failedOperations=10 failedBatches=2 retries=0 errorMessages={"ForsetiClient[transactionId=1524, clientId=2] can't acquire ExclusiveLock{owner=ForsetiClient[transactionId=1523, clientId=6]} on NODE_RELATIONSHIP_GROUP_DELETE(2556), because holders of that lock are waiting for ForsetiClient[transactionId=1524, clientId=2].\n Wait list:ExclusiveLock[\r\nClient[1523] waits for [ForsetiClient[transactionId=1524, clientId=2]]]": 1, "ForsetiClient[transactionId=2194, clientId=3] can't acquire ExclusiveLock{owner=ForsetiClient[transactionId=2190, clientId=5]} on NODE_RELATIONSHIP_GROUP_DELETE(5032), because holders of that lock are waiting for ForsetiClient[transactionId=2194, clientId=3].\n Wait list:ExclusiveLock[\r\nClient[2190] waits for [ForsetiClient[transactionId=2194, clientId=3],ForsetiClient[transactionId=-1, clientId=6]]]": 1} batch={'total': 1306, 'committed': 1304, 'failed': 2, 'errors': {"org.neo4j.kernel.Deadlo

## EDA

In [15]:
q = """
CALL db.labels() YIELD label
CALL apoc.cypher.run('MATCH (:`'+label+'`) RETURN count(*) as count',{}) YIELD value
RETURN label as nodes, value.count as nodeCount
ORDER BY nodeCount DESC
"""

df = pd.DataFrame([dict(_) for _ in conn.query(q)])
df

Unnamed: 0,nodes,nodeCount
0,Node,5009
1,Consumer,1357
2,PowerPlant,310
3,Storage,297
4,Compressor,248
5,BorderPoint,109
6,Production,104
7,LNG,32


In [16]:
q = '''
CALL db.relationshipTypes() YIELD relationshipType as type
CALL apoc.cypher.run('MATCH ()-[:`'+type+'`]->() RETURN count(*) as count',{}) YIELD value
RETURN type as relationship, value.count as relationshipCount
'''
df = pd.DataFrame([dict(_) for _ in conn.query(q)])
df

Unnamed: 0,relationship,relationshipCount
0,PIPE,6516


In [17]:
q = '''
CALL apoc.meta.stats()
'''
df = pd.DataFrame([dict(_) for _ in conn.query(q)])
df

Unnamed: 0,labelCount,relTypeCount,propertyKeyCount,nodeCount,relCount,labels,relTypes,relTypesCount,stats
0,8,1,83,7466,6516,"{'BorderPoint': 109, 'Storage': 297, 'Consumer': 1357, 'LNG': 32, 'PowerPlant': 310, 'Node': 5009, 'Production': 104, 'Compressor': 248}","{'()-[:PIPE]->(:Node)': 6516, '()-[:PIPE]->()': 6516, '(:Node)-[:PIPE]->()': 6516}",{'PIPE': 6516},"{'relTypeCount': 1, 'propertyKeyCount': 83, 'labelCount': 8, 'nodeCount': 7466, 'relCount': 6516, 'labels': {'BorderPoint': 109, 'Storage': 297, 'Consumer': 1357, 'LNG': 32, 'PowerPlant': 310, 'Node': 5009, 'Production': 104, 'Compressor': 248}, 'relTypes': {'()-[:PIPE]->(:Node)': 6516, '()-[:PIPE]->()': 6516, '(:Node)-[:PIPE]->()': 6516}}"


In [18]:
query="""
MATCH p=(start:Node)-[:PIPE*20]->(next)
WHERE not ((next)-[:PIPE]->()) AND start.id="INET_N_856"
RETURN DISTINCT [t in nodes(p) | t.id] as nodesPath, [t in nodes(p) | t.country_code] as countryPath
"""
df = pd.DataFrame([dict(_) for _ in conn.query(query)])
df

Unnamed: 0,nodesPath,countryPath
0,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_549, LKD_N_518, LKD_N_521, SEQ_19620_p, INET_N_1637]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
1,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_549, LKD_N_518, LKD_N_521, LKD_N_524, N_2287_S_LMGN]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
2,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_549, LKD_N_518, LKD_N_521, LKD_N_530, INET_N_1300]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
3,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_549, LKD_N_518, LKD_N_521, LKD_N_530, NutsCons_447]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
4,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_521, LKD_N_524, LKD_N_525, NutsCons_435, INET_N_173]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
5,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_518, LKD_N_521, LKD_N_530, LKD_N_535, SEQ_19639_p, NutsCons_472]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
6,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_521, SEQ_19620_p, LKD_N_527, LKD_N_538, SEQ_19649_p, NutsCons_349]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
7,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_521, SEQ_19620_p, LKD_N_527, LKD_N_531, LKD_N_530, INET_N_1300]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
8,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, LKD_N_516, LKD_N_517, LKD_N_521, SEQ_19620_p, LKD_N_527, LKD_N_531, LKD_N_530, NutsCons_447]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"
9,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, SEQ_5085_p, INET_N_1805, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, INET_N_933, SEQ_30_p, INET_N_933, LKD_N_96, SEQ_18708_p, SEQ_18711_p, LKD_N_97, SEQ_18724_p, SEQ_18726_p, SEQ_18727_p, SEQ_18729_p, INET_N_1576]","[PL, PL, PL, PL, PL, PL, PL, PL, PL, DE, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE]"


## Graph Data Science

In [21]:
!pip install -q graphdatascience

In [24]:
import pandas as pd
from graphdatascience import GraphDataScience

uri  = 'bolt://localhost:7687'
pwd  = 'graph4va'
user = 'neo4j'
gds = GraphDataScience(uri, auth=("neo4j", pwd))

In [50]:
# dropping an existing graph
query = "CALL gds.graph.drop('pipelines') YIELD graphName;"
gds.run_cypher(query)

In [26]:
query = """CALL gds.graph.project(
                'pipelines',
                'Node',
                'PIPE'
            )
            YIELD
                graphName, nodeProjection, nodeCount, relationshipProjection, relationshipCount
"""

gds.run_cypher(query)

Unnamed: 0,graphName,nodeProjection,nodeCount,relationshipProjection,relationshipCount
0,pipelines,"{'Node': {'label': 'Node', 'properties': {}}}",5009,"{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",6516


In [27]:
G, res = gds.graph.project(
    'pipes',
    'Node',
    "PIPE"
)
type(res)

pandas.core.series.Series

In [28]:
print(res)

nodeProjection            {'Node': {'label': 'Node', 'properties': {}}}                                                   
relationshipProjection    {'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}
graphName                 pipes                                                                                           
nodeCount                 5009                                                                                            
relationshipCount         6516                                                                                            
projectMillis             28                                                                                              
Name: 0, dtype: object


In [29]:
G.node_count(), G.relationship_count()

(5009, 6516)

In [30]:
G.degree_distribution()

p99     5.000000 
min     0.000000 
max     12.000000
mean    1.300858 
p90     3.000000 
p50     1.000000 
p999    7.000000 
p95     3.000000 
p75     2.000000 
dtype: float64

In [31]:
res = gds.pageRank.stream(G)
res

Unnamed: 0,nodeId,score
0,2457,0.331688
1,2458,0.431934
2,2459,0.333572
3,2460,0.597695
4,2461,0.515261
...,...,...
5004,7461,0.411465
5005,7462,0.319784
5006,7463,0.267938
5007,7464,0.213750


In [32]:
# top 10 by page rank
res.sort_values(by='score', ascending=False)[:10]

Unnamed: 0,nodeId,score
2848,5305,4.861048
2852,5309,4.331384
514,2971,4.225036
1813,4270,3.832889
1708,4165,3.320917
1769,4226,3.236406
1776,4233,3.128835
1709,4166,2.935067
2779,5236,2.765317
2112,4569,2.691821


In [33]:
query = """
MATCH (n:Node)
WHERE ID(n) = 5305
RETURN n.id
"""
df = pd.DataFrame([dict(_) for _ in conn.query(query)])
df

Failed to write data to connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687)))


Query failed: Failed to write data to connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687)))


TypeError: 'NoneType' object is not iterable

In [34]:
# Centrality determined by node degree
query = """
MATCH (n:Node)
WITH n, SIZE((n)-[:PIPE]-(:Node)) AS degree
RETURN n.id AS id, n.country_code AS node_code, degree
ORDER BY degree DESC
"""
df = pd.DataFrame([dict(_) for _ in conn.query(query)])
df

Unnamed: 0,id,node_code,degree
0,LKD_N_38,DE,14
1,N_144_L_LMGN,GB,13
2,N_72_L_LMGN,RU,11
3,INET_N_1679,UA,11
4,LKD_N_363,DE,11
...,...,...,...
5004,INET_N_1408,IT,1
5005,NutsCons_1362,TR,1
5006,Prod_51,XX,1
5007,NutsCons_714,EE,1


In [35]:
# measure the nodes by indegree count
query = """MATCH (n:Node)
WITH n, SIZE((n)<-[:PIPE]-(:Node)) AS indegree
RETURN n.id AS id, n.country_code AS node_code, indegree
ORDER BY indegree DESC"""

df = pd.DataFrame([dict(_) for _ in conn.query(query)])
df

Unnamed: 0,id,node_code,indegree
0,INET_N_1071,NL,7
1,INET_N_883,UA,7
2,LKD_N_40,DE,7
3,LKD_N_77,DE,6
4,LKD_N_132,DE,6
...,...,...,...
5004,N_2495_S_LMGN,FR,0
5005,INET_N_863,UA,0
5006,SEQ_1305__M_LMGN,HU,0
5007,N_671_S_LMGN,PT,0


In [36]:
# writing the page ranks of the nodes
q= """CALL gds.pageRank.write('pipes',
    {
        writeProperty: 'pagerank'
    }
)
YIELD nodePropertiesWritten, ranIterations"""
gds.run_cypher(q)

Unnamed: 0,nodePropertiesWritten,ranIterations
0,5009,20


In [37]:
q = """MATCH (n:Node)<-[r:PIPE]-(:Node)
RETURN n.id AS id, n.country_code AS countryCode, round(n.pagerank,2) AS pagerank, round(sum(r.max_cap_M_m3_per_d),2) as maximumAnnualGasVolume
ORDER BY pagerank DESC, id ASC
LIMIT 10"""
df = pd.DataFrame([dict(_) for _ in conn.query(q)])
df

Unnamed: 0,id,countryCode,pagerank,maximumAnnualGasVolume
0,LKD_N_40,DE,4.86,195.52
1,LKD_N_44,DE,4.33,54.79
2,N_183_M_LMGN,MD,4.23,82.19
3,SEQ_1813__M_LMGN,MD,3.83,54.79
4,SEQ_7563__L_LMGN,XX,3.32,82.19
5,N_32_M_LMGN,RU,3.24,54.79
6,N_85_M_LMGN,RU,3.13,54.79
7,SEQ_7577__L_LMGN,XX,2.94,27.4
8,SEQ_2409_p,AZ,2.77,54.79
9,INET_N_408,UA,2.69,164.38


## Degree Centrality

In [38]:
query = """
CALL gds.graph.project(
  'degreeCentralityPipes',
  'Node',
  {
    PIPE: {
      orientation: "UNDIRECTED"
    }
  }
)
"""
gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'UNDIRECTED', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",degreeCentralityPipes,5009,13032,15


In [39]:
query = """
CALL gds.degree.stream('degreeCentralityPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, score AS degree
ORDER BY degree DESC, id ASC
LIMIT 10
"""
gds.run_cypher(query)

Unnamed: 0,id,degree
0,LKD_N_38,14.0
1,N_144_L_LMGN,13.0
2,INET_N_1679,11.0
3,INET_N_408,11.0
4,LKD_N_147,11.0
5,LKD_N_363,11.0
6,N_72_L_LMGN,11.0
7,INET_N_1071,10.0
8,INET_N_1118,10.0
9,INET_N_1579,10.0


## Betweenness Centrality

In [40]:
query = "CALL gds.graph.project('betweennessPipes', 'Node', 'PIPE')"
gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {}}}",betweennessPipes,5009,6516,10


In [41]:
query = """
CALL gds.betweenness.write.estimate('betweennessPipes', { writeProperty: 'betweenness' })
YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory
"""

gds.run_cypher(query)

Unnamed: 0,nodeCount,relationshipCount,bytesMin,bytesMax,requiredMemory
0,5009,6516,1404224,1404224,1371 KiB


In [42]:
query = """
CALL gds.betweenness.stats('betweennessPipes')
YIELD centralityDistribution
RETURN centralityDistribution.min AS minimumScore, centralityDistribution.mean AS meanScore, centralityDistribution.max AS maxScore
"""
gds.run_cypher(query)

Unnamed: 0,minimumScore,meanScore,maxScore
0,0.0,17748.48583,863607.999992


In [43]:
query = """
CALL gds.betweenness.stream('betweennessPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, round(score,2) as score
ORDER BY score DESC, id ASC
LIMIT 10
"""

gds.run_cypher(query)

Unnamed: 0,id,score
0,INET_N_984,863605.26
1,LKD_N_38,606942.11
2,LKD_N_492,600710.37
3,INET_N_229,597845.66
4,SEQ_19574_p,597400.37
5,SEQ_19575_p,596701.37
6,LKD_N_491,596621.84
7,LKD_N_493,596002.37
8,SEQ_719__L_LMGN,594105.73
9,LKD_N_165,594088.6


In [44]:
query = """
CALL gds.betweenness.stream('betweennessPipes')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS id, round(score,2) as score
ORDER BY score DESC, id ASC
"""

betweenness_df = gds.run_cypher(query)

In [45]:
betweenness_df.head(20)

Unnamed: 0,id,score
0,INET_N_984,863605.26
1,LKD_N_38,606942.11
2,LKD_N_492,600710.37
3,INET_N_229,597845.66
4,SEQ_19574_p,597400.37
5,SEQ_19575_p,596701.37
6,LKD_N_491,596621.84
7,LKD_N_493,596002.37
8,SEQ_719__L_LMGN,594105.73
9,LKD_N_165,594088.6


## Louvain

In [46]:
query = """
CALL gds.louvain.stream('pipes')
YIELD nodeId, communityId
RETURN
	communityId,
    SIZE(COLLECT(gds.util.asNode(nodeId).id)) AS number_of_nodes,
	COLLECT(gds.util.asNode(nodeId).id) AS ids,
    COLLECT(gds.util.asNode(nodeId).country_code) AS country_codes
ORDER BY number_of_nodes DESC, communityId;
"""
clusters_df = gds.run_cypher(query)
clusters_df.head(10)

Unnamed: 0,communityId,number_of_nodes,ids,country_codes
0,3480,195,"[INET_N_817, SEQ_7777_p, INET_N_755, INET_N_929, SEQ_667_p, SEQ_7802_p, SEQ_678_p, SEQ_7811_p, SEQ_7829_p, SEQ_7859_p, SEQ_7882_p, SEQ_702_p, INET_N_1490, INET_N_1689, INET_N_1188, SEQ_8530_p, INET_N_1690, SEQ_8615_p, INET_N_979, SEQ_8671_p, INET_N_402, SEQ_8753_p, SEQ_831_p, SEQ_8881_p, INET_N_1691, INET_N_885, INET_N_1814, INET_N_1555, SEQ_9854_p, SEQ_9876_p, SEQ_9896_p, INET_N_1280, INET_N_839, INET_N_64, INET_N_883, INET_N_139, SEQ_11191_p, INET_N_717, SEQ_11233_p, INET_N_1758, SEQ_1040_p, INET_N_780, SEQ_11307_p, SEQ_1051_p, SEQ_11529_p, INET_N_1598, SEQ_11608_p, INET_N_1579, INET_N_1187, INET_N_1589, SEQ_1160_p, SEQ_12580_p, N_168_L_LMGN, N_173_L_LMGN, SEQ_2829__L_LMGN, N_126_L_LMGN, SEQ_5383__M_LMGN, N_183_M_LMGN, N_10_L_LMGN, SEQ_3791__L_LMGN, SEQ_3793__L_LMGN, N_1604_S_LMGN, SEQ_1319__M_LMGN, SEQ_4715_p, N_312_M_LMGN, SEQ_5601__M_LMGN, SEQ_5597__M_LMGN, SEQ_7047__L_LMGN, SEQ_1797__M_LMGN, INET_N_742, SEQ_15043__S_LMGN, N_393_S_LMGN, N_711_S_LMGN, N_175_L_LMGN, SEQ_1229__L_LMGN, SEQ_3975__L_LMGN, Prod_3, N_477_M_LMGN, N_145_M_LMGN, SEQ_7009__L_LMGN, SEQ_9097__M_LMGN, N_2585_M_LMGN, SEQ_1235__L_LMGN, SEQ_5169__M_LMGN, SEQ_1237__L_LMGN, SEQ_4395__M_LMGN, SEQ_7005__L_LMGN, SEQ_7001__L_LMGN, SEQ_6995__L_LMGN, SEQ_7781_p, N_72_L_LMGN, INET_N_893, SEQ_3079__M_LMGN, N_472_M_LMGN, N_594_M_LMGN, SEQ_1255__L_LMGN, SEQ_9386_p, SEQ_3813__L_LMGN, SEQ_3817__L_LMGN, N_182_L_LMGN, ...]","[UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, MD, MD, BY, BY, BY, BY, UA, UA, UA, UA, UA, MD, MD, RO, RO, UA, UA, UA, BY, UA, UA, UA, UA, RO, UA, UA, BY, BY, BY, UA, RO, RO, RO, RO, RU, RU, UA, RU, RU, BY, BY, BY, BY, RU, ...]"
1,4496,186,"[N_61_L_LMGN, N_163_L_LMGN, N_150_L_LMGN, SEQ_3245__L_LMGN, N_98_L_LMGN, SEQ_2157__L_LMGN, SEQ_8233__M_LMGN, N_1302_S_LMGN, SEQ_9997__M_LMGN, SEQ_1900_p, N_811_M_LMGN, SEQ_9973__M_LMGN, SEQ_9971__M_LMGN, N_2320_S_LMGN, SEQ_12551__S_LMGN, SEQ_8005__L_LMGN, SEQ_7999__L_LMGN, N_117_L_LMGN, SEQ_2599__L_LMGN, SEQ_2366_p, SEQ_17843__S_LMGN, N_2120_S_LMGN, SEQ_2582_p, SEQ_2586_p, SEQ_2593__L_LMGN, SEQ_2597__L_LMGN, Stor_EU_33, SEQ_2153__L_LMGN, N_97_L_LMGN, SEQ_21287__S_LMGN, SEQ_2577__L_LMGN, SEQ_2575__L_LMGN, SEQ_3323__M_LMGN, N_341_M_LMGN, SEQ_4533__M_LMGN, N_434_M_LMGN, SEQ_3713__L_LMGN, SEQ_3707__L_LMGN, SEQ_3705__L_LMGN, N_145_L_LMGN, SEQ_3189__L_LMGN, N_146_L_LMGN, SEQ_4207__L_LMGN, SEQ_4209__L_LMGN, SEQ_4215__L_LMGN, SEQ_4219__L_LMGN, SEQ_4223__L_LMGN, N_56_L_LMGN, SEQ_1295__L_LMGN, SEQ_9995__M_LMGN, SEQ_1365__L_LMGN, SEQ_4975_p, SEQ_7017__M_LMGN, N_118_L_LMGN, N_1348_S_LMGN, N_2238_S_LMGN, N_2237_S_LMGN, N_2239_S_LMGN, SEQ_19623__S_LMGN, N_186_L_LMGN, N_57_L_LMGN, SEQ_1317__L_LMGN, SEQ_1311__L_LMGN, N_147_L_LMGN, SEQ_6599__L_LMGN, SEQ_5273__L_LMGN, SEQ_5285__L_LMGN, SEQ_1291__L_LMGN, SEQ_1387__L_LMGN, N_1366_S_LMGN, N_1352_S_LMGN, N_1354_S_LMGN, SEQ_1361__L_LMGN, N_60_L_LMGN, SEQ_6514_p, N_210_L_LMGN, SEQ_7497__L_LMGN, SEQ_7493__L_LMGN, SEQ_6524_p, SEQ_15277__S_LMGN, N_1729_S_LMGN, N_2000_S_LMGN, N_2001_S_LMGN, SEQ_4371__L_LMGN, SEQ_4367__L_LMGN, SEQ_1371__L_LMGN, SEQ_1375__L_LMGN, SEQ_6895_p, SEQ_10825__M_LMGN, N_1158_S_LMGN, N_1356_S_LMGN, N_1855_S_LMGN, SEQ_8134_p, SEQ_8140_p, SEQ_1307__L_LMGN, SEQ_8334_p, SEQ_8225__M_LMGN, SEQ_3335__M_LMGN, N_342_M_LMGN, N_1954_S_LMGN, ...]","[FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, ...]"
2,4971,175,"[INET_N_1071, N_271_NS_LMGN, N_281_NS_LMGN, N_53_NS_LMGN, N_52_NS_LMGN, N_39_NS_LMGN, N_152_NS_LMGN, N_205_NS_LMGN, N_202_NS_LMGN, N_299_NS_LMGN, SEQ_459__S_LMGN, INET_N_178, SEQ_10293__M_LMGN, N_1757_S_LMGN, SEQ_10865__M_LMGN, SEQ_10867__M_LMGN, SEQ_12931__S_LMGN, SEQ_707__L_LMGN, SEQ_4119__S_LMGN, N_731_M_LMGN, SEQ_4131__S_LMGN, SEQ_5823__M_LMGN, SEQ_2054_p, SEQ_689__L_LMGN, SEQ_691__L_LMGN, SEQ_697__L_LMGN, N_602_M_LMGN, SEQ_10253__M_LMGN, N_774_M_LMGN, N_2493_S_LMGN, SEQ_719__L_LMGN, N_1609_S_LMGN, SEQ_10291__M_LMGN, N_881_M_LMGN, SEQ_715__L_LMGN, SEQ_2200_p, SEQ_2348_p, N_730_M_LMGN, SEQ_8093__L_LMGN, N_543_M_LMGN, N_2533_S_LMGN, SEQ_8895__M_LMGN, SEQ_3795_p, SEQ_12953__S_LMGN, N_1759_S_LMGN, SEQ_10869__M_LMGN, N_522_M_LMGN, SEQ_5571__M_LMGN, SEQ_8873__M_LMGN, SEQ_6169__S_LMGN, SEQ_709__L_LMGN, NutsCons_807, SEQ_6172_p, N_299_S_LMGN, Stor_EU_117, SEQ_4115__S_LMGN, NutsCons_834, SEQ_7800_p, SEQ_7801_p, NutsCons_754, NutsCons_810, SEQ_10271__M_LMGN, N_80_S_LMGN, SEQ_12852_p, SEQ_6523__M_LMGN, SEQ_10277__M_LMGN, SEQ_14154_p, SEQ_14156_p, SEQ_14157_p, N_30_S_LMGN, N_32_S_LMGN, N_33_S_LMGN, NutsCons_740, NutsCons_835, N_153_NS_LMGN, N_151_NS_LMGN, N_14_NS_LMGN, N_40_NS_LMGN, N_82_NS_LMGN, N_83_NS_LMGN, N_84_NS_LMGN, N_204_NS_LMGN, N_92_NS_LMGN, N_136_NS_LMGN, N_128_NS_LMGN, N_137_NS_LMGN, N_147_NS_LMGN, N_150_NS_LMGN, N_181_NS_LMGN, Prod_49, N_280_NS_LMGN, N_42_NS_LMGN, N_305_NS_LMGN, N_317_NS_LMGN, Prod_40, Prod_58, Prod_45, Prod_55, Prod_54, NutsCons_769, ...]","[NL, XX, XX, XX, XX, XX, XX, XX, XX, XX, NL, BE, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, XX, NL, NL, NL, NL, NL, NL, NL, NL, NL, NL, BE, NL, NL, NL, NL, NL, XX, XX, NL, NL, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, NL, ...]"
3,1528,174,"[N_388_M_LMGN, SEQ_309__L_LMGN, SEQ_325__L_LMGN, SEQ_331__L_LMGN, N_390_M_LMGN, N_16_L_LMGN, SEQ_369__L_LMGN, SEQ_373__L_LMGN, SEQ_403__L_LMGN, SEQ_437__L_LMGN, SEQ_449__L_LMGN, SEQ_457__L_LMGN, SEQ_467__L_LMGN, N_17_L_LMGN, N_18_L_LMGN, SEQ_475__L_LMGN, SEQ_511__L_LMGN, SEQ_525__L_LMGN, SEQ_527__L_LMGN, SEQ_537__L_LMGN, SEQ_583__L_LMGN, SEQ_591__L_LMGN, N_19_L_LMGN, N_20_L_LMGN, N_21_L_LMGN, N_22_L_LMGN, SEQ_623__L_LMGN, SEQ_639__L_LMGN, N_24_L_LMGN, N_25_L_LMGN, N_26_L_LMGN, SEQ_805__L_LMGN, N_82_L_LMGN, SEQ_1903__M_LMGN, SEQ_1757__L_LMGN, SEQ_1765__L_LMGN, SEQ_1771__L_LMGN, SEQ_1773__L_LMGN, SEQ_1797__L_LMGN, N_84_L_LMGN, SEQ_1829__L_LMGN, SEQ_1831__L_LMGN, SEQ_1833__L_LMGN, SEQ_1835__L_LMGN, SEQ_1839__L_LMGN, SEQ_1845__L_LMGN, N_86_L_LMGN, Stor_NonEU_21, N_34_M_LMGN, SEQ_3103__L_LMGN, SEQ_1179__M_LMGN, SEQ_5955__M_LMGN, SEQ_4003__M_LMGN, SEQ_5979__M_LMGN, SEQ_2383__M_LMGN, SEQ_2715__M_LMGN, SEQ_5971__M_LMGN, SEQ_353__M_LMGN, SEQ_9501__M_LMGN, SEQ_5963__M_LMGN, SEQ_5989__M_LMGN, N_123_M_LMGN, SEQ_6069__M_LMGN, SEQ_109__M_LMGN, SEQ_121__M_LMGN, N_9_M_LMGN, N_32_M_LMGN, N_83_M_LMGN, N_85_M_LMGN, SEQ_1183__M_LMGN, N_122_M_LMGN, SEQ_1857__M_LMGN, N_228_M_LMGN, N_231_M_LMGN, N_232_M_LMGN, N_237_M_LMGN, N_238_M_LMGN, N_271_M_LMGN, SEQ_12086_p, N_272_M_LMGN, N_275_M_LMGN, N_273_M_LMGN, N_345_M_LMGN, N_347_M_LMGN, N_348_M_LMGN, N_349_M_LMGN, SEQ_3415__M_LMGN, N_350_M_LMGN, SEQ_3447__M_LMGN, N_351_M_LMGN, N_352_M_LMGN, N_353_M_LMGN, N_354_M_LMGN, N_358_M_LMGN, N_387_M_LMGN, N_389_M_LMGN, SEQ_3981__M_LMGN, SEQ_3997__M_LMGN, SEQ_4007__M_LMGN, N_436_M_LMGN, ...]","[RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, RU, ...]"
4,4830,161,"[INET_N_753, INET_N_1731, INET_N_1131, INET_N_750, INET_N_853, INET_N_1302, INET_N_1819, INET_N_1760, SEQ_7164_p, INET_N_408, SEQ_592_p, INET_N_995, INET_N_1761, SEQ_7475_p, INET_N_874, SEQ_7649_p, INET_N_679, INET_N_1626, INET_N_223, INET_N_749, SEQ_1178_p, INET_N_707, SEQ_6673__M_LMGN, INET_N_640, SEQ_1746_p, SEQ_1747_p, SEQ_1749_p, SEQ_1751_p, SEQ_1753_p, SEQ_5789__L_LMGN, SEQ_5243__M_LMGN, N_489_M_LMGN, N_490_M_LMGN, SEQ_9721__M_LMGN, SEQ_1775_p, SEQ_9741__M_LMGN, SEQ_9695__M_LMGN, SEQ_9693__M_LMGN, SEQ_9691__M_LMGN, SEQ_9683__M_LMGN, SEQ_9671__M_LMGN, SEQ_4097__M_LMGN, SEQ_4101__M_LMGN, SEQ_2745_p, N_397_M_LMGN, N_752_S_LMGN, N_1590_S_LMGN, N_2583_S_LMGN, SEQ_5829__L_LMGN, SEQ_1693__L_LMGN, N_80_L_LMGN, SEQ_5901__L_LMGN, N_141_L_LMGN, SEQ_5131_p, N_399_M_LMGN, INET_N_863, SEQ_5151_p, N_559_S_LMGN, SEQ_5426_p, SEQ_3901__S_LMGN, N_2689_M_LMGN, N_617_M_LMGN, N_78_L_LMGN, N_180_L_LMGN, N_247_L_LMGN, SEQ_589__M_LMGN, SEQ_10331__M_LMGN, SEQ_5727__L_LMGN, SEQ_5735__L_LMGN, SEQ_7292_p, SEQ_5741__L_LMGN, INET_N_168, N_611_M_LMGN, Stor_NonEU_37, N_231_L_LMGN, SEQ_9753__M_LMGN, Stor_NonEU_30, N_230_L_LMGN, SEQ_6369__L_LMGN, SEQ_3041__L_LMGN, N_77_L_LMGN, N_139_L_LMGN, SEQ_8901_p, SEQ_10180_p, NutsCons_1365, SEQ_5661__M_LMGN, SEQ_5669__M_LMGN, SEQ_5673__M_LMGN, N_610_M_LMGN, N_613_M_LMGN, NutsCons_1006, SEQ_9659__M_LMGN, SEQ_14094_p, SEQ_18137__S_LMGN, NutsCons_672, N_1591_S_LMGN, INET_N_1817, N_275_S_LMGN, SEQ_14713_p, NutsCons_1176, ...]","[SK, SK, BY, BY, BY, BY, BY, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, UA, HU, HU, SK, SK, SK, SK, SK, SK, HU, HU, HU, HU, HU, HU, PL, PL, PL, PL, PL, HU, HU, HU, HU, HU, HU, HU, UA, PL, PL, PL, BY, RO, RO, UA, UA, HU, HU, HU, HU, HU, UA, UA, UA, UA, UA, HU, HU, HU, HU, HU, HU, BY, UA, HU, UA, UA, UA, UA, UA, UA, PL, UA, RO, RO, RO, RO, HU, HU, PL, PL, PL, HU, HU, HU, PL, RO, RO, PL, ...]"
5,4510,155,"[SEQ_6833__L_LMGN, SEQ_17253_p, SEQ_17262_p, SEQ_30_p, SEQ_3460_p, INET_N_229, INET_N_933, LKD_N_96, SEQ_18708_p, SEQ_18711_p, LKD_N_97, SEQ_18718_p, SEQ_18719_p, LKD_N_99, SEQ_18724_p, LKD_N_193, SEQ_18953_p, SEQ_18956_p, LKD_N_507, LKD_N_325, LKD_N_327, LKD_N_329, LKD_N_330, LKD_N_332, LKD_N_333, SEQ_19312_p, LKD_N_343, SEQ_19317_p, SEQ_19321_p, LKD_N_347, LKD_N_349, LKD_N_350, LKD_N_352, LKD_N_355, SEQ_19330_p, LKD_N_357, LKD_N_360, LKD_N_362, LKD_N_363, LKD_N_364, LKD_N_366, LKD_N_367, LKD_N_368, LKD_N_369, LKD_N_371, LKD_N_372, LKD_N_373, LKD_N_378, LKD_N_379, LKD_N_383, SEQ_19374_p, LKD_N_385, LKD_N_386, SEQ_19381_p, LKD_N_387, LKD_N_393, LKD_N_394, LKD_N_397, SEQ_19392_p, LKD_N_399, LKD_N_400, LKD_N_401, LKD_N_402, LKD_N_403, LKD_N_404, SEQ_19406_p, LKD_N_409, LKD_N_411, LKD_N_412, LKD_N_415, LKD_N_418, LKD_N_419, SEQ_19430_p, SEQ_19432_p, LKD_N_426, LKD_N_428, LKD_N_434, LKD_N_442, LKD_N_508, LKD_N_509, SEQ_19599_p, LKD_N_512, LKD_N_515, LKD_N_516, LKD_N_517, SEQ_19610_p, SEQ_19611_p, LKD_N_518, LKD_N_521, LKD_N_524, LKD_N_525, SEQ_19620_p, LKD_N_527, LKD_N_530, LKD_N_531, LKD_N_532, LKD_N_535, LKD_N_538, SEQ_19649_p, LKD_N_539, ...]","[PL, PL, PL, PL, PL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, ...]"
6,807,149,"[N_67_M_LMGN, N_317_M_LMGN, SEQ_3160_p, SEQ_3167_p, SEQ_8183__L_LMGN, SEQ_45__M_LMGN, SEQ_3177_p, SEQ_3179_p, N_319_M_LMGN, N_526_M_LMGN, N_323_M_LMGN, N_525_M_LMGN, N_635_M_LMGN, N_723_M_LMGN, N_20_M_LMGN, N_638_M_LMGN, N_328_M_LMGN, N_211_M_LMGN, N_206_M_LMGN, SEQ_7825__L_LMGN, SEQ_7799__L_LMGN, SEQ_7781__L_LMGN, SEQ_4894_p, N_262_L_LMGN, N_261_L_LMGN, SEQ_4946_p, SEQ_1019__M_LMGN, N_112_M_LMGN, N_68_M_LMGN, SEQ_4455__M_LMGN, N_250_M_LMGN, N_709_S_LMGN, SEQ_5523__L_LMGN, SEQ_7771__L_LMGN, N_222_L_LMGN, SEQ_4013__L_LMGN, SEQ_6421_p, SEQ_6426_p, SEQ_4031__L_LMGN, SEQ_6433_p, N_176_L_LMGN, SEQ_6453_p, SEQ_6467_p, N_221_L_LMGN, SEQ_7252_p, SEQ_2543__M_LMGN, SEQ_2509__M_LMGN, SEQ_7582_p, N_249_M_LMGN, SEQ_7586_p, N_250_S_LMGN, N_251_S_LMGN, SEQ_7751_p, SEQ_8205__L_LMGN, SEQ_7759_p, SEQ_6933__L_LMGN, N_260_L_LMGN, SEQ_8055__L_LMGN, SEQ_8205_p, SEQ_5567__L_LMGN, SEQ_7179__L_LMGN, SEQ_10431_p, SEQ_10433_p, SEQ_10439_p, N_253_S_LMGN, N_21_M_LMGN, N_102_M_LMGN, NutsCons_1312, SEQ_1771__M_LMGN, N_361_M_LMGN, N_363_M_LMGN, SEQ_1017__S_LMGN, N_527_M_LMGN, N_318_M_LMGN, N_528_M_LMGN, SEQ_6901__M_LMGN, N_639_M_LMGN, SEQ_9143__M_LMGN, SEQ_9145__M_LMGN, NutsCons_1223, NutsCons_1304, N_53_S_LMGN, NutsCons_1202, N_58_S_LMGN, NutsCons_1302, NutsCons_1199, NutsCons_1224, SEQ_14599_p, NutsCons_1277, NutsCons_1276, NutsCons_1355, N_372_S_LMGN, NutsCons_1392, NutsCons_1249, NutsCons_1203, N_537_S_LMGN, NutsCons_1222, NutsCons_1220, N_543_S_LMGN, SEQ_14973_p, ...]","[TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, XX, XX, XX, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, XX, XX, TR, TR, TR, TR, TR, TR, TR, TR, XX, XX, TR, XX, XX, TR, TR, TR, XX, XX, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, TR, ...]"
7,4256,145,"[N_1377_S_LMGN, LKD_N_7, SEQ_18492_p, LKD_N_20, LKD_N_21, LKD_N_22, LKD_N_23, LKD_N_25, LKD_N_26, SEQ_18507_p, LKD_N_28, NutsCons_361, LKD_N_32, LKD_N_34, NutsCons_366, SEQ_18521_p, LKD_N_36, SEQ_18524_p, LKD_N_38, LKD_N_45, LKD_N_46, SEQ_18542_p, LKD_N_47, LKD_N_48, NutsCons_16, SEQ_18549_p, LKD_N_50, LKD_N_51, LKD_N_53, LKD_N_55, SEQ_18665_p, SEQ_18668_p, LKD_N_91, LKD_N_103, LKD_N_105, SEQ_18736_p, LKD_N_107, NutsCons_372, SEQ_18761_p, INET_N_653, SEQ_18767_p, SEQ_18768_p, SEQ_18769_p, SEQ_18772_p, SEQ_18773_p, SEQ_18774_p, LKD_N_165, LKD_N_167, LKD_N_168, LKD_N_169, LKD_N_170, LKD_N_192, LKD_N_196, LKD_N_198, LKD_N_200, Stor_EU_88, LKD_N_204, LKD_N_205, LKD_N_206, LKD_N_207, LKD_N_208, LKD_N_209, LKD_N_210, LKD_N_214, SEQ_19001_p, LKD_N_217, LKD_N_222, SEQ_19027_p, LKD_N_224, SEQ_19031_p, SEQ_19036_p, SEQ_19069_p, SEQ_19218_p, SEQ_19228_p, SEQ_19229_p, SEQ_19231_p, SEQ_19232_p, SEQ_19243_p, SEQ_19244_p, SEQ_19247_p, LKD_N_317, LKD_N_320, SEQ_19277_p, LKD_N_694, SEQ_19892_p, LKD_N_696, LKD_N_697, LKD_N_698, LKD_N_699, LKD_N_700, LKD_N_704, LKD_N_705, SEQ_19257_p, INET_N_477, INET_N_761, Stor_EU_101, Stor_EU_85, Stor_EU_125, NutsCons_362, NutsCons_63, ...]","[NL, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, ...]"
8,4128,137,"[N_149_L_LMGN, N_234_L_LMGN, SEQ_3237__L_LMGN, SEQ_3231__L_LMGN, SEQ_3227__L_LMGN, N_2035_S_LMGN, N_1859_S_LMGN, SEQ_19141__S_LMGN, N_45_L_LMGN, SEQ_5097__L_LMGN, N_921_M_LMGN, SEQ_10787__M_LMGN, SEQ_4389__L_LMGN, SEQ_4395__L_LMGN, N_151_L_LMGN, SEQ_5953__L_LMGN, SEQ_5947__L_LMGN, N_238_L_LMGN, SEQ_5963__L_LMGN, SEQ_8171__M_LMGN, SEQ_8173__M_LMGN, SEQ_8179__M_LMGN, SEQ_21051__S_LMGN, SEQ_4243_p, SEQ_3261__L_LMGN, SEQ_4379__L_LMGN, SEQ_6166_p, N_2460_S_LMGN, N_2397_S_LMGN, SEQ_3223__L_LMGN, N_2037_S_LMGN, N_2564_S_LMGN, N_2666_S_LMGN, N_855_S_LMGN, SEQ_889__L_LMGN, SEQ_893__L_LMGN, SEQ_5101__L_LMGN, SEQ_5103__L_LMGN, SEQ_5959__L_LMGN, SEQ_9900_p, N_893_M_LMGN, SEQ_10789__M_LMGN, SEQ_8189__M_LMGN, SEQ_8219__M_LMGN, NutsCons_869, NutsCons_766, N_1645_S_LMGN, N_1996_S_LMGN, SEQ_16225_p, N_2126_S_LMGN, N_2328_S_LMGN, N_2402_S_LMGN, INET_N_46, NutsCons_273, N_2455_S_LMGN, N_2457_S_LMGN, NutsCons_884, NutsCons_854, LKD_N_27, LKD_N_132, SEQ_18823_p, LKD_N_133, LKD_N_136, LKD_N_137, LKD_N_140, LKD_N_141, LKD_N_142, LKD_N_143, LKD_N_144, LKD_N_145, LKD_N_146, LKD_N_147, LKD_N_148, LKD_N_149, LKD_N_150, LKD_N_151, LKD_N_152, LKD_N_153, LKD_N_154, SEQ_18852_p, LKD_N_155, SEQ_18867_p, LKD_N_156, LKD_N_157, LKD_N_158, SEQ_19068_p, SEQ_19073_p, SEQ_19074_p, SEQ_19077_p, SEQ_19078_p, SEQ_19079_p, SEQ_19081_p, SEQ_19739_p, SEQ_19740_p, LKD_N_631, NutsCons_311, LKD_N_634, LKD_N_635, LKD_N_662, LKD_N_664, ...]","[FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, FR, CH, FR, FR, FR, FR, FR, FR, FR, CH, FR, FR, FR, CH, CH, FR, FR, FR, FR, CH, FR, FR, FR, FR, FR, FR, FR, FR, CH, FR, CH, CH, CH, FR, FR, CH, FR, CH, FR, FR, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, ...]"
9,4708,124,"[INET_N_1391, N_2631_S_LMGN, Stor_EU_108, SEQ_932_p, LKD_N_88, SEQ_18223_p, LKD_N_121, LKD_N_73, SEQ_18610_p, LKD_N_74, LKD_N_75, LKD_N_76, LKD_N_77, SEQ_18618_p, SEQ_18622_p, SEQ_18623_p, LKD_N_79, LKD_N_80, LKD_N_81, LKD_N_82, LKD_N_84, LKD_N_85, LKD_N_86, SEQ_18643_p, SEQ_18645_p, SEQ_18646_p, SEQ_18727_p, SEQ_18729_p, LKD_N_102, SEQ_18746_p, SEQ_18749_p, SEQ_18751_p, SEQ_18752_p, LKD_N_110, LKD_N_122, LKD_N_123, LKD_N_124, LKD_N_125, LKD_N_126, SEQ_18793_p, SEQ_18794_p, LKD_N_128, SEQ_18796_p, LKD_N_129, LKD_N_130, LKD_N_131, SEQ_18805_p, SEQ_18813_p, SEQ_18817_p, SEQ_19039_p, LKD_N_228, LKD_N_229, SEQ_19054_p, LKD_N_230, SEQ_19064_p, LKD_N_231, NutsCons_252, LKD_N_578, SEQ_19727_p, SEQ_19735_p, SEQ_19736_p, SEQ_19738_p, INET_N_1183, Stor_EU_96, NutsCons_272, INET_N_1818, NutsCons_218, NutsCons_227, NutsCons_476, NutsCons_243, NutsCons_457, NutsCons_194, NutsCons_221, NutsCons_229, NutsCons_247, NutsCons_317, NutsCons_234, NutsCons_526, NutsCons_407, NutsCons_255, NutsCons_321, NutsCons_254, NutsCons_195, NutsCons_420, NutsCons_289, SEQ_18808_p, NutsCons_197, NutsCons_8, SEQ_19927_p, NutsCons_314, NutsCons_421, NutsCons_210, NutsCons_574, INET_N_1797, NutsCons_295, NutsCons_477, NutsCons_205, SEQ_19851_p, NutsCons_319, INET_N_88, ...]","[CZ, AT, AT, AT, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, DE, CZ, CZ, DE, ...]"


In [47]:
clusters_df.tail(10)

Unnamed: 0,communityId,number_of_nodes,ids,country_codes
212,4697,1,[INET_N_1],[BE]
213,4710,1,[INET_N_947],[DE]
214,4732,1,[INET_N_241],[SI]
215,4740,1,[INET_N_270],[GB]
216,4803,1,[INET_N_1454],[IT]
217,4806,1,[INET_N_1499],[IT]
218,4922,1,[INET_N_900],[SK]
219,4929,1,[NutsCons_1515],[XX]
220,4956,1,[NutsCons_597],[XX]
221,4973,1,[INET_N_522],[XX]


## Path Finding

In [48]:
query = """
CALL gds.graph.project(
    'routes-weighted',
    'Node',
    'PIPE',
        {
            relationshipProperties: 'length_km'
        }
)
"""

gds.run_cypher(query)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Node': {'label': 'Node', 'properties': {}}}","{'PIPE': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'PIPE', 'properties': {'length_km': {'defaultValue': None, 'property': 'length_km', 'aggregation': 'DEFAULT'}}}}",routes-weighted,5009,6516,43


In [49]:
query = """
MATCH (source:Node {id: 'INET_N_856'}), (target:Node {id: 'NutsCons_1003'})
CALL gds.shortestPath.dijkstra.stream('routes-weighted', {
    sourceNode: source,
    targetNode: target,
    relationshipWeightProperty: 'length_km'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).id AS sourceNodeName,
    gds.util.asNode(targetNode).id AS targetNodeName,
    totalCost as totalDistance,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).id] AS nodeIDs,
    costs as distances
ORDER BY index
"""
gds.run_cypher(query)

Unnamed: 0,index,sourceNodeName,targetNodeName,totalDistance,nodeIDs,distances
0,0,INET_N_856,NutsCons_1003,649.707706,"[INET_N_856, SEQ_7_p, INET_N_1829, INET_N_329, N_409_S_LMGN, SEQ_9701__M_LMGN, SEQ_9149__S_LMGN, SEQ_9139__S_LMGN, SEQ_9127__S_LMGN, N_486_M_LMGN, SEQ_7715__M_LMGN, SEQ_7723__M_LMGN, SEQ_7731__M_LMGN, N_688_M_LMGN, SEQ_7739__L_LMGN, SEQ_7743__L_LMGN, SEQ_7747__L_LMGN, SEQ_7749__L_LMGN, NutsCons_1003]","[0.0, 55.420951, 110.841205, 220.16349100000002, 253.03525000000002, 260.771973, 289.15599, 317.347467, 352.397237, 372.523689, 398.64748099999997, 448.01613399999997, 487.93127499999997, 540.4601309999999, 569.220877, 597.2742549999999, 630.323322, 638.421699, 649.707706]"
