In [2]:
import os
from getpass import getpass

import pandas as pd
from graphdatascience import GraphDataScience

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Connection settings for the Neo4j server. Every value can be overridden with
# an environment variable so the notebook runs against other setups unchanged.
# NEO4J_URI could look similar to "bolt://my-server.neo4j.io:7687"
ip = os.environ.get("NEO4J_HOST", "localhost")
port = os.environ.get("NEO4J_PORT", "7687")
user = os.environ.get("NEO4J_USER", "neo4j")
# Credentials must not live in the notebook: take the password from the
# environment and fall back to an interactive prompt when it is not set.
pwd = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
gds = GraphDataScience("bolt://" + ip + ":" + port, auth=(user, pwd))

# Check the installed GDS version on the server (queried once, then reused)
gds_version = gds.version()
print(gds_version)
assert gds_version, "could not read the GDS version from the server"

2.2.2


In [4]:
# Point the GDS client at the 'neo4j' database
gds.set_database('neo4j')

In [5]:
# List what the GDS library on the server offers. The resulting table has one
# row per entry with its name, description, signature and type
# (procedure or function).
gds.list()

Unnamed: 0,name,description,signature,type
0,gds.allShortestPaths.delta.mutate,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.mutate(graphName ::...,procedure
1,gds.allShortestPaths.delta.mutate.estimate,Returns an estimation of the memory consumptio...,gds.allShortestPaths.delta.mutate.estimate(gra...,procedure
2,gds.allShortestPaths.delta.stats,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.stats(graphName :: ...,procedure
3,gds.allShortestPaths.delta.stats.estimate,Returns an estimation of the memory consumptio...,gds.allShortestPaths.delta.stats.estimate(grap...,procedure
4,gds.allShortestPaths.delta.stream,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.stream(graphName ::...,procedure
...,...,...,...,...
364,gds.util.infinity,RETURN gds.util.infinity() - Return infinity a...,gds.util.infinity() :: (FLOAT?),function
365,gds.util.isFinite,RETURN gds.util.isFinite(value) - Return true ...,gds.util.isFinite(value :: NUMBER?) :: (BOOLEAN?),function
366,gds.util.isInfinite,RETURN gds.util.isInfinite(value) - Return tru...,gds.util.isInfinite(value :: NUMBER?) :: (BOOL...,function
367,gds.util.nodeProperty,Returns a node property value from a named in-...,"gds.util.nodeProperty(graphName :: STRING?, no...",function


In [16]:
# Drop any earlier projection with the same name first: gds.graph.project fails
# when the graph name is already present in the catalog, so without this step
# the cell could not be re-run. The second argument (failIfMissing = false)
# turns the drop into a no-op when the graph does not exist yet.
for graph_name in ("port-relation", "port-path"):
    gds.run_cypher(
        """
        CALL gds.graph.drop($graph_name, false)
        YIELD graphName
        RETURN graphName
        """,
        params={"graph_name": graph_name},
    )

# Create a port relation graph projection: Port nodes connected by Visit
# relationships, treated as undirected and carrying the 'count' property.
gds.run_cypher(
  """
  CALL gds.graph.project(
    "port-relation",
    "Port",
    {Visit: {orientation: 'UNDIRECTED', properties:'count'}}
  )
  """
)

# Graph projection for path finding analysis: Port and Shipping nodes connected
# by Origin and Destination relationships, both undirected and carrying the
# 'distance' property. As the last expression of the cell, the summary returned
# by this call is what the notebook displays.
gds.run_cypher(
  """
  CALL gds.graph.project(
    "port-path",
    [
      "Port","Shipping"
    ],
    [
      {Origin: {orientation: 'UNDIRECTED', properties:'distance'}},
      {Destination: {orientation: 'UNDIRECTED', properties:'distance'}}
    ]
  )
  """
)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Port': {'label': 'Port', 'properties': {}}, ...","{'Origin': {'orientation': 'UNDIRECTED', 'aggr...",port-path,23641,93520,288


In [14]:
# Fetch a handle to the 'port-relation' graph from the GDS Graph Catalog and
# show its node count. The graph is looked up by name here, not projected, so
# it has to be present in the catalog already.
G_port_relation = gds.graph.get('port-relation')
G_port_relation.node_count()

261

In [17]:
# Fetch a handle to the 'port-path' graph from the GDS Graph Catalog and show
# its node count. The graph is looked up by name here, not projected, so it
# has to be present in the catalog already.
G_port_path = gds.graph.get('port-path')
G_port_path.node_count()

23641

In [22]:
## k shortest paths between two ports using Yen's algorithm
# Resolve both endpoints to node ids by looking the ports up by name.
source_id = gds.find_node_id(["Port"], {"port_name": "Agio Theodoroi"})
target_id = gds.find_node_id(["Port"], {"port_name": "YSTAD"})

# Algorithm configuration: ask for 5 paths, weighted by the 'distance' property.
yens_config = {
    "sourceNode": source_id,
    "targetNode": target_id,
    "k": 5,
    "relationshipWeightProperty": "distance",
}
res = gds.shortestPath.yens.stream(G_port_path, **yens_config)
res

Unnamed: 0,index,sourceNode,targetNode,totalCost,nodeIds,costs,path
0,0,31963,31956,998.14,"[31963, 4047, 31966, 15928, 31959, 6235, 31993...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.06...","((cost), (cost), (cost), (cost), (cost), (cost..."
1,1,31963,31956,998.14,"[31963, 12927, 31966, 15928, 31959, 6235, 3199...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.06...","((cost), (cost), (cost), (cost), (cost), (cost..."
2,2,31963,31956,998.14,"[31963, 4047, 31966, 15928, 31959, 6235, 31993...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.06...","((cost), (cost), (cost), (cost), (cost), (cost..."
3,3,31963,31956,998.14,"[31963, 12927, 31966, 15928, 31959, 6235, 3199...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.06...","((cost), (cost), (cost), (cost), (cost), (cost..."
4,4,31963,31956,998.14,"[31963, 4047, 31966, 15928, 31959, 6235, 31993...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.06...","((cost), (cost), (cost), (cost), (cost), (cost..."


In [41]:
# Community detection: stream the Louvain community id of every node in the
# 'port-relation' projection together with the port attributes stored on the
# node, ordered by community id.
louvain_result = gds.run_cypher(
    """
    CALL gds.louvain.stream('port-relation')
    YIELD nodeId, communityId
    RETURN
        gds.util.asNode(nodeId).port_id AS id,
        gds.util.asNode(nodeId).port_name AS port,
        gds.util.asNode(nodeId).country_code AS country,
        gds.util.asNode(nodeId).area AS area,
        communityId
    ORDER BY communityId ASC
    """
)
# to_csv does not create missing directories, so make sure the output folder
# exists before writing (needed on a fresh checkout).
os.makedirs("result", exist_ok=True)
louvain_result.to_csv('result/port_community_detection_louvain.csv', index=False)
louvain_result

Unnamed: 0,id,port,country,area,communityId
0,350,SIMRISHAMN,se,baltic,38
1,9217,LÄNSI,no,baltic,38
2,111,KIEL,de,baltic,38
3,6312,LANDSKRONA,se,baltic,38
4,87213,HELSINGBORG,se,baltic,38
...,...,...,...,...,...
256,wfp.16210,Trabzon,tr,black,253
257,wfp.15198,Gorele,tr,black,253
258,wfp.17304,Gemikonagi,cy,black,256
259,wfp.15463,Ordu,tr,black,260


In [44]:
# Show only the ports that were assigned to community 38
in_community_38 = louvain_result['communityId'] == 38
louvain_result[in_community_38]

Unnamed: 0,id,port,country,area,communityId
0,350,SIMRISHAMN,se,baltic,38
1,9217,LÄNSI,no,baltic,38
2,111,KIEL,de,baltic,38
3,6312,LANDSKRONA,se,baltic,38
4,87213,HELSINGBORG,se,baltic,38
5,932,MALMÖ,se,baltic,38
6,91321,HELSINGÖR,dk,baltic,38
7,130593,YSTAD,se,baltic,38
8,55,KÖGE,dk,baltic,38
9,4305,KÖPENHAMN,dk,baltic,38


In [37]:
# Number of ports per community, most populous community first
community_sizes = louvain_result['communityId'].value_counts()
community_sizes

81     65
85     47
94     40
38     38
185    25
124     9
101     8
183     8
65      6
247     2
253     2
260     2
114     1
153     1
231     1
233     1
237     1
238     1
242     1
245     1
256     1
Name: communityId, dtype: int64

In [6]:
# Port importance: weighted PageRank on the 'port-relation' projection, using
# the 'count' relationship property as weight. Results are joined with the port
# attributes stored on the node and sorted from highest to lowest score.
pagerank_result = gds.run_cypher(
    """
    CALL gds.pageRank.stream('port-relation',{
        maxIterations: 100,
        dampingFactor: 0.85,
        relationshipWeightProperty: 'count'
    })
    YIELD nodeId, score
    RETURN
        gds.util.asNode(nodeId).port_id AS id,
        gds.util.asNode(nodeId).port_name AS port,
        gds.util.asNode(nodeId).country_code AS country,
        score
    ORDER BY score DESC
    """
)
# to_csv does not create missing directories, so make sure the output folder
# exists before writing (needed on a fresh checkout).
os.makedirs("result", exist_ok=True)
pagerank_result.to_csv("result/port_centrality_pagerank.csv", index=False)
pagerank_result

Unnamed: 0,id,port,country,score
0,wfp.17151,Ambarli,tr,7.567009
1,4305,KÖPENHAMN,dk,7.018103
2,350,SIMRISHAMN,se,5.619428
3,782598,TRELLEBORG,se,5.611754
4,wfp.16140,Beykoz,tr,5.227836
...,...,...,...,...
256,wfp.15773,Parga,gr,0.150000
257,9393214,FÖRBY,fi,0.150000
258,wfp.17747,Istanbul,tr,0.150000
259,wfp.16883,Evpatoria,ua,0.150000


In [30]:
# Port importance using betweenness centrality on the 'port-relation'
# projection, with the 'count' relationship property passed as weight. The
# query returns one row per port (id, name, score), sorted from highest to
# lowest score.
betweenness_result = gds.run_cypher(
    """
    CALL gds.betweenness.stream('port-relation',{
        relationshipWeightProperty: 'count'
    })
    YIELD nodeId, score
    RETURN
        gds.util.asNode(nodeId).port_id AS id,
        gds.util.asNode(nodeId).port_name AS port,
        score
    ORDER BY score DESC
    """
)
# to_csv does not create missing directories, so make sure the output folder
# exists before writing (needed on a fresh checkout).
os.makedirs("result", exist_ok=True)
betweenness_result.to_csv("result/port_centrality_betweenness.csv", index=False)
betweenness_result

Unnamed: 0,id,port,score
0,wfp.17356,Lavrion (Laurium),5698.127048
1,111,KIEL,4627.478042
2,987352,GRENÅ,3913.350953
3,350,SIMRISHAMN,3057.894262
4,wfp.16140,Beykoz,2708.384176
...,...,...,...
256,wfp.17304,Gemikonagi,0.000000
257,wfp.14759,Yalta,0.000000
258,wfp.14760,Kerch,0.000000
259,wfp.14730,Cheikh Zennad,0.000000


In [36]:
# The CELF algorithm for influence maximization aims to find k nodes that maximize the expected spread of influence in the network. 
# Leskovec et al. 2007 introduced the CELF algorithm in their study Cost-effective Outbreak Detection in Networks
celf_result = gds.run_cypher(
    """
    CALL gds.beta.influenceMaximization.celf.stream('port-relation',{
        seedSetSize: 261
    })
    YIELD nodeId, spread
    RETURN 
        gds.util.asNode(nodeId).port_id AS id, 
        gds.util.asNode(nodeId).port_name AS port,
        spread
    ORDER BY spread DESC
    """
)
celf_result.to_csv('result/port_centrality_celf.csv',index=False)
celf_result

Unnamed: 0,id,port,spread
0,350,SIMRISHAMN,99.30
1,wfp.17236,Rhodes,2.40
2,8128,HÖNSÄTER,1.62
3,wfp.15644,Ayancik,1.54
4,87213,HELSINGBORG,1.39
...,...,...,...
256,wfp.15217,Iskenderun,0.04
257,2321,Agnesberg,0.04
258,9557327,DEGERHAMN,0.01
259,782598,TRELLEBORG,0.00


In [38]:
# Node embeddings: stream 10-dimensional FastRP embeddings for the
# 'port-relation' projection, weighted by the 'count' relationship property.
# The random seed is fixed in the query configuration.
embedding_res = gds.run_cypher(
    """
    CALL gds.fastRP.stream('port-relation',
    {
        embeddingDimension: 10,
        randomSeed: 2022,
        relationshipWeightProperty: 'count'
    }
    )
    YIELD nodeId, embedding
    RETURN
        gds.util.asNode(nodeId).port_id AS id,
        gds.util.asNode(nodeId).port_name AS port,
        embedding
    """
)
# to_feather does not create missing directories, so make sure the output
# folder exists before writing (needed on a fresh checkout).
os.makedirs("result", exist_ok=True)
embedding_res.to_feather('result/port_embedding_fastrp.f')
embedding_res

Unnamed: 0,id,port,embedding
0,wfp.14761,Reni,"[-0.0002938761026598513, 0.27786871790885925, ..."
1,350,SIMRISHAMN,"[-0.5459726452827454, -0.7790752649307251, -0...."
2,8092,TROLLHÄTTAN,"[0.07464912533760071, -0.1781981885433197, 0.9..."
3,9217,LÄNSI,"[-0.06574603170156479, 0.040035322308540344, 0..."
4,9593,NORRSUNDET,"[-0.07633186876773834, -0.33849430084228516, -..."
...,...,...,...
256,wfp.17304,Gemikonagi,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
257,wfp.14759,Yalta,"[0.3101901412010193, -0.08310481905937195, 0.0..."
258,wfp.14760,Kerch,"[0.6213930249214172, 0.23753054440021515, -1.1..."
259,wfp.14730,Cheikh Zennad,"[-0.6218538880348206, 0.7503742575645447, -0.3..."
