In [51]:
import os
from getpass import getpass

import pandas as pd
from graphdatascience import GraphDataScience

In [52]:
# Load the pipe-delimited country reference table and lower-case the ISO
# alpha-2 codes in a single chained expression.
# NOTE(review): presumably lower-cased to match the lower-case `country_code`
# values used when querying graph nodes further down - confirm.
df_country = pd.read_csv("data/country.csv", sep="|").assign(
    iso_alpha_2=lambda frame: frame["iso_alpha_2"].str.lower()
)
df_country

Unnamed: 0,country,domain,iso_alpha_2,iso_alpha_3,Lat,Lon,continent_m49_code,continent_name,sub_continent_m49_code,sub_continent_name
0,Burkina Faso,Burkina Faso,bf,BFA,12.075308,-1.688031,2,AFRICA,11,WESTERN_AFRICA
1,Guinea-Bissau,Guinea Bissau,gw,GNB,12.100035,-14.900021,2,AFRICA,11,WESTERN_AFRICA
2,Mauritania,Mauritania,mr,MRT,20.254038,-9.239926,2,AFRICA,11,WESTERN_AFRICA
3,Senegal,Senegal,sn,SEN,14.475061,-14.452961,2,AFRICA,11,WESTERN_AFRICA
4,Liberia,Liberia,lr,LBR,5.749972,-9.365852,2,AFRICA,11,WESTERN_AFRICA
...,...,...,...,...,...,...,...,...,...,...
223,Monaco,Monako,mc,MCO,43.731142,7.419758,150,EUROPE,155,WESTERN_EUROPE
224,Liechtenstein,Liechtenstein,li,LIE,47.141631,9.553153,150,EUROPE,155,WESTERN_EUROPE
225,Luxembourg,Luxemburg,lu,LUX,49.815868,6.129675,150,EUROPE,155,WESTERN_EUROPE
226,Austria,Austria,at,AUT,47.200034,13.199959,150,EUROPE,155,WESTERN_EUROPE


In [53]:
# Use Neo4j URI and credentials according to your setup.
# Each setting can be overridden through an environment variable so the
# notebook does not have to be edited (or a secret committed) per environment.
# The resulting URI looks similar to "bolt://my-server.neo4j.io:7687"
ip = os.environ.get("NEO4J_HOST", "localhost")
port = os.environ.get("NEO4J_PORT", "7687")
# Never keep the password in the notebook: read it from the environment and
# fall back to an interactive prompt when it is not set.
pwd = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
gds = GraphDataScience("bolt://" + ip + ":" + port, auth=("neo4j", pwd))

# Check the installed GDS version on the server (one round trip, reused for
# both the printout and the sanity check)
gds_version = gds.version()
print(gds_version)
assert gds_version, "GDS server did not report a version"

2.2.2


In [54]:
# Point the GDS client at the 'neo4j' database
gds.set_database('neo4j')

In [55]:
# List the GDS procedures and functions available on the server
# (name, description, signature and type of each entry)
gds.list()

Unnamed: 0,name,description,signature,type
0,gds.allShortestPaths.delta.mutate,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.mutate(graphName ::...,procedure
1,gds.allShortestPaths.delta.mutate.estimate,Returns an estimation of the memory consumptio...,gds.allShortestPaths.delta.mutate.estimate(gra...,procedure
2,gds.allShortestPaths.delta.stats,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.stats(graphName :: ...,procedure
3,gds.allShortestPaths.delta.stats.estimate,Returns an estimation of the memory consumptio...,gds.allShortestPaths.delta.stats.estimate(grap...,procedure
4,gds.allShortestPaths.delta.stream,The Delta Stepping shortest path algorithm com...,gds.allShortestPaths.delta.stream(graphName ::...,procedure
...,...,...,...,...
364,gds.util.infinity,RETURN gds.util.infinity() - Return infinity a...,gds.util.infinity() :: (FLOAT?),function
365,gds.util.isFinite,RETURN gds.util.isFinite(value) - Return true ...,gds.util.isFinite(value :: NUMBER?) :: (BOOLEAN?),function
366,gds.util.isInfinite,RETURN gds.util.isInfinite(value) - Return tru...,gds.util.isInfinite(value :: NUMBER?) :: (BOOL...,function
367,gds.util.nodeProperty,Returns a node property value from a named in-...,"gds.util.nodeProperty(graphName :: STRING?, no...",function


In [57]:
# List the graphs currently held in the GDS graph catalog
gds.graph.list()

Unnamed: 0,degreeDistribution,graphName,database,memoryUsage,sizeInBytes,nodeCount,relationshipCount,configuration,density,creationTime,modificationTime,schema


In [58]:
# Create a country relation graph projection.
# Drop any projection left over from a previous run first: gds.graph.project
# raises when the graph name is already taken, which made this cell fail on
# re-run. The second argument (failIfMissing = false) turns the drop into a
# no-op when no such graph exists yet.
gds.run_cypher("CALL gds.graph.drop('country-relation', false)")

# Project `Country` nodes and `Visit` relationships (treated as undirected and
# carrying the `count` property) into the in-memory graph catalog.
gds.run_cypher(
  """
  CALL gds.graph.project(
    "country-relation",
    "Country",
    {Visit: {orientation: 'UNDIRECTED', properties:'count'}}
  )
  """
)

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Country': {'label': 'Country', 'properties':...","{'Visit': {'orientation': 'UNDIRECTED', 'aggre...",country-relation,21,256,964


In [59]:
# Fetch a handle to the 'country-relation' projection from the GDS Graph Catalog
# (this does not project anything; it looks the graph up by name)
# We call the object representing the projected graph `G_country`
G_country = gds.graph.get('country-relation')
# Display the number of nodes in the projection
G_country.node_count()

21

In [60]:
## k shortest paths between two countries using Yen's algorithm

# Resolve the node ids of the two endpoint `Country` nodes from their
# country codes (source first, then target).
source_id, target_id = (
    gds.find_node_id(["Country"], {"country_code": code}) for code in ("ge", "il")
)

# Stream the 3 best paths, using the `count` relationship property as weight.
# NOTE(review): the weight is summed as path cost here; if `count` is a visit
# frequency, larger counts make a route "longer" - confirm this is intended.
res = gds.shortestPath.yens.stream(
    G_country,
    sourceNode=source_id,
    targetNode=target_id,
    k=3,
    relationshipWeightProperty="count",
)
res

Unnamed: 0,index,sourceNode,targetNode,totalCost,nodeIds,costs,path
0,0,32223,32219,4.0,"[32223, 32225, 32206, 32219]","[0.0, 1.0, 2.0, 4.0]","((cost), (cost), (cost))"
1,1,32223,32219,5.0,"[32223, 32212, 32221, 32219]","[0.0, 2.0, 3.0, 5.0]","((cost), (cost), (cost))"
2,2,32223,32219,5.0,"[32223, 32212, 32221, 32219]","[0.0, 2.0, 3.0, 5.0]","((cost), (cost), (cost))"


In [61]:
# Community detection with Louvain on the 'country-relation' projection.
# The query streams one row per node and resolves each node id back to its
# country code and name, ordered by community id.
louvain_query = """
    CALL gds.louvain.stream('country-relation')
    YIELD nodeId, communityId
    RETURN 
        gds.util.asNode(nodeId).country_code AS code, 
        gds.util.asNode(nodeId).country_name AS country, 
        communityId
    ORDER BY communityId ASC
    """
louvain_result = gds.run_cypher(louvain_query)
louvain_result

Unnamed: 0,code,country,communityId
0,se,Sweden,5
1,no,Norway,5
2,de,Germany,5
3,dk,Denmark,5
4,fi,Finland,5
5,ru,Russian Federation,5
6,ee,Estonia,5
7,lv,Latvia,5
8,it,Italy,5
9,ua,Ukraine,11


In [7]:
# Persist the Louvain community assignment as CSV (without the index column).
# Create the output folder first: this is the first export of the notebook and
# the write fails when `result/` does not exist yet (e.g. on a fresh checkout).
os.makedirs("result", exist_ok=True)
louvain_result.to_csv("result/country_community_detection_louvain.csv",index=False)

In [62]:
# Country importance via PageRank, weighted by the `count` relationship
# property (at most 100 iterations, damping factor 0.85).
# Rows are returned with country code and name, highest score first.
pagerank_query = """
    CALL gds.pageRank.stream('country-relation',{
        maxIterations: 100,
        dampingFactor: 0.85,
        relationshipWeightProperty: 'count'
    })
    YIELD nodeId, score
    RETURN 
        gds.util.asNode(nodeId).country_code AS code, 
        gds.util.asNode(nodeId).country_name AS country, 
        score
    ORDER BY score DESC
    """
pagerank_result = gds.run_cypher(pagerank_query)
pagerank_result

Unnamed: 0,code,country,score
0,se,Sweden,4.17349
1,tr,Turkey,3.947208
2,gr,Greece,1.358112
3,ua,Ukraine,1.044366
4,ro,Romania,1.031104
5,me,Montenegro,0.999999
6,dk,Denmark,0.993592
7,lb,Lebanon,0.881116
8,eg,Egypt,0.837801
9,fi,Finland,0.824928


In [63]:
# Persist the PageRank scores as CSV (index column omitted)
pagerank_result.to_csv("result/country_centrality_pagerank.csv",index=False)

In [68]:
# Country importance via betweenness centrality on the 'country-relation'
# projection, using `count` as the relationship weight.
# NOTE(review): betweenness is based on shortest paths, so the weight acts as
# a cost; if `count` is a visit frequency, confirm that is the intended meaning.
betweenness_query = """
    CALL gds.betweenness.stream('country-relation',{
        relationshipWeightProperty: 'count'
    })
    YIELD nodeId, score
    RETURN 
        gds.util.asNode(nodeId).country_code AS code, 
        gds.util.asNode(nodeId).country_name AS country, 
        score
    ORDER BY score DESC
    """
betweenness_result = gds.run_cypher(betweenness_query)
betweenness_result

Unnamed: 0,code,country,score
0,tr,Turkey,36.007648
1,gr,Greece,34.624892
2,il,Israel,28.791667
3,bg,Bulgaria,28.441667
4,cy,Cyprus,26.194048
5,ru,Russian Federation,21.843254
6,de,Germany,20.048485
7,ua,Ukraine,10.666667
8,dk,Denmark,9.4
9,eg,Egypt,8.641414


In [65]:
# Persist the betweenness centrality scores as CSV (index column omitted)
betweenness_result.to_csv("result/country_centrality_betweenness.csv",index=False)

In [76]:
# Influence maximization with CELF: find the k nodes (here a seed set of 3)
# that maximize the expected spread of influence in the network.
# The algorithm was introduced by Leskovec et al. (2007) in
# "Cost-effective Outbreak Detection in Networks".
celf_query = """
    CALL gds.beta.influenceMaximization.celf.stream('country-relation',{
        seedSetSize: 3
    })
    YIELD nodeId, spread
    RETURN 
        gds.util.asNode(nodeId).country_code AS code, 
        gds.util.asNode(nodeId).country_name AS country, 
        spread
    ORDER BY spread DESC
    """
celf_result = gds.run_cypher(celf_query)

# Save the seed set (largest spread first) as CSV, then display it
celf_result.to_csv('result/country_centrality_celf.csv',index=False)
celf_result

Unnamed: 0,code,country,spread
0,tr,Turkey,5.62
1,se,Sweden,1.45
2,cy,Cyprus,1.16


In [88]:
# Node embeddings with FastRP: 10-dimensional vectors, weighted by the `count`
# relationship property, with a fixed random seed (2022).
# Each row carries the country code, country name and its embedding.
fastrp_query = """
    CALL gds.fastRP.stream('country-relation',
    {
        embeddingDimension: 10,
        randomSeed: 2022,
        relationshipWeightProperty: 'count'
    }
    )
    YIELD nodeId, embedding
    RETURN 
        gds.util.asNode(nodeId).country_code AS code, 
        gds.util.asNode(nodeId).country_name AS country, 
        embedding
    """
embedding_res = gds.run_cypher(fastrp_query)

# Store the embeddings in Feather format, then display them
embedding_res.to_feather('result/country_embedding_fastrp.f')
embedding_res

Unnamed: 0,code,country,embedding
0,ua,Ukraine,"[0.6479694843292236, -0.5393590331077576, 0.70..."
1,se,Sweden,"[0.014119576662778854, 1.2931790351867676, 0.0..."
2,no,Norway,"[0.0007103577954694629, 0.8546448945999146, 0...."
3,de,Germany,"[0.03351972997188568, 1.2927680015563965, 0.16..."
4,lb,Lebanon,"[-0.0034325362648814917, 1.1997438669204712, 1..."
5,dk,Denmark,"[0.009109556674957275, 1.3111073970794678, 0.0..."
6,tr,Turkey,"[0.013138430193066597, 1.9923452138900757, 0.0..."
7,fi,Finland,"[0.06812076270580292, 1.0600388050079346, 0.85..."
8,gr,Greece,"[0.008923329412937164, 1.4752461910247803, 0.0..."
9,eg,Egypt,"[0.8580087423324585, 1.0550066232681274, 0.005..."
