In [None]:
!pip install py2neo

Collecting py2neo
  Using cached py2neo-2021.2.3-py2.py3-none-any.whl (177 kB)
Collecting monotonic
  Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)
Collecting interchange~=2021.0.4
  Using cached interchange-2021.0.4-py2.py3-none-any.whl (28 kB)
Collecting pansi>=2020.7.3
  Using cached pansi-2020.7.3-py2.py3-none-any.whl (10 kB)
Installing collected packages: monotonic, pansi, interchange, py2neo
Successfully installed interchange-2021.0.4 monotonic-1.6 pansi-2020.7.3 py2neo-2021.2.3


In [None]:
import os
from getpass import getpass
from typing import Union

import pandas as pd
from py2neo import Graph
# Connect to the local Neo4j instance (database "neo4j") over Bolt.
# The password is read from the NEO4J_PASSWORD environment variable so that it is
# never stored in the notebook; if the variable is unset, prompt for it interactively.
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
graph = Graph("bolt://localhost:7687", password=neo4j_password, name="neo4j")

## Load Data to Neo4j

In [None]:
# Create an index per node entity to decrease running time.
# This one indexes :Company nodes on their `id` property; the statement is a no-op
# when an index with this name already exists (IF NOT EXISTS).
company_index_query = """ CREATE INDEX CompanyNameIndex IF NOT EXISTS FOR (p:Company) ON (p.id) """
graph.run(company_index_query)

In [None]:
#Creating nodes
#Adjust for each node type
# LOAD CSV returns every field as a string. The id is cast to an integer here because
# the edge-loading cells look companies up with toInteger(line.source_id) /
# toInteger(line.target_id); a string id would never match those lookups.
# NOTE: CREATE is not idempotent - re-running this cell inserts the rows again.
graph.run(""" load csv with headers from "file:///entities.csv" as csvline
create (n:Company{name:csvline.company,id:toInteger(csvline.company_id)}) """)

In [None]:
# Create edges: link each company to its sector with a BELONGS_TO relationship.
# Rows of entities.csv are streamed through apoc.periodic.iterate in batches of 1000.
# Adjust the query for each edge type.
# NOTE(review): the sector column is read as `line.sectory` - confirm this matches the
# header in entities.csv.
# NOTE(review): the query only MATCHes :Sector nodes, so they must already exist in the
# database before this cell runs.
belongs_to_query = """ CALL apoc.periodic.iterate("CALL apoc.load.csv('file:///entities.csv') yield map as line return line",
"WITH line.company AS sID,  line.sectory AS sector match (n:Company{name:sID}),(t:Sector{name:sector})
merge (n)-[r:BELONGS_TO]->(t)",{batchSize:1000, iterateList:true,parallel:false})
 """
graph.run(belongs_to_query)

In [None]:
# Create SUPPLIES relationships between companies from suppliers.csv.
# Source and target are looked up by integer `id`; start_date and revenue_pct are
# stored on the relationship as read from the CSV (no type conversion is applied).
# Rows are processed in batches of 1000.
supplies_query = """CALL apoc.periodic.iterate("CALL apoc.load.csv('file:///suppliers.csv') yield map as line return line",
"WITH toInteger(line.source_id) AS sID, line.start_date	 as date,line.revenue_pct as revenue,
toInteger(line.target_id) AS tID match (n:Company{id:sID}),(t:Company{id:tID})
merge (n)-[r:SUPPLIES{start_date:date, revenue: revenue}]->(t)",{batchSize:1000, iterateList:true,parallel:false})
"""
graph.run(supplies_query)

## Exploring The Network

In [None]:
#Explore Graph metrics
# numedge   - number of distinct relationship types
# numNode   - number of distinct nodes that have at least one relationship
# numEdge   - number of distinct relationships
# AvgDegree - relationships per node; the count is cast to float BEFORE dividing,
#             because dividing two integers in Cypher truncates the result.
graph.run("""MATCH (p)-[r]-(c)
RETURN COUNT( distinct type(r)) AS numedge, count(distinct p) as numNode,
COUNT( distinct r) AS numEdge, toFloat(count(distinct r))/COUNT( distinct p) as AvgDegree""")

numedge,numNode,numEdge,AvgDegree
9,237431,1739156,7.0


In [None]:
# Isolated nodes: count the companies that have no relationship (in either direction)
# to any other :Company node.
isolated_companies_query = """match (n:Company)
where not EXISTS((n)--(:Company))
return count(n)
"""
graph.run(isolated_companies_query)

count(n)
81784


In [None]:
# Count, per sector, the Company -> Sector relationships (RingSize), keep only sectors
# with a count above one, and list them from largest to smallest.
sector_size_query = """MATCH (c:Company)-[]->(s:Sector)
WITH s as Sector, count(c) AS RingSize
WHERE RingSize >1
RETURN Sector,RingSize ORDER BY RingSize DESC"""
graph.run(sector_size_query)

Sector,RingSize
(_237329:Sector {name: 'Finance'}),42030
(_237324:Sector {name: 'Technology Services'}),22946
(_237328:Sector {name: 'Producer Manufacturing'}),18637


In [None]:
# Top 10 companies by number of outgoing ULTIMATE_PARENT_OF relationships.
# `numParent` is the count of those relationships leaving company `p`.
ultimate_parent_query = """MATCH (p:Company)-[r:ULTIMATE_PARENT_OF]->(c:Company)
RETURN p, COUNT(r) AS numParent
ORDER BY numParent DESC LIMIT 10"""
graph.run(ultimate_parent_query)

p,numParent
"(_235922:Company {id: 336894, name: 'Government of China'})",1617
"(_236091:Company {id: 336913, name: 'Government of India'})",185
"(_236146:Company {id: 336951, name: 'Government of Russia'})",173


## Preprocessing

In [None]:
# Graph projection: build the in-memory GDS graph 'graphSupply' with a Cypher projection.
# The node query takes every node together with its labels; the relationship query
# takes only the directed SUPPLIES relationships.
supply_projection_query = """CALL gds.graph.project.cypher('graphSupply',
  'MATCH (n)
   RETURN
    id(n) AS id,
    labels(n) AS labels',
  'MATCH (n)-[r:SUPPLIES]->(m) RETURN id(n) AS source, id(m) AS target, type(r) AS type')"""
graph.run(supply_projection_query)

nodeQuery,relationshipQuery,graphName,nodeCount,relationshipCount,projectMillis
"MATCH (n)  RETURN  id(n) AS id,  labels(n) AS labels","MATCH (n)-[r:SUPPLIES]->(m) RETURN id(n) AS source, id(m) AS target, type(r) AS type",graphSupply,237431,214935,3884


In [None]:
# SHIP_TO relation extraction: for every SUPPLIES edge s -> t, connect the supplier `s`
# to the country in which the customer `t` is located. MERGE keeps a single SHIP_TO
# relationship per (supplier, country) pair.
ship_to_query = """MATCH (s:Company)-[r:SUPPLIES]->(t:Company)
with s, t
Match (t)-[l:LOCATED_IN]->(loc:Country)
MERGE (s)-[e:SHIP_TO]->(loc)"""
graph.run(ship_to_query)

In [None]:
# Add a weight to the SUPPLIES edges based on how up to date each relation is.
# For every row of supplier_weights.csv the matching SUPPLIES relationship
# (looked up by integer source/target company id) gets `weight` set to the
# integer-cast `time_weight` column. Rows are processed in batches of 1000.
supply_weight_query = """CALL apoc.periodic.iterate("CALL apoc.load.csv('file:///supplier_weights.csv') yield map as line return line",
"WITH toInteger(line.source_id) AS sID, toInteger(line.target_id) AS tID,toInteger(line.time_weight) as weight
match (n:Company{id:sID})-[r:SUPPLIES]->(t:Company{id:tID})
set r.weight=weight",{batchSize:1000, iterateList:true,parallel:false})"""
graph.run(supply_weight_query)

In [None]:
## Add Inductive split masks
#train mask %70
# rand() is uniform on [0, 1), so `rand() < 0.7` is true for roughly 70% of the
# companies, which is the intended training share.
# NOTE: every run draws fresh random values, so the mask changes on each
# execution of this cell.
graph.run("""match (n:Company)
with n, rand()<0.7 as rand
set n.trainmask=rand""")
#train split
# Relationships whose two endpoints are both training companies. The pattern is
# undirected, so each relationship is returned once per direction.
graph.run("""match (n:Company{trainmask:True})-[r]-(m:Company{trainmask:True})
return n.id, type(r), m.id""")