In [1]:
import os

from pandas import DataFrame
from py2neo import Graph, Node, Relationship, NodeMatcher

In [2]:
# Connection settings. Each value can be overridden through an environment
# variable (NEO4J_USER / NEO4J_PASSWORD / NEO4J_HOST) so that credentials do not
# have to be edited into the notebook; the fallbacks are the original
# local-development values, so behaviour is unchanged when nothing is set.
# NOTE(review): the fallback password is still plain text in this cell - drop
# the fallback once every environment provides NEO4J_PASSWORD.
neo4j_config = {
    'user': os.environ.get('NEO4J_USER', 'neo4j'),
    'password': os.environ.get('NEO4J_PASSWORD', 'neo4jneo4j'),
    'host': os.environ.get('NEO4J_HOST', 'mylocalhost'),
}
# Connect to Graph
graph = Graph(**neo4j_config)
# Print the database name so the reader can check that we are connected to the
# expected Neo4j store before running anything else - to avoid accidents...
dbname = graph.database.name
print(dbname)

egypts.db


## Number of nodes in the network

In [3]:
# Count every node in the store, whatever its label.
query = "match (n) return count(n) as cnt"
cur = graph.run(query)
dd = cur.data()
# The query returns a single aggregate, so the count is read from the first row.
node_count = dd[0]["cnt"]
print(node_count)

25178


## Components in the network

In [4]:
# Run union-find over 'Person' nodes and 'contacts' relationships and count
# how many distinct set ids it assigns.
query = """
call algo.unionFind.stream('Person', 'contacts', {})
yield nodeId, setId
return count(distinct setId) as components
"""
cur = graph.run(query)
dd = cur.data()
# Single aggregate row: read the component count from it.
component_count = dd[0]["components"]
print(component_count)

3122


## Size of Top 5 Components

In [5]:
# Group nodes by the set id union-find assigns them and keep the five largest groups.
query = """
call algo.unionFind.stream('Person', 'contacts', {})
yield nodeId, setId
return setId, count(*) as component_size
order by component_size desc
limit 5
"""
largest_components = graph.run(query).data()
line_template = "{cnt}. Size: {size}"
# enumerate(..., start=1) provides the 1-based rank shown in front of each size.
for rank, component in enumerate(largest_components, start=1):
    print(line_template.format(size=component["component_size"], cnt=rank))

1. Size: 17762
2. Size: 64
3. Size: 16
4. Size: 16
5. Size: 14


## Trim Edges
This function finds the subnet that contains only the nodes connected by edges whose weight is larger than a threshold value. It returns the number of components in the subnet and the size (number of nodes) of the subnet. Components consisting of a single node are not counted.

In [6]:
def trim_edges(weight=1):
    """Summarise the components found by union-find at a given weight threshold.

    Runs ``algo.unionFind.stream`` over 'Person' nodes and 'contacts'
    relationships, passing ``weight`` as the ``threshold`` on the 'weight'
    property. Only components with more than one node are considered.

    :param weight: Threshold value. It is written into the query with one
        decimal place, so it is rounded to the nearest tenth (6 -> 6.0).
    :return: Tuple ``(number_components, graph_size)``: the number of
        multi-node components and the total number of nodes in them.
    """
    # Doubled braces are literal braces of the Cypher config map; only
    # {weight:.1f} is substituted by str.format.
    query = """
    call algo.unionFind.stream('Person', 'contacts', {{weightProperty:'weight', threshold:{weight:.1f}, concurrency:1}})
    yield nodeId, setId
    with setId, count(*) as component_size
    where component_size > 1
    return count(setId) as number_components, sum(component_size) as graph_size
    """.format(weight=weight)
    # The query ends in an aggregate, so the summary is read from the first row.
    summary = graph.run(query).data()[0]
    return summary["number_components"], summary["graph_size"]

In [7]:
# List every component with more than one node that union-find reports when run
# with threshold 5.0 on the 'weight' property, largest component first.
query = """
call algo.unionFind.stream('Person', 'contacts', {weightProperty:'weight', threshold:5.0, concurrency:1})
yield nodeId, setId
with setId, count(*) as component_size
where component_size > 1
return setId, component_size
order by component_size desc
"""
components = graph.run(query).data()
line_template = "{cnt}. Size: {size} SetId: {setId}"
# enumerate(..., start=1) provides the 1-based rank printed in front of each row.
for rank, component in enumerate(components, start=1):
    print(line_template.format(size=component["component_size"], cnt=rank, setId=component["setId"]))

1. Size: 4097 SetId: 520
2. Size: 76 SetId: 2481
3. Size: 35 SetId: 10197
4. Size: 20 SetId: 558
5. Size: 18 SetId: 964
6. Size: 15 SetId: 5786
7. Size: 15 SetId: 2723
8. Size: 13 SetId: 1554
9. Size: 11 SetId: 2753
10. Size: 11 SetId: 2092
11. Size: 11 SetId: 1537
12. Size: 10 SetId: 5453
13. Size: 9 SetId: 7003
14. Size: 9 SetId: 3843
15. Size: 8 SetId: 2524
16. Size: 7 SetId: 3062
17. Size: 7 SetId: 7263
18. Size: 7 SetId: 40
19. Size: 7 SetId: 3350
20. Size: 7 SetId: 11239
21. Size: 6 SetId: 125
22. Size: 6 SetId: 920
23. Size: 6 SetId: 277
24. Size: 6 SetId: 2170
25. Size: 6 SetId: 8285
26. Size: 6 SetId: 937
27. Size: 6 SetId: 8016
28. Size: 6 SetId: 1089
29. Size: 6 SetId: 5979
30. Size: 6 SetId: 2512
31. Size: 5 SetId: 5009
32. Size: 5 SetId: 4354
33. Size: 5 SetId: 690
34. Size: 5 SetId: 1883
35. Size: 5 SetId: 3256
36. Size: 5 SetId: 2514
37. Size: 5 SetId: 1267
38. Size: 5 SetId: 10436
39. Size: 5 SetId: 18609
40. Size: 5 SetId: 10887
41. Size: 5 SetId: 5040
42. Size: 5 SetI

In [12]:
# Summary at threshold 20: reuse trim_edges() instead of repeating its query.
# trim_edges(20) writes the threshold as 20.0 and returns
# (number of multi-node components, total number of nodes in them).
number_components, graph_size = trim_edges(20)
print("Components: {comp} - Graph size: {size}".format(comp=number_components, size=graph_size))

Components: 104 - Graph size: 418


In [10]:
# Summary at threshold 6, displayed as (number of components, number of nodes).
trim_edges(weight=6)

(796, 6051)