# Part C 

In [1]:
import sys
import random
from pprint import pprint as pp
# Seed Python's global `random` generator so any later use of it is repeatable.
random.seed(42)
# Bare last expression: the interpreter version string becomes the cell output.
sys.version

'3.7.4 (default, Oct 15 2019, 22:29:14) \n[GCC 7.4.0]'

In [2]:
import neo4j
import py2neo
# Report the installed version of each Neo4j client library, one per line
# (official driver first, then py2neo).
for client_lib in (neo4j, py2neo):
    print(client_lib.__version__)

1.7.6
4.3.0


In [3]:
from neo4j import GraphDatabase
from py2neo import Graph

# Bolt endpoint on localhost; both client objects below connect to it.
NEO4J_URI = "bolt://localhost:7687"

# Official neo4j driver, created with auth=None (no credentials supplied).
gdb = GraphDatabase.driver(NEO4J_URI, auth=None)

# py2neo handle; the query cells shown in this notebook run through it.
graph = Graph(NEO4J_URI)

The graph has the following structure:

![graph](./schemas/dblp_slim_after/graph.png)

In [16]:
# Smoke check: ask the server for the `algo.*` listing and display only the
# first record instead of the whole list.
(
    graph
    .run("CALL algo.list();")
    .data()[:1]
)

[{'name': 'algo.allShortestPaths.stream',
  'description': "CALL algo.allShortestPaths.stream(weightProperty:String{nodeQuery:'labelName', relationshipQuery:'relationshipName', defaultValue:1.0, concurrency:4}) YIELD sourceNodeId, targetNodeId, distance - yields a stream of {sourceNodeId, targetNodeId, distance}",
  'signature': 'algo.allShortestPaths.stream(propertyName :: STRING?, config = {} :: MAP?) :: (sourceNodeId :: INTEGER?, targetNodeId :: INTEGER?, distance :: FLOAT?)',
  'type': 'procedure'}]

## PageRank

In [11]:
# PageRank over Article nodes, using a Cypher projection (graph: 'cypher').
#
# * Node query: every Article that has at least one outgoing CITED_BY
#   relationship.
# * Relationship query: one (source, target) pair per CITED_BY relationship
#   between two Articles, oriented exactly as stored in the database.
# * The streamed (nodeId, score) rows are joined back to Article to return
#   the title, ordered by descending score.
#
# The relationship patterns are written with plain ASCII '-' / '->'. The
# previous text used the typographic minus sign (U+2212), which is easy to
# pick up when copying from rendered documents and is hard to spot or grep.
#
# NOTE(review): rank flows from `source` to `target`, i.e. along CITED_BY as
# stored. Confirm against the schema that this is the intended direction.
# NOTE(review): Articles that only ever appear as `target` are not part of the
# node projection; confirm how the library treats relationships that point
# at them.
q_page_rank = """
CALL algo.pageRank.stream(
    'MATCH (a:Article)
     WHERE exists((a)-[:CITED_BY]->())
     RETURN id(a) AS id',
    'MATCH (a1:Article)-[:CITED_BY]->(a2:Article)
     RETURN id(a1) AS source, id(a2) AS target',
    {graph: 'cypher', iterations: 20, dampingFactor: 0.85})
YIELD nodeId, score
MATCH (a:Article) WHERE id(a) = nodeId
RETURN a.title AS title, score
ORDER BY score DESC;"""

# Display only the five highest-scoring articles.
graph.run(q_page_rank).data()[:5]

[{'title': 'Template-directed molecular assembly on silicon carbide nanomesh: comparison between CuPc and pentacene.',
  'score': 1.3380366005469115},
 {'title': 'Influence of investment, disinfection, and storage on the microhardness of ocular resins.',
  'score': 1.2711560409981755},
 {'title': 'Fast Runtime Block Cyclic Data Redistribution on Multiprocessors',
  'score': 1.2702508100308478},
 {'title': 'Replication of the range of native anterior cruciate ligament fiber length change behavior achieved by different grafts: measurement using computer-assisted navigation.',
  'score': 1.2594134361017493},
 {'title': 'Evidence for maize (Zea mays) in the Late Archaic (3000-1800 B.C.) in the Norte Chico region of Peru.',
  'score': 1.2585701291449365}]

## Triangle counting

Reference: [Neo4j Graph Algorithms – Triangle Counting / Clustering Coefficient](https://neo4j.com/docs/graph-algorithms/current/labs-algorithms/triangle-counting-clustering-coefficient/)

In [31]:
# Per-article triangle count and clustering coefficient over CITED_BY,
# highest coefficient first. Each streamed node id is resolved back to its
# node to expose the article's `title` and `id` properties.
q_triangle_counting = (
    "CALL algo.triangleCount.stream('Article', 'CITED_BY')\n"
    "YIELD nodeId, triangles, coefficient\n"
    "RETURN algo.asNode(nodeId).title AS title, algo.asNode(nodeId).id as id, triangles, coefficient\n"
    "ORDER BY coefficient DESC"
)

# Materialise every result row as a list of dicts for the cells below.
triangle_count = graph.run(q_triangle_counting).data()

In [32]:
# Show the first four rows of the triangle-count result.
triangle_count[:4]

[{'title': 'Quantum entanglement between an optical photon and a solid-state spin qubit',
  'id': '35e72f5eafce5d0a8853add98956d5ec2f58d9f4',
  'triangles': 212,
  'coefficient': 0.603988603988604},
 {'title': 'A joint detection of CEA and CA-50 levels in saliva and serum of patients with tumors in oral region and salivary gland',
  'id': '5cfdb256b6ae968374469bd36702ed341cfe9485',
  'triangles': 329,
  'coefficient': 0.5864527629233511},
 {'title': 'Patient preferences for colon cancer screening',
  'id': 'c4062742b4e0d13cfa0e992fdf2cebf2eb71c415',
  'triangles': 287,
  'coefficient': 0.5786290322580645},
 {'title': 'Evolution of Cooperation in the Snowdrift Game with Heterogeneous Population',
  'id': '82e8de5c4be2aa76e670e17e21db73edbb3d5a56',
  'triangles': 427,
  'coefficient': 0.5762483130904184}]

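The formula below is the *global* clustering coefficient of a graph (see https://en.wikipedia.org/wiki/Clustering_coefficient):

$$C = \frac{3 \times \text{number of triangles}}{\text{number of all triplets}}$$

The `coefficient` column above is reported per node, i.e. it is the *local* clustering coefficient $C_i = \frac{2\,T_i}{k_i\,(k_i - 1)}$, where $T_i$ is the number of triangles through node $i$ and $k_i$ is its degree. For example, the first row (212 triangles, coefficient ≈ 0.604) corresponds to $k_i = 27$: $2 \cdot 212 / (27 \cdot 26) = 424 / 702 \approx 0.604$.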

### Finding triangles 

In [27]:
# Stream every triangle formed by Article nodes over CITED_BY and resolve the
# three node ids of each triangle back to full nodes.
q_triangle_descr = (
    "CALL algo.triangle.stream('Article','CITED_BY')\n"
    "YIELD nodeA, nodeB, nodeC\n"
    "RETURN algo.asNode(nodeA) AS nodeA, algo.asNode(nodeB) AS nodeB, algo.asNode(nodeC) AS nodeC"
)

# One dict per triangle, keyed by 'nodeA', 'nodeB' and 'nodeC'.
triangle_count_nodes = graph.run(q_triangle_descr).data()

In [29]:
# Show only the first triangle (its three member nodes).
triangle_count_nodes[:1]

[{'nodeA': (_26133:Article {doi: '10.1007/BF02270828', doi_url: 'https://doi.org/10.1007/BF02270828', id: 'ed6f7f0d65f4c8bde43f13667c406ff3403f9814', title: 'Predation, seed size partitioning and the evolution of body size in seed-eating finches', year: 2005}),
  'nodeB': (_26204:Article {doi: '10.1007/s00439-003-1056-8', doi_url: 'https://doi.org/10.1007/s00439-003-1056-8', id: '36efcd42e8fbe18e0e1baf6c2696e2971aee78b2', title: 'Apolipoprotein B-100 XbaI gene polymorphism in gallbladder cancer', year: 2003}),
  'nodeC': (_26205:Article {doi: '10.1016/j.ijcard.2009.06.058', doi_url: 'https://doi.org/10.1016/j.ijcard.2009.06.058', id: '864ca4044d2ebf88ae4bf45df730f571039488b3', title: 'Heart rate dynamics in different levels of Zen meditation.', year: 2010})}]

Visualization of a single triangle:

![triangle_count](schemas/c_triangles_limit_1.png)

Visualization of how the first five triangles are connected:

![triangle_count](schemas/c_triangles_limit_5.png)