# Part C 

In [1]:
import sys
import random
from pprint import pprint as pp
# Seed Python's RNG so that any later use of `random` is repeatable.
random.seed(42)
# Last expression of the cell: shows the interpreter version as the cell output.
sys.version

'3.7.4 (default, Oct 15 2019, 22:29:14) \n[GCC 7.4.0]'

In [2]:
import neo4j
import py2neo

# Record the client library versions used for this run, one per line
# (official driver first, then py2neo).
print(neo4j.__version__, py2neo.__version__, sep="\n")

1.7.6
4.3.0


In [3]:
from neo4j import GraphDatabase
from py2neo import Graph

# Bolt endpoint shared by both clients.
NEO4J_URI = "bolt://localhost:7687"

# Official Neo4j driver, created without authentication.
gdb = GraphDatabase.driver(NEO4J_URI, auth=None)

# py2neo handle; the queries in the cells below are issued through `graph.run(...)`.
graph = Graph(NEO4J_URI)

The graph has the following structure:

![graph](./schemas/dblp_slim_after/graph.png)

In [4]:
# Ask the server for the available `algo.*` procedures and show only the first entry.
algo_procedures = graph.run("CALL algo.list();").data()
algo_procedures[:1]

[{'name': 'algo.allShortestPaths.stream',
  'description': "CALL algo.allShortestPaths.stream(weightProperty:String{nodeQuery:'labelName', relationshipQuery:'relationshipName', defaultValue:1.0, concurrency:4}) YIELD sourceNodeId, targetNodeId, distance - yields a stream of {sourceNodeId, targetNodeId, distance}",
  'signature': 'algo.allShortestPaths.stream(propertyName :: STRING?, config = {} :: MAP?) :: (sourceNodeId :: INTEGER?, targetNodeId :: INTEGER?, distance :: FLOAT?)',
  'type': 'procedure'}]

## PageRank

In [5]:
# PageRank over the article citation graph, using a Cypher projection
# (`graph: 'cypher'`): the first inner statement returns the node ids to load,
# the second returns the (source, target) pairs to use as relationships.
#
# The relationship patterns are written with plain ASCII `-` / `->`. The previous
# version contained the typographic minus sign (U+2212), most likely a copy/paste
# artifact, which is hard to type, search for, and review.
#
# Only articles with at least one outgoing CITED_BY relationship are loaded as nodes.
# NOTE(review): with (a1)-[:CITED_BY]->(a2) used as source -> target, score
# presumably accumulates on the *citing* article rather than the cited one, and
# articles without outgoing CITED_BY are excluded from the projection entirely.
# Confirm this matches the intended notion of "important article".
q_page_rank = """
CALL algo.pageRank.stream(
    'MATCH (a:Article)
     WHERE exists ((a)-[:CITED_BY]->())
     RETURN ID(a) AS id',
    'MATCH (a1:Article)-[:CITED_BY]->(a2:Article)
     RETURN ID(a1) AS source, ID(a2) AS target',
    {graph : 'cypher', iterations:20, dampingFactor:0.85})
YIELD nodeId, score
MATCH (a:Article) WHERE ID(a) = nodeId
RETURN a.title AS title, score
ORDER BY score DESC;"""

# Show the five highest-scoring articles (the query already sorts by score, descending).
graph.run(q_page_rank).data()[:5]

[{'title': '"Bump":  using a mobile app to enhance learning in simulation scenarios.',
  'score': 1.3380366005469115},
 {'title': 'Linear Hypopigmentation After Triamcinolone Injection: A Rare Complication of a Common Procedure',
  'score': 1.2711560409981755},
 {'title': 'Oleanolic acid suppresses the proliferation of lung carcinoma cells by miR-122/Cyclin G1/MEF2D axis',
  'score': 1.2702508100308478},
 {'title': 'Acetic acid as a sclerosing agent for renal cysts: Comparison with ethanol in follow-up results',
  'score': 1.2594134361017493},
 {'title': 'Restoration of blue scratches in digital image sequences',
  'score': 1.2585701291449365}]

## Triangle counting

Reference: [Neo4j Graph Algorithms – Triangle Counting / Clustering Coefficient](https://neo4j.com/docs/graph-algorithms/current/labs-algorithms/triangle-counting-clustering-coefficient/)

In [6]:
# Per-article triangle count and clustering coefficient over CITED_BY,
# returned with the article title and id, highest coefficient first.
q_triangle_counting = """CALL algo.triangleCount.stream('Article', 'CITED_BY')
YIELD nodeId, triangles, coefficient
RETURN algo.asNode(nodeId).title AS title, algo.asNode(nodeId).id as id, triangles, coefficient
ORDER BY coefficient DESC"""

# Run the query, then materialize every row as a list of dicts.
triangle_cursor = graph.run(q_triangle_counting)
triangle_count = triangle_cursor.data()

In [7]:
# Preview the first four rows; the query orders rows by coefficient, descending.
triangle_count[:4]

[{'title': 'DICER1/Alu RNA dysmetabolism induces Caspase-8-mediated cell death in age-related macular degeneration.',
  'id': 'f9b4d45b0b6135dfecab5aa950b11f7b17267e91',
  'triangles': 212,
  'coefficient': 0.603988603988604},
 {'title': 'Evaluation of novel high-throughput embryonic stem cell tests with new molecular markers for screening embryotoxic chemicals in vitro.',
  'id': '421da3edd7d840ce58b811d2ad62dac3dafc7663',
  'triangles': 329,
  'coefficient': 0.5864527629233511},
 {'title': "A fuzzy nonlinear model for quality function deployment considering Kano's concept",
  'id': '6f06076133abe3cd3bfed4d2f20c3fd97eef3702',
  'triangles': 287,
  'coefficient': 0.5786290322580645},
 {'title': 'Predictors of clinical outcome in acute pulmonary embolism: Correlation of CT pulmonary angiography with clinical, echocardiography and laboratory findings.',
  'id': '7bbffaf73092220bfc3cb2a0a6d34ded8d680c7d',
  'triangles': 427,
  'coefficient': 0.5762483130904184}]

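The `coefficient` column above is the *local* clustering coefficient of each node. With $T_i$ the number of triangles through node $i$ and $k_i$ its degree:

$$C_i = \frac{2\,T_i}{k_i\,(k_i - 1)}$$

For example, the first row (212 triangles, coefficient ≈ 0.604) is consistent with a degree of 27: $2 \cdot 212 / (27 \cdot 26) \approx 0.604$.

For comparison, the *global* clustering coefficient of a whole graph is defined as (see [Wikipedia: Clustering coefficient](https://en.wikipedia.org/wiki/Clustering_coefficient)):

$${\displaystyle C={\frac {3\times {\mbox{number of triangles}}}{\mbox{number of all triplets}}}}$$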

### Finding triangles 

In [8]:
# Stream the triangles found over CITED_BY between Article nodes and resolve
# the three node ids of each triangle to the actual nodes.
q_triangle_descr = """CALL algo.triangle.stream('Article','CITED_BY')
YIELD nodeA, nodeB, nodeC
RETURN algo.asNode(nodeA) AS nodeA, algo.asNode(nodeB) AS nodeB, algo.asNode(nodeC) AS nodeC"""

# Run the query, then materialize every triangle as a dict with keys nodeA/nodeB/nodeC.
triangle_cursor_nodes = graph.run(q_triangle_descr)
triangle_count_nodes = triangle_cursor_nodes.data()

In [9]:
# Inspect the first triangle returned: one dict holding its three nodes.
triangle_count_nodes[:1]

[{'nodeA': (_46:Article {doi: '10.1007/s10926-005-9006-z', doi_url: 'https://doi.org/10.1007/s10926-005-9006-z', id: '8fb9c95bf34a0f28dc05819cb4aada0cb94fe555', title: 'Development of Work Stress Scale for Correctional Officers', year: 2005}),
  'nodeB': (_72:Article {doi: '10.1046/j.1525-1497.1999.00018.x', doi_url: 'https://doi.org/10.1046/j.1525-1497.1999.00018.x', id: 'c4062742b4e0d13cfa0e992fdf2cebf2eb71c415', title: 'Patient preferences for colon cancer screening', year: 1999}),
  'nodeC': (_100:Article {doi: '10.1038/bjc.1990.274', doi_url: 'https://doi.org/10.1038/bjc.1990.274', id: 'f218ce53248d756db61726985f73e6e8c109b3e2', title: 'Coffee consumption and the risk of breast cancer. A prospective study of 14,593 Norwegian women.', year: 1990})}]

Visualization of a single triangle:

![triangle_count](schemas/c_triangles_limit_1.png)

Visualization of how the first five triangles are connected:

![triangle_count](schemas/c_triangles_limit_5.png)