# Neo4J

## Préliminaires

In [1]:
from neo4j import GraphDatabase

class Neo4jConnection:
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, 
auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

## Création de la connexion, de la DB

In [3]:
try:
    conn = Neo4jConnection(uri="bolt://localhost:11004", user="daphne", pwd="daphnetest")
except Exception as e:
    print("Query failed:", e)
print("connexion établie")

connexion établie


In [4]:
conn.query("CREATE OR REPLACE DATABASE coradb")
print("create or replace db done")

create or replace db done


## Le graphe

### nœuds

In [5]:
query_string = '''
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/ngshya/datasets/master/cora/cora_content.csv'
AS line FIELDTERMINATOR ','
CREATE (:Paper {id: line.paper_id, class: line.label})
'''

In [6]:
conn.query(query_string, db='coradb')
print("created nodes")

created nodes


### arêtes

In [7]:
query_string = '''
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/ngshya/datasets/master/cora/cora_cites.csv'
AS line FIELDTERMINATOR ','
MATCH (citing_paper:Paper {id: line.citing_paper_id}),(cited_paper:Paper {id: line.cited_paper_id})
CREATE (citing_paper)-[:CITES]->(cited_paper)
'''

In [8]:
conn.query(query_string, db='coradb')
print("created edges")

created edges


### attributs

Classes d'articles (améliorer la sortie ?)

In [9]:
query_string = '''
MATCH (p:Paper)
RETURN DISTINCT p.class
ORDER BY p.class
'''

In [13]:
conn.query(query_string, db='coradb')
print("match classes: done.")

match classes: done.


Articles les plus cités

In [11]:
query_string = '''
MATCH ()-->(p:Paper) 
RETURN id(p), count(*) as indegree 
ORDER BY indegree DESC LIMIT 10
'''

In [14]:
conn.query(query_string, db='coradb')
print("most cited papers: done.")

most cited papers: done.


### Créer le graphe

In [15]:
query_string = '''
CALL gds.graph.create(
  'coraGraph',
  'Paper',
  'CITES'
)
'''

In [16]:
conn.query(query_string, db='coradb')

[<Record nodeProjection={'Paper': {'label': 'Paper', 'properties': {}}} relationshipProjection={'CITES': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'CITES', 'properties': {}}} graphName='coraGraph' nodeCount=2708 relationshipCount=5429 createMillis=40>]

## Analyse

### PageRank

In [17]:
# page rank
print("pageRanking...")

query_string = '''
CALL gds.pageRank.write('coraGraph', {
  writeProperty: 'pagerank'
})
YIELD nodePropertiesWritten, ranIterations
'''
conn.query(query_string, db='coradb')
print("...done")

pageRanking...
...done


### Betweeness

In [18]:
query_string = '''
CALL gds.betweenness.write('coraGraph', { 
  writeProperty: 'betweenness' })
YIELD minimumScore, maximumScore, scoreSum, nodePropertiesWritten
'''
conn.query(query_string, db='coradb')

[<Record minimumScore=0.0 maximumScore=9523.5 scoreSum=179657.00000000003 nodePropertiesWritten=2708>]

## Avec Pandas

In [19]:
from pandas import DataFrame

In [20]:
query_string = '''
MATCH (p:Paper)
RETURN DISTINCT p.id, p.class, p.pagerank, p.betweenness
'''

In [21]:
dtf_data = DataFrame([dict(_) for _ in conn.query(query_string, db='coradb')])
print(dtf_data.sample(10))

         p.id             p.class  p.pagerank  p.betweenness
1644    22563     Neural_Networks    7.119965        3969.25
717    198653  Genetic_Algorithms    2.840305          63.00
949   1109439     Neural_Networks    0.150000           0.00
1534      936          Case_Based    0.706934           0.00
2537     1237     Neural_Networks    0.353605           0.00
1134   650807  Genetic_Algorithms    0.277500           0.00
40      13982              Theory    1.146035           0.00
1093  1136446     Neural_Networks    0.150000           0.00
238    119761     Neural_Networks    0.314134          18.00
2652     7867          Case_Based    0.503813           3.00


In [None]:
conn.close()