In [1]:
from neo4j import GraphDatabase
from tqdm.notebook import tqdm
import pandas as pd

# "Populer" une base Neo4j

Pré-requis :
1. Installation de Neo4j Browser: https://neo4j.com/download/
2. Création du Base de données Neo4j en local depuis le Neo4j browser :https://neo4j.com/developer/neo4j-desktop/

In [2]:
#/!\ Penser à changer les credentials 
db_params = {
    "uri":"bolt://localhost:7687",
    "user":"neo4j",
    "password":"treeoflife"
}

In [3]:
ranks = ['kingdom','phylum','class','family','order','genus','species']
df = pd.read_csv("../data/gbif_extract.csv",index_col=0)

In [4]:
df.head()

Unnamed: 0,key,nubKey,nameKey,taxonID,sourceTaxonKey,kingdom,phylum,order,family,kingdomKey,...,publishedIn,acceptedKey,accepted,proParteKey,genus,genusKey,species,speciesKey,basionymKey,basionym
0,8003,8003,6849425,gbif:8003,156957565.0,Animalia,Arthropoda,Amphipoda,Melitidae,1,...,,,,,,,,,,
1,8004,8004,7068178,gbif:8004,156957851.0,Animalia,Arthropoda,Amphipoda,Mimonectidae,1,...,,,,,,,,,,
2,8005,8005,7669892,gbif:8005,156957506.0,Animalia,Arthropoda,Amphipoda,Ochlesidae,1,...,,,,,,,,,,
3,8006,8006,7718541,gbif:8006,156957210.0,Animalia,Arthropoda,Amphipoda,Oedicerotidae,1,...,"LILLJEBORG, W. (1865). On the Lysianassa magel...",,,,,,,,,
4,8007,8007,7848133,gbif:8007,156085450.0,Animalia,Arthropoda,Amphipoda,Opisidae,1,...,"Lowry, J. K.; Stoddart, H. E. (1995). The Amph...",,,,,,,,,


### Préparation des données
restriction du tree of life au phylum Rotifera

In [5]:
data = df.loc[
    (df['rank']=='SPECIES') 
    & (df.kingdom == 'Animalia') 
    & (df.phylum == 'Rotifera'),
    ranks].dropna()

### Générations des éléments du graphes

In [6]:
def get_edges(df):
    '''génère l'esemble des couples distincts neoud noeud qui définissent une arête'''
    edges = []
    for i in range(len(ranks[:-1])):
        rank_pair = ranks[i:i+2]
        rank_edges = df[rank_pair].dropna(how='any').drop_duplicates().apply(tuple,axis=1).values
        edges.extend(rank_edges)
    return edges

def get_nodes(df):
    '''génère la liste des différents taxons (noeuds) contenu dans le dataset'''
    nodes = []
    for rank in ranks:
        rank_nodes = list(zip(df[rank].unique(), [rank for i in range(len(df[rank].unique()))]))
        nodes.extend(rank_nodes)
    return nodes

In [7]:
edges = get_edges(data)
nodes = get_nodes(data)

In [8]:
nodes[:3]

[('Animalia', 'kingdom'), ('Rotifera', 'phylum'), ('Eurotatoria', 'class')]

In [9]:
edges[:3]

[('Animalia', 'Rotifera'),
 ('Rotifera', 'Eurotatoria'),
 ('Rotifera', 'Pararotatoria')]

# Instanciation d'un driver Neo4j 
cf : https://neo4j.com/docs/api/python-driver/current/

/!\Vérifier préalablement depuis le Neo4j Browser que votre Bdd est bien en "running "


In [10]:
driver = GraphDatabase.driver(
    db_params['uri'], 
    auth=(db_params['user'], 
    db_params['password'])
)

### "Helper functions"pour créer le graphe

cf Cypher cheat-sheet : https://mpolinowski.github.io/neo-4-j-cypher-cheat-sheet

In [11]:
def create_node(tx, name, rank):
    '''créer un Noeud de type `Entity` ayant pour attributs un nom et un rang taxonomique'''
    return tx.run(
        "CREATE (a:Entity {name: $name, rank:$rank}) RETURN id(a)", 
        name=name, 
        rank=rank
    ).single().value()


def add_children(tx, name, child):
        '''lie 2 noeuds type `Entity` par une relation `HAS_CHILD` (arête orientée)'''
        tx.run('''MATCH (a: Entity)
                MATCH (b: Entity)
                WHERE a.name = $name AND b.name =$child AND a.name <> b.name
                CREATE (a)-[:HAS_CHILD]->(b)''',
                name=name, child=child)

### Ecriture en Base (Un peu long)

In [None]:
with driver.session() as session:
    for node in tqdm(nodes, desc='NODES '):
        session.write_transaction(create_node, node[0], node[1])
    for edge in tqdm(edges, desc ="EDGES "):
        session.write_transaction(add_children, edge[0], edge[1])
driver.close()

### Requetage avec py2neo
cf: https://py2neo.org/2020.0/

In [12]:
from py2neo import Graph, Node, Relationship
from py2neo.matching import *
from py2neo.ogm import Model, Property

In [13]:
graph = Graph(db_params['uri'], user=db_params['user'], password=db_params['password'])

In [14]:
graph.nodes.match("Node", name="Animalia").first()

Node('Node', name='Animalia', rank='kingdom')