In [1]:
import json
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from neo4j import GraphDatabase

from config import load_config
from graphs import (
    explode_columns,
    run_query,
    get_all_relationships,
    get_all_nodes,
    cleanup_database,
    create_relationship,
    create_node,
)

In [2]:
# Disabled: optional cudf / cugraph imports. The TODO notes in this notebook
# mention loading the graph as a cugraph object; re-enable this cell for that work.
# try:
#     import cudf
#     import cugraph
# except RuntimeError as e:
#     print(f"Error: {e}")

## TODO

Sentences should be created in different graph components

Create graph relationships between sequences to indicate the textual order of sentences.

Instead of sentences, we should use noun phrases.

Write code that reads the full graph from a Neo4j database, loads it as a cuGraph object, and plots it using NetworkX. Node labels and properties from Neo4j must be preserved in the NetworkX graph, as must relationship types and properties.




In [3]:
# Load settings and secrets once. Later cells read config.neo4j.uri, config.neo4j.username,
# config.neo4j.database and secrets.neo4j_password from these two objects.
config, secrets = load_config()

## Some experiments with queries

In [4]:
# Cypher: match every directed relationship r and return it with its start node (m) and end node (n).
query = "MATCH (m)-[r]->(n) return m, r, n"

In [5]:
# Create the Neo4j driver from the loaded config; the password is taken from `secrets`, not hard-coded here.
driver = GraphDatabase.driver(config.neo4j.uri, auth=(config.neo4j.username, secrets.neo4j_password))

In [6]:
# Open a session against the configured database. It is not a context manager here,
# so it is closed explicitly by the later `session.close()` cell.
session = driver.session(database=config.neo4j.database)

In [7]:
# Run the Cypher query through the project helper, handing it the session directly.
# NOTE(review): run_query presumably returns a pandas DataFrame with one column per returned variable — confirm in graphs.py.
df = run_query(session, query)

In [8]:
# Two ways to call the helper.
# Standard: call it directly with the session.
# rels = get_all_relationships(session)
# Alternative (used here): let the session run it as a managed read transaction;
# execute_read calls the helper with the transaction as its first argument.
rels = session.execute_read(get_all_relationships)

In [9]:
# Wrap the relationship records in a DataFrame and expand its 'properties' column.
# NOTE(review): explode_columns presumably spreads each properties mapping into separate columns — confirm in graphs.py.
rels_df = explode_columns(pd.DataFrame(rels), 'properties')
# Bare last expression: display the resulting frame.
rels_df

In [10]:
# Same two calling styles as for relationships.
# Standard: call the helper directly with the session.
# nodes = get_all_nodes(session)
# Alternative (used here): run it as a managed read transaction via execute_read.
nodes = session.execute_read(get_all_nodes)

In [11]:
# Wrap the node records in a DataFrame and expand both the "properties" and "labels" columns.
# NOTE(review): how explode_columns treats the list-valued "labels" column is not visible here — confirm in graphs.py.
nodes_df = explode_columns(pd.DataFrame(nodes), ["properties", "labels"])
# Bare last expression: display the resulting frame.
nodes_df

In [12]:
# End of the query experiments: release the session first, then the driver it was created from.
session.close()
driver.close()

### Experiments with data creation from JSON

In [13]:
# Re-create the driver: the previous one was closed at the end of the query experiments.
driver = GraphDatabase.driver(config.neo4j.uri, auth=(config.neo4j.username, secrets.neo4j_password))

In [14]:
# Load the sample graph description from disk. Later cells read its
# "nodes" and "relationships" lists.
graph_json_file = "graph_data.json"
# Decode explicitly as UTF-8 so the parsed content does not depend on the
# platform's default locale encoding.
with open(graph_json_file, "r", encoding="utf-8") as f:
    data = json.load(f)

In [15]:
# Print both sections of the loaded JSON: a heading line, then one record per line.
for heading, section in (("NODES", "nodes"), ("RELATIONSHIPS", "relationships")):
    print(heading)
    for record in data[section]:
        print(record)

NODES
{'uuid': 1, 'labels': 'PERSON', 'name': 'Alice', 'age': 30}
{'uuid': 2, 'labels': 'PERSON', 'name': 'Bob', 'age': 35}
{'uuid': 3, 'labels': 'EMPLOYEE', 'name': 'Charlie', 'age': 28}
{'uuid': 4, 'labels': 'COMPANY', 'name': 'TechCorp'}
{'uuid': 5, 'labels': ['EMPLOYEE', 'PERSON'], 'name': 'Alice'}
RELATIONSHIPS
{'source': 1, 'target': 2, 'type': 'LIKES', 'since': '2020-01-01', 'strength': 0.8}
{'source': 2, 'target': 4, 'type': 'WORKS_AT', 'since': '2018-05-15', 'position': 'Manager'}
{'source': 3, 'target': 4, 'type': 'WORKS_AT', 'since': '2019-03-01', 'position': 'Developer'}
{'source': 5, 'target': 4, 'type': 'WORKS_AT', 'since': '2018-01-08', 'position': 'CEO'}


In [16]:
# Reset the database, then create every node and every relationship from the JSON data,
# each through its own execute_write call (nodes first, so relationship endpoints exist).
# NOTE(review): this session is opened without `database=`, unlike the earlier
# `driver.session(database=config.neo4j.database)` — confirm the driver's default database
# is the intended target before running the cleanup.
with driver.session() as session:
    # `summary` is only consumed by the disabled print below within this cell.
    summary = cleanup_database(session)
    # print(f"Deleted {summary.counters.nodes_deleted} nodes and {summary.counters.relationships_deleted} relationships.")    
    for node in data["nodes"]:
        session.execute_write(create_node, node)
    for rel in data["relationships"]:
        session.execute_write(create_relationship, rel)

In [17]:
# Method 1
with driver.session() as session:
    df = run_query(session, query)

In [18]:
# Method 2
with driver.session() as session:
    df2 = session.execute_read(run_query, query)

In [19]:
# Display the Method 1 result (columns m, r, n).
df

Unnamed: 0,m,r,n
0,"(name, uuid, age)","(strength, since)","(name, uuid, age)"
1,"(name, uuid, age)","(position, since)","(name, uuid)"
2,"(name, uuid, age)","(position, since)","(name, uuid)"
3,"(name, uuid)","(position, since)","(name, uuid)"


In [20]:
# Inspect the end node (column 'n') of the first row returned by Method 1.
df.iloc[0]['n']

<Node element_id='4:a94a1b6d-fb11-4844-a6e9-31362e907dd0:1' labels=frozenset({'PERSON'}) properties={'name': 'Bob', 'uuid': 2, 'age': 35}>

In [21]:
# Same lookup on the Method 2 result, to compare it with the Method 1 output.
df2.iloc[0]['n']

<Node element_id='4:a94a1b6d-fb11-4844-a6e9-31362e907dd0:1' labels=frozenset({'PERSON'}) properties={'name': 'Bob', 'uuid': 2, 'age': 35}>