In [2]:
!pip install --quiet jupyterlab-vim jupytex
!jupyter labextension enable

[0m

# Neo4j

In [3]:
!pip install --quiet neo4j

[0m

In [71]:
import neo4j
print(neo4j.__version__)

5.22.0


In [84]:
import hneo4j

# Force reload.
# import importlib
# importlib.reload(hneo4j)

In [242]:
from pprint import pformat
from typing import Any

from pygments import highlight
from pygments.formatters import Terminal256Formatter
from pygments.lexers import PythonLexer

def to_bold(text):
    return "\033[1m" + text + "\033[0m"
    

def pformat_(obj: Any) -> str:
    """
    Pretty-print in color.
    """
    if hasattr(obj, "to_dict"):
        obj = obj.to_dict()
    res = highlight(pformat(obj), PythonLexer(), Terminal256Formatter())
    res = res.rstrip("\n")
    return res


def to_str_(obj, name=""):
    if name:
        txt = "%s= %s %s" % (to_bold(name), type(obj), pformat_(obj))
    else:
        txt = "%s %s" % (type(obj), pformat_(obj))
    return txt


def print_(obj, name=""):
    print(to_str_(obj, name))


def print_result(result):
    # The result contains information about the query results and summary of the query.
    records, summary, keys = result
    # `result.records` is the list of records returned by the query.
    print_(records, "records")
    print_(summary, "summary")
    # `result.keys` is the list of keys returned by the query.
    print_(keys, "keys")

In [72]:
from neo4j import GraphDatabase, RoutingControl

URI = "neo4j://neo4j:7687"
#URI = "bolt://neo4j:7687"
AUTH = ("neo4j", "testtest")

# def add_friend(driver, name, friend_name):
#     driver.execute_query(
#         "MERGE (a:Person {name: $name}) "
#         "MERGE (friend:Person {name: $friend_name}) "
#         "MERGE (a)-[:KNOWS]->(friend)",
#         name=name, friend_name=friend_name, database_="neo4j",
#     )


# def print_friends(driver, name):
#     records, _, _ = driver.execute_query(
#         "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
#         "RETURN friend.name ORDER BY friend.name",
#         name=name, database_="neo4j", routing_=RoutingControl.READ,
#     )
#     for record in records:
#         print(record["friend.name"])


# with GraphDatabase.driver(URI, auth=AUTH) as driver:
#     add_friend(driver, "Arthur", "Guinevere")
#     add_friend(driver, "Arthur", "Lancelot")
#     add_friend(driver, "Arthur", "Merlin")
#     print_friends(driver, "Arthur")

In [73]:
driver = GraphDatabase.driver(URI, auth=AUTH)

In [74]:
# Get the Neo4j version
hneo4j.print_neo4j_version(driver)

Name: Neo4j Kernel, Version: ['5.22.0'], Edition: community


In [75]:
session = driver.session(database="neo4j")

In [None]:
# # TODO(gp): It seems that it's not easy to have multiple DBs in neo4j community edition.
# with driver.session(database="system") as session:
#     # Run the CREATE DATABASE command
#     session.run(f"CREATE DATABASE {database_name}")
#     print(f"Database '{database_name}' created successfully.")

# # Example usage
# database_name = "wine"
# create_database(driver, database_name)

In [150]:
hneo4j.print_graph_stats(driver)

Number of nodes: 1
Number of edges: 0


# Example

- Every application using Neo4j needs a `driver` object
- A `driver` object holds the details to the connection to a Neo4j database (e.g., URIs, credentials, and configuration).

In [152]:
# Test the connection to the DB without executing any query.
driver.verify_connectivity()

In [153]:
driver.get_server_info()

<neo4j.api.ServerInfo at 0xffff6e1c3070>

### Populate the graph with one node.

In [261]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1
Deleting ...
Number of nodes: 0
Number of edges: 0


In [262]:
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)
hneo4j.print_graph_stats(driver)

Number of nodes: 1
Number of edges: 0


### Performing a query using `driver`

In [354]:
# `execute_query()` wraps lower level APIs (e.g., Sessions) and it's used for simple cases.
query = "MATCH(n) RETURN COUNT(n) AS node_count"
result = driver.execute_query(query)

In [355]:
# The returned object is of type `Result`.
print_(result, "result")

[1mresult[0m= <class 'neo4j._work.eager_result.EagerResult'> EagerResult(records[38;5;241m=[39m[[38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m1[39m[38;5;241m>[39m], summary[38;5;241m=[39m[38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msummary[38;5;241m.[39mResultSummary [38;5;28mobject[39m at [38;5;241m0xffff6b981060[39m[38;5;241m>[39m, keys[38;5;241m=[39m[[38;5;124m'[39m[38;5;124mnode_count[39m[38;5;124m'[39m])


### 

In [398]:
import re

def type_to_str(obj):
    type_str = str(type(obj))
    # <class 'int'>
    m = re.search("(.*)<class '(.*)'>(.*)", type_str)
    if m:
        return "%s<%s>%s" % (m.group(1), m.group(2), m.group(3))
    return type_str

import helpers.hprint as hprint

def to_str(obj, depth=0):
    txt = ""
    if isinstance(obj, dict):
        import pprint
        txt = pprint.pformat(obj)
    if isinstance(obj, neo4j.graph.Node):
        txt = ""
        assert len(obj.labels) == 1
        txt += "label=%s\n" % (str(list(obj.labels)[0]))
        #txt += "properties=%s" % (str(dict(zip(obj.keys(), obj.items()))))
        txt += "properties=\n%s\n" % (to_str(dict(obj.items())))
    if isinstance(obj, neo4j.Record):
        record = obj
        assert len(record.keys()) == 1
        key = record.keys()[0]
        value = record[key]
        txt = "record=%s %s" % (type_to_str(value), to_str(value))
    if isinstance(obj, neo4j.EagerResult):
        txt = ""
        # The result contains information about the query results and summary of the query.
        records, summary, keys = result
        # `result.records` is the list of records returned by the query.
        txt += "records:\n" + to_str(records, depth=depth + 1)
        #txt += to_str_(summary, "summary") + "\n"
        # `result.keys` is the list of keys returned by the query.
        txt += "keys:\n" + to_str(keys, depth=depth + 1)
    if isinstance(obj, list):
        txt = []
        txt.append("%s [" % len(obj))
        for obj_tmp in obj:
            txt.append(to_str(obj_tmp, depth=depth + 1))
        txt.append("]\n")
        txt = "\n".join(txt)
    if isinstance(obj, (str, int, float, bool)):
        txt = "%s %s" % (type_to_str(obj), str(obj))
    if txt:
        txt = hprint.indent(txt, num_spaces=depth * 4)
        return txt
    raise ValueError("Invalid obj=%s of type=%s" % (obj, type(obj)))

In [365]:
result[0][0]["node_count"]

1

In [366]:
print(to_str(result[0]))

1 [
    record=<int> <int> 1
]



In [367]:
print(to_str(result))

records:
    1 [
            record=<int> <int> 1
    ]
keys:
    1 [
            <str> node_count
    ]



In [336]:
# The result contains information about the query results and summary of the query.
records, summary, keys = result

# `result.records` is the list of records returned by the query.
print_(records, "records")
print_(summary, "summary")
# `result.keys` is the list of keys returned by the query.
print_(keys, "keys")

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m1[39m[38;5;241m>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msummary[38;5;241m.[39mResultSummary [38;5;28mobject[39m at [38;5;241m0xffff6b725660[39m[38;5;241m>[39m
[1mkeys[0m= <class 'list'> [[38;5;124m'[39m[38;5;124mnode_count[39m[38;5;124m'[39m]


In [254]:
# Extract the first `record` returned by the query.
print_(records[0])

<class 'neo4j._data.Record'> [38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m2[39m[38;5;241m>[39m


In [255]:
# Access the result.
records[0]["node_count"]

2

In [368]:
# Return a node.
query = "MATCH(n:Wine) RETURN n"
result = driver.execute_query(query)
print_result(result)

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord n[38;5;241m=[39m[38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21210[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m({[38;5;124m'[39m[38;5;124mWine[39m[38;5;124m'[39m}) properties[38;5;241m=[39m{[38;5;124m'[39m[38;5;124mvintage[39m[38;5;124m'[39m: [38;5;241m2015[39m, [38;5;124m'[39m[38;5;124mname[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mPrancing Wolf[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124mstyle[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mice wine[39m[38;5;124m'[39m}[38;5;241m>>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msummary[38;5;241m.[39mResultSummary [38;5;28mobject[39m at [38;5;241m0xffff6bc80370[39m[38;5;241m>[39m
[1mkeys[0m= <class 'list'> [[38;5;124m'[39m[38;5;124mn[39m[38;5;124m'

In [369]:
records = result[0]
print_(records, "records")
node = records[0]["n"]
print_(node, "node")

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord n[38;5;241m=[39m[38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21210[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m({[38;5;124m'[39m[38;5;124mWine[39m[38;5;124m'[39m}) properties[38;5;241m=[39m{[38;5;124m'[39m[38;5;124mvintage[39m[38;5;124m'[39m: [38;5;241m2015[39m, [38;5;124m'[39m[38;5;124mname[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mPrancing Wolf[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124mstyle[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mice wine[39m[38;5;124m'[39m}[38;5;241m>>[39m]
[1mnode[0m= <class 'neo4j.graph.Node'> [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21210[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m({[38;5;124m'[39m[38;5;124mWine[39m[38;5;124m'[39m}) properties[38;5;2

In [399]:
#to_str(node)
print(to_str(records))
#dict(records[0]["n"].items())

1 [
    record=<neo4j.graph.Node> label=Wine
    properties=
    {'name': 'Prancing Wolf', 'style': 'ice wine', 'vintage': 2015}

]



In [370]:
record = result[0][0]

In [371]:
record.keys()

['n']

In [165]:
# Access the properties of the node.
print(node["vintage"])
print(node["name"])
print(node["style"])

2015
Prancing Wolf
ice wine


In [166]:
# Return records.
query = "MATCH(n:Wine) RETURN n.name AS name, n.style as style"
result = driver.execute_query(query)
#
records = result[0]
print_(records[0], "records[0]")
#
record = records[0]
print_(record, "record")
#print_(record[0])

[1mrecords[0][0m= <class 'neo4j._data.Record'> [38;5;241m<[39mRecord name[38;5;241m=[39m[38;5;124m'[39m[38;5;124mPrancing Wolf[39m[38;5;124m'[39m style[38;5;241m=[39m[38;5;124m'[39m[38;5;124mice wine[39m[38;5;124m'[39m[38;5;241m>[39m
[1mrecord[0m= <class 'neo4j._data.Record'> [38;5;241m<[39mRecord name[38;5;241m=[39m[38;5;124m'[39m[38;5;124mPrancing Wolf[39m[38;5;124m'[39m style[38;5;241m=[39m[38;5;124m'[39m[38;5;124mice wine[39m[38;5;124m'[39m[38;5;241m>[39m


## Session

- Database activity is coordinated through `Session`s and `Transaction`s
- A `Session` is a container for a number of unit of works
    - Provide guarantees of causal consistency
    - Are lightweight opeation and not thread safe
- A `Transaction` is a unit of work that is either committed in its entirety or rolled back in case of failure

In [146]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

# Run a query.
query = "MATCH (n) RETURN n"
_ = session.run(query)

# Close `Session`.
session.close()
print("session.closed()=", session.closed())

session.closed()= False
session.closed()= True


In [147]:
# Session can be created and destroyed using a block context, so that the session is closed
# properly in case of exceptions.
with driver.session() as session:
    result = session.run("MATCH (n) RETURN n")
    # ...

In [148]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

session.closed()= False


- `driver.execute_query()` is a higher-level function introduced to simplify query execution,
  without needing to manage sessions and transactions explicitly.
- `session.run()` is used for executing queries within a specific session and it
  provides more control over the session and transaction lifecycle.                                             

In [29]:
# Count the number of nodes.
query = "MATCH(n) RETURN COUNT(n) AS node_count"

result = driver.execute_query(query)
print(type(result))
print(result)

<class 'neo4j._work.eager_result.EagerResult'>
EagerResult(records=[<Record node_count=2>], summary=<neo4j._work.summary.ResultSummary object at 0xffff6e073820>, keys=['node_count'])


- The returned result is typically a `neo4j.Result` object, which encapsulates the records, summary, and keys of the query execution.

In [30]:
# Parse the result into its components.
records, summary, keys = result
print(type(records), records)
print(type(summary), summary)
print(type(keys), keys)

<class 'list'> [<Record node_count=2>]
<class 'neo4j._work.summary.ResultSummary'> <neo4j._work.summary.ResultSummary object at 0xffff6e073820>
<class 'list'> ['node_count']


- `neo4j._data.Record` is a class in the Neo4j Python driver that represents a
  single row of results returned from a Cypher query
- Each `Record` object contains a series of named fields, corresponding to the
  columns of the result set

## Create 2 nodes

In [184]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 3
Number of edges: 2
Deleting ...
Number of nodes: 0
Number of edges: 0


In [185]:
# `w` has `Wine` label and then various properties.
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)

In [186]:
# Create a node representing a publication.
query = 'CREATE (p:Publication {name: "Wine Expert Monthly"})'
_ = driver.execute_query(query)

In [187]:
# Since the publication reports on the wine, we can create an edge.
query = '''
    MATCH (p:Publication {name: "Wine Expert Monthly"}),
      (w:Wine {name: "Prancing Wolf", vintage: 2015})
      CREATE (p)-[r:reported_on]->(w)
    '''
_ = driver.execute_query(query)

In [188]:
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1


In [198]:
# Return a node.
#query = "MATCH(n:Wine) RETURN n"
#query = "MATCH (p:Publication) RETURN p"
query = "MATCH (p) RETURN p"
result = driver.execute_query(query)
#print_result(result)
records = result[0]
print(len(records), records)

2 [<Record p=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21208' labels=frozenset({'Wine'}) properties={'vintage': 2015, 'name': 'Prancing Wolf', 'style': 'ice wine'}>>, <Record p=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209' labels=frozenset({'Publication'}) properties={'name': 'Wine Expert Monthly'}>>]


In [None]:
def to_str(obj):
    if isinstance(obj, Record):
        print

In [216]:
# Match a relationship.
query = """
    MATCH ()-[r]-()
    RETURN r
    """
result = driver.execute_query(query)
#print_result(result)
records = result[0]
#print(len(records), records)
record = records[0]
print_(record[0], "record")

relationship = record[0]
assert str(type(relationship)) == "<class 'abc.reported_on'>"

print(relationship.element_id)
# I guess it doesn't want to print/retrieve too much info from the nodes, but only
# keeps the internal IDs.
print(relationship.start_node)
print(relationship.end_node)

[1mrecord[0m= <class 'abc.reported_on'> [38;5;241m<[39mRelationship element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48262[39m[38;5;124m'[39m nodes[38;5;241m=[39m([38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m, [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21208[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m) [38;5;28mtype[39m[38;5;241m=[39m[38;5;124m'[39m[38;5;124mreported_on[39m[38;5;124m'[39m properties[38;5;241m=[39m{}[38;5;241m>[39m
5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48262
<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209' labels=frozenset() properties={}>
<Node eleme

In [229]:
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Wine Expert Monthly' AND b.name = 'Prancing Wolf'
    RETURN r;
"""
result = driver.execute_query(query)
#print_result(result)
records = result[0]
print(len(records), records)

1 [<Record r=<Relationship element_id='5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48262' nodes=(<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209' labels=frozenset() properties={}>, <Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21208' labels=frozenset() properties={}>) type='reported_on' properties={}>>]


In [228]:
# The edge direction matter, in fact there is no edge "Prancing Wolf" -> "Wine Expert Monthly",
# but only the other direction.
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
#print_result(result)
records = result[0]
print(len(records), records)

0 []


In [230]:
# Search both direction.
query = """
    MATCH (a)-[r]-(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
#print_result(result)
records = result[0]
print(len(records), records)

1 [<Record r=<Relationship element_id='5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48262' nodes=(<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209' labels=frozenset() properties={}>, <Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21208' labels=frozenset() properties={}>) type='reported_on' properties={}>>]


In [221]:
# Match a relationship.
query = """
    MATCH ()-[r]-()
    WHERE elementId(r) = 0
    RETURN r
    """
result = driver.execute_query(query)
#print_result(result)
records = result[0]
print(len(records), records)
#record = records[0]
#print_(record[0], "record")

#relationship = record[0]

0 []


In [None]:
#def relationship_to_str(relationship):
    

In [213]:
def print_nested(obj, indent=0):
    """
    Recursively prints nested objects with indentation.
    
    Parameters:
    obj: The object to print. Can be a list, dictionary, or any other type.
    indent (int): The current indentation level. Defaults to 0.
    """
    spacing = ' ' * indent
    if isinstance(obj, dict):
        print(f"{spacing}{{")
        for key, value in obj.items():
            print(f"{spacing}  {key}:")
            print_nested(value, indent + 4)
        print(f"{spacing}}}")
    elif isinstance(obj, list):
        print(f"{spacing}[")
        for item in obj:
            print_nested(item, indent + 4)
        print(f"{spacing}]")
    else:
        print(f"{spacing}{obj}")

In [215]:
print_nested(record)

<Record r=<Relationship element_id='5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48262' nodes=(<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21209' labels=frozenset() properties={}>, <Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:21208' labels=frozenset() properties={}>) type='reported_on' properties={}>>
