In [1]:
!pip install --quiet jupyterlab-vim jupytex
!jupyter labextension enable

%load_ext autoreload
%autoreload 2

[0m

In [2]:
!pip install --quiet neo4j

[0m

In [3]:
import neo4j
print(neo4j.__version__)

5.22.0


In [237]:
import helpers.hdbg as hdbg
import helpers.hprint as hprint
import hneo4j

from hneo4j import to_str, print_

# Force reload.
# import importlib
# importlib.reload(hneo4j)

# Neo4j

In [179]:
from neo4j import GraphDatabase, RoutingControl

URI = "neo4j://neo4j:7687"
#URI = "bolt://neo4j:7687"
AUTH = ("neo4j", "testtest")

# def add_friend(driver, name, friend_name):
#     driver.execute_query(
#         "MERGE (a:Person {name: $name}) "
#         "MERGE (friend:Person {name: $friend_name}) "
#         "MERGE (a)-[:KNOWS]->(friend)",
#         name=name, friend_name=friend_name, database_="neo4j",
#     )


# def print_friends(driver, name):
#     records, _, _ = driver.execute_query(
#         "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
#         "RETURN friend.name ORDER BY friend.name",
#         name=name, database_="neo4j", routing_=RoutingControl.READ,
#     )
#     for record in records:
#         print(record["friend.name"])


# with GraphDatabase.driver(URI, auth=AUTH) as driver:
#     add_friend(driver, "Arthur", "Guinevere")
#     add_friend(driver, "Arthur", "Lancelot")
#     add_friend(driver, "Arthur", "Merlin")
#     print_friends(driver, "Arthur")

In [180]:
driver = GraphDatabase.driver(URI, auth=AUTH)

In [181]:
# Get the Neo4j version
hneo4j.print_neo4j_version(driver)

Name: Neo4j Kernel, Version: ['5.22.0'], Edition: community


In [182]:
session = driver.session(database="neo4j")

In [183]:
# # TODO(gp): It seems that it's not easy to have multiple DBs in neo4j community edition.
# with driver.session(database="system") as session:
#     # Run the CREATE DATABASE command
#     session.run(f"CREATE DATABASE {database_name}")
#     print(f"Database '{database_name}' created successfully.")

# # Example usage
# database_name = "wine"

In [184]:
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1


# Example

- Every application using Neo4j needs a `driver` object
- A `driver` object holds the details to the connection to a Neo4j database (e.g., URIs, credentials, and configuration).

In [185]:
# Test the connection to the DB without executing any query.
driver.verify_connectivity()

In [186]:
driver.get_server_info()

<neo4j.api.ServerInfo at 0xffff5e29b250>

### Populate the graph with one node.

In [187]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1
Deleting ...
Number of nodes: 0
Number of edges: 0


In [188]:
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)
hneo4j.print_graph_stats(driver)

Number of nodes: 1
Number of edges: 0


## Performing a query using `driver`

### Query returning the number of nodes

In [189]:
# `execute_query()` wraps lower level APIs (e.g., Sessions) and it's used for simple cases.
query = "MATCH(n) RETURN COUNT(n) AS node_count"
result = driver.execute_query(query)

# The returned object is of type `Result`.
hdbg.dassert_isinstance(result, neo4j.EagerResult)

In [190]:
print(to_str(result))

records:
  1 [
      1 [
      <str> node_count ->
            <int> 1
      ]

  ]
keys:
  1 [
      <str> node_count
  ]



In [154]:
result[0][0]["node_count"]

1

In [155]:
# The result contains information about the query results and summary of the query.
records, summary, keys = result

# `result.records` is the list of records returned by the query.
print_(records, "records")
print_(summary, "summary")
# `result.keys` is the list of keys returned by the query.
print_(keys, "keys")

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m1[39m[38;5;241m>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msummary[38;5;241m.[39mResultSummary [38;5;28mobject[39m at [38;5;241m0xffff5e2994e0[39m[38;5;241m>[39m
[1mkeys[0m= <class 'list'> [[38;5;124m'[39m[38;5;124mnode_count[39m[38;5;124m'[39m]


In [156]:
# Extract the first `record` returned by the query.
print_(records[0])

<class 'neo4j._data.Record'> [38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m1[39m[38;5;241m>[39m


In [157]:
# Access the result.
records[0]["node_count"]

1

### Query matching a node

In [158]:
# Return a node.
query = "MATCH(n:Wine) RETURN n"
result = driver.execute_query(query)
print(to_str(result))

records:
  1 [
      1 [
      <str> n ->
              label=['Wine']
              properties={'name': 'Prancing Wolf', 'style': 'ice wine', 'vintage': 2015}

      ]

  ]
keys:
  1 [
      <str> n
  ]



In [159]:
records = result[0]
print_(records, "records")
node = records[0]["n"]
print_(node, "node")

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord n[38;5;241m=[39m[38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:33[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m({[38;5;124m'[39m[38;5;124mWine[39m[38;5;124m'[39m}) properties[38;5;241m=[39m{[38;5;124m'[39m[38;5;124mvintage[39m[38;5;124m'[39m: [38;5;241m2015[39m, [38;5;124m'[39m[38;5;124mname[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mPrancing Wolf[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124mstyle[39m[38;5;124m'[39m: [38;5;124m'[39m[38;5;124mice wine[39m[38;5;124m'[39m}[38;5;241m>>[39m]
[1mnode[0m= <class 'neo4j.graph.Node'> [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:33[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m({[38;5;124m'[39m[38;5;124mWine[39m[38;5;124m'[39m}) properties[38;5;241m=[

In [160]:
print(to_str(records))

1 [
  1 [
  <str> n ->
        label=['Wine']
        properties={'name': 'Prancing Wolf', 'style': 'ice wine', 'vintage': 2015}

  ]

]



In [162]:
# Access the properties of the node.
print(node["vintage"])
print(node["name"])
print(node["style"])

2015
Prancing Wolf
ice wine


### Query returning multiple records

In [163]:
# Return records.
query = "MATCH(n:Wine) RETURN n.name AS name, n.style as style"
result = driver.execute_query(query)
print(to_str(result))

records:
  1 [
      2 [
      <str> name ->
            <str> Prancing Wolf
      <str> style ->
            <str> ice wine
      ]

  ]
keys:
  2 [
      <str> name
      <str> style
  ]



## Session

- Database activity is coordinated through `Session`s and `Transaction`s
- A `Session` is a container for a number of unit of works
    - Provide guarantees of causal consistency
    - Are lightweight opeation and not thread safe
- A `Transaction` is a unit of work that is either committed in its entirety or rolled back in case of failure

In [None]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

# Run a query.
query = "MATCH (n) RETURN n"
_ = session.run(query)

# Close `Session`.
session.close()
print("session.closed()=", session.closed())

In [None]:
# Session can be created and destroyed using a block context, so that the session is closed
# properly in case of exceptions.
with driver.session() as session:
    result = session.run("MATCH (n) RETURN n")
    # ...

In [None]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

- `driver.execute_query()` is a higher-level function introduced to simplify query execution,
  without needing to manage sessions and transactions explicitly.
- `session.run()` is used for executing queries within a specific session and it
  provides more control over the session and transaction lifecycle.                                             

In [None]:
# Count the number of nodes.
query = "MATCH(n) RETURN COUNT(n) AS node_count"

result = driver.execute_query(query)
print(type(result))
print(result)

- The returned result is typically a `neo4j.Result` object, which encapsulates the records, summary, and keys of the query execution.

In [None]:
# Parse the result into its components.
records, summary, keys = result
print(type(records), records)
print(type(summary), summary)
print(type(keys), keys)

- `neo4j._data.Record` is a class in the Neo4j Python driver that represents a
  single row of results returned from a Cypher query
- Each `Record` object contains a series of named fields, corresponding to the
  columns of the result set

## Create 2 nodes

In [191]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 1
Number of edges: 0
Deleting ...
Number of nodes: 0
Number of edges: 0


In [192]:
# `w` has `Wine` label and then various properties.
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)

In [193]:
# Create a node representing a publication.
query = 'CREATE (p:Publication {name: "Wine Expert Monthly"})'
_ = driver.execute_query(query)

In [194]:
# Since the publication reports on the wine, we can create an edge.
query = '''
    MATCH (p:Publication {name: "Wine Expert Monthly"}),
      (w:Wine {name: "Prancing Wolf", vintage: 2015})
      CREATE (p)-[r:reported_on]->(w)
    '''
_ = driver.execute_query(query)

In [195]:
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1


### Query matching 2 nodes

In [199]:
# Return a node.
#query = "MATCH(n:Wine) RETURN n"
#query = "MATCH (p:Publication) RETURN p"
query = "MATCH (p) RETURN p"
result = driver.execute_query(query)
print(to_str(result))

records:
  2 [
      1 [
      <str> p ->
              label=['Wine']
              properties={'name': 'Prancing Wolf', 'style': 'ice wine', 'vintage': 2015}

      ]

      1 [
      <str> p ->
              label=['Publication']
              properties={'name': 'Wine Expert Monthly'}

      ]

  ]
keys:
  1 [
      <str> p
  ]



### Match a relationship

In [212]:
# Match a relationship.
query = """
    MATCH ()-[r]-()
    RETURN r
    """
result = driver.execute_query(query)
#print_result(result)
records = result[0]
#print(len(records), records)
record = records[0]
print_(record[0], "record")

relationship = record[0]
assert str(type(relationship)) == "<class 'abc.reported_on'>"

print(relationship.element_id)
# I guess it doesn't want to print/retrieve too much info from the nodes, but only
# keeps the internal IDs.
print(dir(relationship))
print(relationship.start_node)
print(relationship.end_node)

[1mrecord[0m= <class 'abc.reported_on'> [38;5;241m<[39mRelationship element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286[39m[38;5;124m'[39m nodes[38;5;241m=[39m([38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m, [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m) [38;5;28mtype[39m[38;5;241m=[39m[38;5;124m'[39m[38;5;124mreported_on[39m[38;5;124m'[39m properties[38;5;241m=[39m{}[38;5;241m>[39m
5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286
['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dict

In [220]:
print(relationship.values)

<bound method Entity.values of <Relationship element_id='5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286' nodes=(<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset() properties={}>, <Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37' labels=frozenset() properties={}>) type='reported_on' properties={}>>


In [210]:
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Wine Expert Monthly' AND b.name = 'Prancing Wolf'
    RETURN r;
"""
result = driver.execute_query(query)
print(to_str(result))

records:
  1 [
      1 [
      <str> r ->
              start_node=  label=[]
              properties={}
              end_node=  label=[]
              properties={}
              type=<str> reported_on  properties={}

      ]

  ]
keys:
  1 [
      <str> r
  ]



In [205]:
# The edge direction matter, in fact there is no edge "Prancing Wolf" -> "Wine Expert Monthly",
# but only the other direction.
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
print(to_str(result))

records:
  0 [
  ]
keys:
  1 [
      <str> r
  ]



In [243]:
# Search both direction.
query = """
    MATCH (a)-[r]-(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
hneo4j.print_result(result)

# Store the relationship id for the next query.
relationship_id = result[0][0]["r"].element_id
print(hprint.to_str("relationship_id"))

start_node = result[0][0]["r"].start_node.element_id
print(hprint.to_str("start_node"))

end_node = result[0][0]["r"].end_node.element_id
print(hprint.to_str("end_node"))

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord r[38;5;241m=[39m[38;5;241m<[39mRelationship element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286[39m[38;5;124m'[39m nodes[38;5;241m=[39m([38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m, [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m) [38;5;28mtype[39m[38;5;241m=[39m[38;5;124m'[39m[38;5;124mreported_on[39m[38;5;124m'[39m properties[38;5;241m=[39m{}[38;5;241m>>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msum

In [244]:
# Find the relationship by element_id.
query = """
    MATCH ()-[r]->()
    WHERE elementId(r) = $relationship_id
    RETURN startNode(r) AS start_node, endNode(r) AS end_node
    """
result = driver.execute_query(query, relationship_id=relationship_id)
print(result)

EagerResult(records=[<Record start_node=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset({'Publication'}) properties={'name': 'Wine Expert Monthly'}> end_node=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37' labels=frozenset({'Wine'}) properties={'vintage': 2015, 'name': 'Prancing Wolf', 'style': 'ice wine'}>>], summary=<neo4j._work.summary.ResultSummary object at 0xffff5dbe6f80>, keys=['start_node', 'end_node'])


In [245]:
# Find a node by element_id.
query = """
    MATCH (n)
    WHERE elementId(n) = $node_id
    RETURN n
    """
result = driver.execute_query(query, node_id=start_node)
print(result)

EagerResult(records=[<Record n=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset({'Publication'}) properties={'name': 'Wine Expert Monthly'}>>], summary=<neo4j._work.summary.ResultSummary object at 0xffff5dbe6590>, keys=['n'])


In [247]:
# Find two nodes and add a relationship between them.
query = """
    MATCH (p:Publication {name: "Wine Expert Monthly"}), (w:Wine {name: "Prancing Wolf"})
      CREATE (p)-[r:reported_on {rating: 97}]->(w)
      """
_ = driver.execute_query(query, node_id=start_node)

In [None]:
#
CREATE (g:GrapeType {name: "Riesling"})k