In [1]:
!pip install --quiet jupyterlab-vim jupytex
!jupyter labextension enable

%load_ext autoreload
%autoreload 2

[0m

In [2]:
!pip install --quiet neo4j

[0m

In [3]:
import neo4j
print(neo4j.__version__)

5.22.0


In [237]:
import helpers.hdbg as hdbg
import helpers.hprint as hprint
import hneo4j

from hneo4j import to_str, print_

# Force reload.
# import importlib
# importlib.reload(hneo4j)

# Neo4j

In [179]:
from neo4j import GraphDatabase, RoutingControl

URI = "neo4j://neo4j:7687"
#URI = "bolt://neo4j:7687"
AUTH = ("neo4j", "testtest")

# def add_friend(driver, name, friend_name):
#     driver.execute_query(
#         "MERGE (a:Person {name: $name}) "
#         "MERGE (friend:Person {name: $friend_name}) "
#         "MERGE (a)-[:KNOWS]->(friend)",
#         name=name, friend_name=friend_name, database_="neo4j",
#     )


# def print_friends(driver, name):
#     records, _, _ = driver.execute_query(
#         "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
#         "RETURN friend.name ORDER BY friend.name",
#         name=name, database_="neo4j", routing_=RoutingControl.READ,
#     )
#     for record in records:
#         print(record["friend.name"])


# with GraphDatabase.driver(URI, auth=AUTH) as driver:
#     add_friend(driver, "Arthur", "Guinevere")
#     add_friend(driver, "Arthur", "Lancelot")
#     add_friend(driver, "Arthur", "Merlin")
#     print_friends(driver, "Arthur")

In [180]:
driver = GraphDatabase.driver(URI, auth=AUTH)

In [181]:
# Get the Neo4j version
hneo4j.print_neo4j_version(driver)

Name: Neo4j Kernel, Version: ['5.22.0'], Edition: community


In [182]:
session = driver.session(database="neo4j")

In [183]:
# # TODO(gp): It seems that it's not easy to have multiple DBs in neo4j community edition.
# with driver.session(database="system") as session:
#     # Run the CREATE DATABASE command
#     session.run(f"CREATE DATABASE {database_name}")
#     print(f"Database '{database_name}' created successfully.")

# # Example usage
# database_name = "wine"

In [184]:
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1


# Example

- Every application using Neo4j needs a `driver` object
- A `driver` object holds the details to the connection to a Neo4j database (e.g., URIs, credentials, and configuration).

In [185]:
# Test the connection to the DB without executing any query.
driver.verify_connectivity()

In [186]:
driver.get_server_info()

<neo4j.api.ServerInfo at 0xffff5e29b250>

### Populate the graph with one node.

In [337]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 1
Number of edges: 0
Deleting ...
Number of nodes: 0
Number of edges: 0


In [341]:
# Constraints help sanitize data inputs by preventing writes that don't satisfy certain criteria.
# E.g., every `Wine` node in the graph should have a unique constraint.
query = 'CREATE CONSTRAINT IF NOT EXISTS FOR (w:Wine) REQUIRE w.name IS UNIQUE'
hneo4j.execute_query(driver, query)

In [344]:
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)
hneo4j.print_graph_stats(driver)

ConstraintError: {code: Neo.ClientError.Schema.ConstraintValidationFailed} {message: Node(52) already exists with label `Wine` and property `name` = 'Prancing Wolf'}

## Performing a query using `driver`

### Query returning the number of nodes

In [326]:
# `execute_query()` wraps lower level APIs (e.g., Sessions) and it's used for simple cases.
query = "MATCH(n) RETURN COUNT(n) AS node_count"
result = driver.execute_query(query)

# The returned object is of type `Result`.
hdbg.dassert_isinstance(result, neo4j.EagerResult)

In [327]:
print(to_str(result))

records:
  1 [
      1 [
      <str> node_count ->
            <int> 0
      ]

  ]
keys:
  1 [
      <str> node_count
  ]



In [328]:
result[0][0]["node_count"]

0

In [329]:
# The result contains information about the query results and summary of the query.
records, summary, keys = result

# `result.records` is the list of records returned by the query.
print_(records, "records")
print_(summary, "summary")
# `result.keys` is the list of keys returned by the query.
print_(keys, "keys")

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m0[39m[38;5;241m>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msummary[38;5;241m.[39mResultSummary [38;5;28mobject[39m at [38;5;241m0xffff5d8566e0[39m[38;5;241m>[39m
[1mkeys[0m= <class 'list'> [[38;5;124m'[39m[38;5;124mnode_count[39m[38;5;124m'[39m]


In [330]:
# Extract the first `record` returned by the query.
print_(records[0])

<class 'neo4j._data.Record'> [38;5;241m<[39mRecord node_count[38;5;241m=[39m[38;5;241m0[39m[38;5;241m>[39m


In [331]:
# Access the result.
records[0]["node_count"]

0

### Query matching a node

In [332]:
# Return a node.
query = "MATCH(n:Wine) RETURN n"
result = driver.execute_query(query)
print(to_str(result))

records:
  0 [
  ]
keys:
  1 [
      <str> n
  ]



In [333]:
records = result[0]
print_(records, "records")
node = records[0]["n"]
print_(node, "node")

[1mrecords[0m= <class 'list'> []


IndexError: list index out of range

In [334]:
print(to_str(records))

0 [
]



In [162]:
# Access the properties of the node.
print(node["vintage"])
print(node["name"])
print(node["style"])

2015
Prancing Wolf
ice wine


### Query returning multiple records

In [163]:
# Return records.
query = "MATCH(n:Wine) RETURN n.name AS name, n.style as style"
result = driver.execute_query(query)
print(to_str(result))

records:
  1 [
      2 [
      <str> name ->
            <str> Prancing Wolf
      <str> style ->
            <str> ice wine
      ]

  ]
keys:
  2 [
      <str> name
      <str> style
  ]



## Session

- Database activity is coordinated through `Session`s and `Transaction`s
- A `Session` is a container for a number of unit of works
    - Provide guarantees of causal consistency
    - Are lightweight opeation and not thread safe
- A `Transaction` is a unit of work that is either committed in its entirety or rolled back in case of failure

In [None]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

# Run a query.
query = "MATCH (n) RETURN n"
_ = session.run(query)

# Close `Session`.
session.close()
print("session.closed()=", session.closed())

In [None]:
# Session can be created and destroyed using a block context, so that the session is closed
# properly in case of exceptions.
with driver.session() as session:
    result = session.run("MATCH (n) RETURN n")
    # ...

In [None]:
# Create `Session`.
session = driver.session(database="neo4j")
print("session.closed()=", session.closed())

- `driver.execute_query()` is a higher-level function introduced to simplify query execution,
  without needing to manage sessions and transactions explicitly.
- `session.run()` is used for executing queries within a specific session and it
  provides more control over the session and transaction lifecycle.                                             

In [None]:
# Count the number of nodes.
query = "MATCH(n) RETURN COUNT(n) AS node_count"

result = driver.execute_query(query)
print(type(result))
print(result)

- The returned result is typically a `neo4j.Result` object, which encapsulates the records, summary, and keys of the query execution.

In [None]:
# Parse the result into its components.
records, summary, keys = result
print(type(records), records)
print(type(summary), summary)
print(type(keys), keys)

- `neo4j._data.Record` is a class in the Neo4j Python driver that represents a
  single row of results returned from a Cypher query
- Each `Record` object contains a series of named fields, corresponding to the
  columns of the result set

## Create 2 nodes

In [249]:
hneo4j.print_graph_stats(driver)
print("Deleting ...")
hneo4j.delete_all(driver)
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 4
Deleting ...
Number of nodes: 0
Number of edges: 0


In [250]:
# `w` has `Wine` label and then various properties.
query = 'CREATE (w:Wine {name:"Prancing Wolf", style: "ice wine", vintage: 2015})'
_ = driver.execute_query(query)

In [251]:
# Create a node representing a publication.
query = 'CREATE (p:Publication {name: "Wine Expert Monthly"})'
_ = driver.execute_query(query)

In [252]:
# Since the publication reports on the wine, we can create an edge.
query = '''
    MATCH (p:Publication {name: "Wine Expert Monthly"}),
      (w:Wine {name: "Prancing Wolf", vintage: 2015})
      CREATE (p)-[r:reported_on]->(w)
    '''
_ = driver.execute_query(query)

In [253]:
hneo4j.print_graph_stats(driver)

Number of nodes: 2
Number of edges: 1


### Query matching 2 nodes

In [199]:
# Return a node.
#query = "MATCH(n:Wine) RETURN n"
#query = "MATCH (p:Publication) RETURN p"
query = "MATCH (p) RETURN p"
result = driver.execute_query(query)
print(to_str(result))

records:
  2 [
      1 [
      <str> p ->
              label=['Wine']
              properties={'name': 'Prancing Wolf', 'style': 'ice wine', 'vintage': 2015}

      ]

      1 [
      <str> p ->
              label=['Publication']
              properties={'name': 'Wine Expert Monthly'}

      ]

  ]
keys:
  1 [
      <str> p
  ]



### Match a relationship

In [212]:
# Match a relationship.
query = """
    MATCH ()-[r]-()
    RETURN r
    """
result = driver.execute_query(query)
#print_result(result)
records = result[0]
#print(len(records), records)
record = records[0]
print_(record[0], "record")

relationship = record[0]
assert str(type(relationship)) == "<class 'abc.reported_on'>"

print(relationship.element_id)
# I guess it doesn't want to print/retrieve too much info from the nodes, but only
# keeps the internal IDs.
print(dir(relationship))
print(relationship.start_node)
print(relationship.end_node)

[1mrecord[0m= <class 'abc.reported_on'> [38;5;241m<[39mRelationship element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286[39m[38;5;124m'[39m nodes[38;5;241m=[39m([38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m, [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m) [38;5;28mtype[39m[38;5;241m=[39m[38;5;124m'[39m[38;5;124mreported_on[39m[38;5;124m'[39m properties[38;5;241m=[39m{}[38;5;241m>[39m
5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286
['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dict

In [220]:
print(relationship.values)

<bound method Entity.values of <Relationship element_id='5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286' nodes=(<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset() properties={}>, <Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37' labels=frozenset() properties={}>) type='reported_on' properties={}>>


In [210]:
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Wine Expert Monthly' AND b.name = 'Prancing Wolf'
    RETURN r;
"""
result = driver.execute_query(query)
print(to_str(result))

records:
  1 [
      1 [
      <str> r ->
              start_node=  label=[]
              properties={}
              end_node=  label=[]
              properties={}
              type=<str> reported_on  properties={}

      ]

  ]
keys:
  1 [
      <str> r
  ]



In [205]:
# The edge direction matter, in fact there is no edge "Prancing Wolf" -> "Wine Expert Monthly",
# but only the other direction.
query = """
    MATCH (a)-[r]->(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
print(to_str(result))

records:
  0 [
  ]
keys:
  1 [
      <str> r
  ]



In [243]:
# Search both direction.
query = """
    MATCH (a)-[r]-(b)
    WHERE a.name = 'Prancing Wolf' AND b.name = 'Wine Expert Monthly'
    RETURN r;
"""
result = driver.execute_query(query)
hneo4j.print_result(result)

# Store the relationship id for the next query.
relationship_id = result[0][0]["r"].element_id
print(hprint.to_str("relationship_id"))

start_node = result[0][0]["r"].start_node.element_id
print(hprint.to_str("start_node"))

end_node = result[0][0]["r"].end_node.element_id
print(hprint.to_str("end_node"))

[1mrecords[0m= <class 'list'> [[38;5;241m<[39mRecord r[38;5;241m=[39m[38;5;241m<[39mRelationship element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m5:907b90c5-77b7-40ee-bd2b-900a55534cf9:48286[39m[38;5;124m'[39m nodes[38;5;241m=[39m([38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m, [38;5;241m<[39mNode element_id[38;5;241m=[39m[38;5;124m'[39m[38;5;124m4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37[39m[38;5;124m'[39m labels[38;5;241m=[39m[38;5;28mfrozenset[39m() properties[38;5;241m=[39m{}[38;5;241m>[39m) [38;5;28mtype[39m[38;5;241m=[39m[38;5;124m'[39m[38;5;124mreported_on[39m[38;5;124m'[39m properties[38;5;241m=[39m{}[38;5;241m>>[39m]
[1msummary[0m= <class 'neo4j._work.summary.ResultSummary'> [38;5;241m<[39mneo4j[38;5;241m.[39m_work[38;5;241m.[39msum

In [244]:
# Find the relationship by element_id.
query = """
    MATCH ()-[r]->()
    WHERE elementId(r) = $relationship_id
    RETURN startNode(r) AS start_node, endNode(r) AS end_node
    """
result = driver.execute_query(query, relationship_id=relationship_id)
print(result)

EagerResult(records=[<Record start_node=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset({'Publication'}) properties={'name': 'Wine Expert Monthly'}> end_node=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:37' labels=frozenset({'Wine'}) properties={'vintage': 2015, 'name': 'Prancing Wolf', 'style': 'ice wine'}>>], summary=<neo4j._work.summary.ResultSummary object at 0xffff5dbe6f80>, keys=['start_node', 'end_node'])


In [245]:
# Find a node by element_id.
query = """
    MATCH (n)
    WHERE elementId(n) = $node_id
    RETURN n
    """
result = driver.execute_query(query, node_id=start_node)
print(result)

EagerResult(records=[<Record n=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:38' labels=frozenset({'Publication'}) properties={'name': 'Wine Expert Monthly'}>>], summary=<neo4j._work.summary.ResultSummary object at 0xffff5dbe6590>, keys=['n'])


In [254]:
# Find two nodes and add a relationship between them.
query = """
    MATCH (p:Publication {name: "Wine Expert Monthly"}), (w:Wine {name: "Prancing Wolf"})
        CREATE (p)-[r:reported_on {rating: 97}]->(w)
      """
_ = driver.execute_query(query, node_id=start_node)

In [258]:
# We could add "Riesling" as a property to the node, or create a new node.
query = 'CREATE (g:GrapeType {name: "Riesling"})'
_ = driver.execute_query(query)

query = """
    MATCH (w:Wine {name: "Prancing Wolf"}), (g:GrapeType {name: "Riesling"})
        CREATE (w)-[r:grape_type]->(g)
    """
_ = driver.execute_query(query)

In [259]:
# Create a new node.
query = 'CREATE (e: EphemeralNode {name: "short lived"})'
_ = driver.execute_query(query)

# Establish a relationship between the new node and an existing one.
query = """
    MATCH (w:Wine {name: "Prancing Wolf"}), (e:EphemeralNode {name: "short lived"})
        CREATE (w)-[r:short_lived_relationship]->(e)
"""
_ = driver.execute_query(query)

In [269]:
# You can't delete a node that has still relationships associated with it.
query = "MATCH (e:EphemeralNode) DELETE e"
try:
    _ = driver.execute_query(query)
except neo4j.exceptions.Neo4jError as e:
    print("ERROR\n" + str(e))

ERROR
{code: Neo.ClientError.Schema.ConstraintValidationFailed} {message: Cannot delete node<43>, because it still has relationships. To delete this node, you must first delete its relationships.}


In [278]:
# query = "MATCH (e:EphemeralNode) RETURN e"
# result = driver.execute_query(query)
# print(to_str(result))
# result[0][0]

records:
  1 [
      1 [
      <str> e ->
              label=['EphemeralNode']
              properties={'name': 'short lived'}

      ]

  ]
keys:
  1 [
      <str> e
  ]



<Record e=<Node element_id='4:907b90c5-77b7-40ee-bd2b-900a55534cf9:43' labels=frozenset({'EphemeralNode'}) properties={'name': 'short lived'}>>

In [281]:
# Delete the relationship and then the node.
query = "MATCH ()-[r:short_lived_relationship]-() DELETE r"
_ = driver.execute_query(query)
query = "MATCH (e:EphemeralNode) DELETE e"
_ = driver.execute_query(query)

In [None]:
# Assume that there are multiple wineries and each winery produces more than one wine
# In a RDBMS we could use
# a separate table for each winery, storing wines they produce as rows
# ```
# 
# 
# - a table storing wines as rows

In [282]:
query = 'CREATE (wr:Winery {name: "Prancing Wolf Winery"})'
_ = driver.execute_query(query)

query = """
    MATCH (w:Wine {name: "Prancing Wolf"}), (wr:Winery {name: "Prancing Wolf Winery"})
        CREATE (wr)-[r:prodced]->(w)
    """
_ = driver.execute_query(query)

In [295]:
query = """
$ CREATE (w:Wine {name:"Prancing Wolf", style: "Kabinett", vintage: 2002})
$ CREATE (w:Wine {name: "Prancing Wolf", style: "Spatlese", vintage: 2010})
$ MATCH (wr:Winery {name: "Prancing Wolf"}), (w:Wine {name: "Prancing Wolf"})
      CREATE (wr)-[r:produced]->(w)
$ MATCH (w:Wine),(g:GrapeType {name: "Riesling"})
      CREATE (w)-[r:grape_type]->(g)
    """
queries = hneo4j.extract_chunks(query)
hneo4j.execute_query(driver, queries)

## Schemaless social

In [296]:
# Alice likes ice wine.
query = """
$ CREATE (p:Person {name: "Alice"})
$ MATCH (p:Person {name: "Alice"}), (w:Wine {name: "Prancing Wolf", style: "ice wine"})
    CREATE (p)-[r:likes]->(w)
"""
queries = hneo4j.extract_chunks(query)
hneo4j.execute_query(driver, queries)

In [297]:
# Tom likes Kabinett and ice wine, and trusts anything written by Wine Expert Monthly.
query = """
$ CREATE (p:Person {name: "Tom"})
$ MATCH (p:Person {name: "Tom"}), (w:Wine {name: "Prancing Wolf", style: "ice wine"})
    CREATE (p)-[r:likes]->(w)
$ MATCH (p:Person {name: "Tom"}), (pub:Publication {name: "Wine Expert Monthly"})
    CREATE (p)-[r:trusts]->(pub)
"""
queries = hneo4j.extract_chunks(query)
hneo4j.execute_query(driver, queries)

In [318]:
# Patty is friends with both Tom and Alice but has no favorite wine.
query = """
$ CREATE (p:Person {name: "Patty"})
$ MATCH (p1:Person {name: "Patty"}), (p2:Person {name: "Tom"})
    CREATE (p1)-[r:friends]->(p2)
$ MATCH (p1:Person {name: "Patty"}), (p2:Person {name: "Alice"})
    CREATE (p1)-[r:friends]->(p2)
"""
queries = hneo4j.extract_chunks(query)
hneo4j.execute_query(driver, queries)

In [None]:
## CREATE CONSTRAINT unique_person_id IF NOT EXISTS FOR (n:Person) REQUIRE n.id IS UNIQUE
# MERGE (n:Person {id: 123})
# ON CREATE SET n.name = 'John Doe', n.age = 30

In [319]:
net = hneo4j.plot_graph(driver)
net.show("ex.html")

ex.html


### Stepping stones

In [None]:
query = 'MATCH (p:Person {name: "Alice"})-->(n) RETURN n'

In [347]:
# Constraints help sanitize data inputs by preventing writes that don't satisfy certain criteria.
# E.g., every `Wine` node in the graph should have a unique constraint.
query = 'CREATE CONSTRAINT IF NOT EXISTS FOR (w:Wine) REQUIRE w.name IS UNIQUE'
hneo4j.execute_query(driver, query)