In [None]:
# Dependencies: uncomment to install them into the running kernel's environment.
#%pip install py2neo
#%pip install sentence-transformers
#%pip install neo4j




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Import the necessary libraries
import os
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer, util


  session.write_transaction(add_node, "Node", node)
  session.write_transaction(add_relationship, "Node", edge[0], "Node", edge[1], "RELATED_TO")


In [None]:
# Neo4j connection settings. Each value is read from its environment variable
# and falls back to the literal default when the variable is unset.
# NOTE(review): the defaults look like placeholders; confirm before relying on them.
uri, user, password = (
    os.environ.get(variable, fallback)
    for variable, fallback in (
        ('NEO4J_URI', 'neo4j://b92ae674.databases.neo4j.io'),
        ('NEO4J_USER', 'neo4j'),
        ('NEO4J_PASSWORD', 'password'),
    )
)

In [None]:
# Clear the Neo4j database beforehand if needed
def clear_db(uri, user, password):
    """Delete every node and relationship in the Neo4j database at ``uri``.

    This is destructive: it removes all data the connected user can see.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: Username for basic authentication.
        password: Password for basic authentication.

    Returns:
        None.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # DETACH DELETE removes each node together with its relationships.
            session.run("MATCH (n) DETACH DELETE n")
    finally:
        # Release the driver's connections even when the query fails.
        driver.close()

In [None]:
# Function to create nodes and relationships in Neo4j
def create_knowledge_graph(uri, user, password, nodes, edges, batch_size=1000):
    """Write ``nodes`` and ``edges`` to Neo4j as ``Node`` nodes joined by ``RELATED_TO``.

    Every node name is MERGEd as ``(:Node {name: ...})`` and every edge as
    ``(a)-[:RELATED_TO]->(b)`` between the two named nodes, so re-running the
    function does not create duplicates. All nodes are written before any edge.

    Writes are sent in batches (one transaction per batch, via ``UNWIND``)
    instead of one transaction per item, which avoids a network round trip
    for every single node and edge.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: Username for basic authentication.
        password: Password for basic authentication.
        nodes: Iterable of node names.
        edges: Iterable of ``(source_name, target_name)`` pairs.
        batch_size: Maximum number of items written per transaction.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    def add_nodes(tx, label, names):
        # The label is concatenated into the query text, so it must be a
        # trusted identifier; the names travel as a query parameter.
        tx.run("UNWIND $names AS name "
               "MERGE (a:" + label + " {name: name})",
               names=names)

    def add_relationships(tx, label1, label2, relationship, pairs):
        # Each pair is [source_name, target_name]; both nodes must already exist.
        tx.run("UNWIND $pairs AS pair "
               "MATCH (a:" + label1 + " {name: pair[0]}), (b:" + label2 + " {name: pair[1]}) "
               "MERGE (a)-[r:" + relationship + "]->(b)",
               pairs=pairs)

    def batches(items):
        # Yield consecutive slices of at most batch_size items.
        items = list(items)
        for start in range(0, len(items), batch_size):
            yield items[start:start + batch_size]

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            for names in batches(nodes):
                session.execute_write(add_nodes, "Node", names)
            for chunk in batches(edges):
                # Plain two-element lists keep the parameter shape explicit.
                pairs = [[edge[0], edge[1]] for edge in chunk]
                session.execute_write(add_relationships, "Node", "Node", "RELATED_TO", pairs)
    finally:
        # Release the driver's connections even when a write fails.
        driver.close()

# Function to extract nodes and edges from text using sentence-transformers
def extract_nodes_edges(text, threshold=0.7, model=None):
    """Split ``text`` into sentences and link the ones that are semantically similar.

    The text is split on ``'.'`` (a crude sentence boundary), each fragment is
    stripped, and empty fragments and repeated sentences are dropped so that
    no empty-string node or self-referencing edge is produced. Every remaining
    sentence becomes a node; two sentences are connected when the cosine
    similarity of their embeddings is strictly greater than ``threshold``.

    Args:
        text: The raw document text.
        threshold: Similarity above which two sentences are linked.
        model: Optional pre-loaded embedding model exposing
            ``encode(sentences, convert_to_tensor=True)``. When omitted,
            ``sentence-transformers/all-MiniLM-L6-v2`` is loaded.

    Returns:
        A ``(nodes, edges)`` tuple: ``nodes`` is the list of unique sentences
        in document order, and ``edges`` is a list of ``(earlier, later)``
        sentence pairs.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    sentences = list(dict.fromkeys(
        fragment.strip() for fragment in text.split('.') if fragment.strip()
    ))
    if not sentences:
        # Nothing to embed; skip loading the model entirely.
        return [], []

    if model is None:
        model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embeddings = model.encode(sentences, convert_to_tensor=True)

    # Use cosine similarity to find related sentences
    cosine_scores = util.pytorch_cos_sim(embeddings, embeddings)

    # Threshold the whole matrix at once and read back only the matching
    # index pairs, instead of indexing the tensor element by element.
    hits = (cosine_scores > threshold).nonzero().tolist()
    # i < j keeps each unordered pair once and skips the diagonal.
    edges = [(sentences[i], sentences[j]) for i, j in hits if i < j]

    return sentences, edges


In [None]:
# Process the text document
filename = 'US-Constitution-With-Amendments.txt'

# Read the full text data.
# The encoding is given explicitly because open() otherwise uses the platform's
# default, which differs between systems.
# NOTE(review): assumes the file is UTF-8 (or plain ASCII) — confirm.
with open(filename, "r", encoding="utf-8") as file:
    text = file.read()

# Extract nodes and edges from the text
nodes, edges = extract_nodes_edges(text)

# NOTE: This ran for 72 mins on Windows 10!
# Create the knowledge graph
create_knowledge_graph(uri, user, password, nodes, edges)

In [None]:
# Helper function to run a query and return the results
def run_query(uri, user, password, query, parameters=None):
    """Run one Cypher query and return all of its records as a list.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        user: Username for basic authentication.
        password: Password for basic authentication.
        query: The Cypher query text.
        parameters: Optional dict of query parameters (referenced as
            ``$name`` in the query). Prefer this over formatting values
            into the query string.

    Returns:
        A list with every record produced by the query.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Collect the records while the session is still open.
            return list(session.run(query, parameters))
    finally:
        # Release the driver's connections even when the query fails.
        driver.close()

# Re-read the Neo4j connection settings from the environment, falling back to
# the literal defaults when a variable is unset.
uri = os.environ.get('NEO4J_URI', 'neo4j://b92ae674.databases.neo4j.io')
user = os.environ.get('NEO4J_USER', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD', 'password')


Example Neo4j Queries against this data...

In [20]:
# Query 1: Fetch up to 10 nodes of any label.
# NOTE: this rebinds `nodes`, which an earlier cell used for the extracted sentences.
query = "MATCH (n) RETURN n LIMIT 10"
nodes = run_query(uri=uri, user=user, password=password, query=query)
nodes

[<Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:0' labels=frozenset({'IP'}) properties={'address': '192.168.202.100'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1' labels=frozenset({'IP'}) properties={'address': '192.168.202.76'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:2' labels=frozenset({'IP'}) properties={'address': '192.168.202.89'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:3' labels=frozenset({'IP'}) properties={'address': '192.168.202.85'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:4' labels=frozenset({'IP'}) properties={'address': '192.168.202.102'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:5' labels=frozenset({'IP'}) properties={'address': '192.168.202.93'}>>,
 <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:6' labels=frozenset({'IP'}) properties={'address': '192.168.202.97'}>>,
 <Record n=<Node e

In [22]:
# Query 2: Fetch up to 10 relationships of any type.
query = "MATCH ()-[r]->() RETURN r LIMIT 10"
relationships = run_query(uri=uri, user=user, password=password, query=query)
relationships

[<Record r=<Relationship element_id='5:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1152922604118474752' nodes=(<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:0' labels=frozenset() properties={}>, <Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:34' labels=frozenset() properties={}>) type='USES_PORT' properties={}>>,
 <Record r=<Relationship element_id='5:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1155174403932160000' nodes=(<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:0' labels=frozenset() properties={}>, <Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:43' labels=frozenset() properties={}>) type='USES_PORT' properties={}>>,
 <Record r=<Relationship element_id='5:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1157426203745845248' nodes=(<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:0' labels=frozenset() properties={}>, <Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:313' labels=frozenset() properties={}>) type='USES_PORT' properties={}>>,
 <Record r=<

In [26]:
# Query 3: List the outgoing RELATED_TO neighbours of one named node.
# NOTE: the name is spliced into the Cypher text as-is, so a value containing
# a single quote would break the query.
specific_node = "Article I"
query = f"MATCH (n)-[r:RELATED_TO]->(m) WHERE n.name = '{specific_node}' RETURN n, r, m"
related_nodes = run_query(uri=uri, user=user, password=password, query=query)
print(f"Nodes related to '{specific_node}':", related_nodes)

Nodes related to 'Article I': [<Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:7262' labels=frozenset({'Node'}) properties={'name': 'Article I'}> r=<Relationship element_id='5:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1157429502280735838' nodes=(<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:7262' labels=frozenset({'Node'}) properties={'name': 'Article I'}>, <Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:5547' labels=frozenset({'Node'}) properties={'name': 'Article IV'}>) type='RELATED_TO' properties={}> m=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:5547' labels=frozenset({'Node'}) properties={'name': 'Article IV'}>>, <Record n=<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:7262' labels=frozenset({'Node'}) properties={'name': 'Article I'}> r=<Relationship element_id='5:015e1c75-ac37-496e-85a1-1fb0d86f4b10:1164184901721791582' nodes=(<Node element_id='4:015e1c75-ac37-496e-85a1-1fb0d86f4b10:7262' labels=frozenset({'Node'}) properti

In [27]:
# Query 4: Total number of nodes in the database (returned as a one-record list).
query = "MATCH (n) RETURN count(n) as node_count"
node_count = run_query(uri=uri, user=user, password=password, query=query)
print("Number of nodes:", node_count)

Number of nodes: [<Record node_count=7549>]


In [28]:
# Query 5: Total number of relationships in the database (returned as a one-record list).
query = "MATCH ()-[r]->() RETURN count(r) as relationship_count"
relationship_count = run_query(uri=uri, user=user, password=password, query=query)
print("Number of relationships:", relationship_count)

Number of relationships: [<Record relationship_count=21088>]
