In [8]:
from neo4j import GraphDatabase

# Define the Neo4j database connection class
class Neo4jConnection:

    def __init__(self, uri, user, password):
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self._driver.close()

    def run_cypher(self, cypher_query):
        with self._driver.session() as session:
            result = session.run(cypher_query)
            return result.data()  # Fetch all results


In [27]:
import os
from utils import getNLOntology
from openai import OpenAI
from rdflib import Graph

####### STEP1: GET THE UNSTRUCTURED CONTENT #########################

# for each of the files in the 'content' directory
with open('content/hockney-mr-and-mrs-clark-and-percy.txt', 'r') as file:
    content = file.read().replace('\n', '')

# cnt, content = 0, []
# for f in os.listdir('content'):
#     print(f)
    
#     with open(f'content/{f}', 'r') as file:
#         content = file.read().replace('\n', '')
#     cnt+=1


In [14]:
####### STEP2: GET THE ONTOLOGY #####################################
g = Graph()
g.parse("art.ttl")

# OPTION 1 : Ontology in standard serialisation
ontology = g.serialize(format="ttl")

# OPTION 2 : Natural language description of the ontology
ontology = getNLOntology(g)

In [23]:
####### STEP3: PROMPT THE LLM ####################################### 

client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

system = (
    "You are an expert in extracting structured information out of natural language text. "
    "You extract entities with their attributes and relationships between entities. "
    "You can produce the output as RDF triples or as Cypher write statements on request. "      
)

prompt = '''Given the ontology below run your best entity extraction over the content.
 The extracted entities and relationships must be described using exclusively the terms in the ontology 
 and in the way they are defined. This means that for attributes and relationships you will respect the domain and range constraints.
 You will never use terms not defined in the ontology. 
Return the output as Cypher using merge to allow for linkage of nodes from multiple passes. 
Absolutely no comments on the output. Just the structured output. ''' + '\n\nONTOLOGY: \n ' + ontology + '\n\nCONTENT: \n ' + content 

# if you want to inspect...
print(prompt)

Given the ontology below run your best entity extraction over the content.
 The extracted entities and relationships must be described using exclusively the terms in the ontology 
 and in the way they are defined. This means that for attributes and relationships you will respect the domain and range constraints.
 You will never use terms not defined in the ontology. 
Return the output as Cypher using merge to allow for linkage of nodes from multiple passes. 
Absolutely no comments on the output. Just the structured output. 

ONTOLOGY: 
 
CATEGORIES:
Artist: a creator of artworks
Artwork: an artistic creation
Person:

ATTRIBUTES:
artworkKnownAs: Attribute that applies to entities of type Artwork. It represents represent the title or an identifying description of the artwork
professionalName: Attribute that applies to entities of type Artist. It represents the name used to refer to an artist. May not always match their real name, can be a pseudonym.

RELATIONSHIPS:
influences: Relationsh

In [24]:
chat_completion = client.chat.completions.create(
    messages=[
        {
          'role': 'system',
          'content': system,
        },
        {
          'role': 'user',
          'content': prompt ,
        }
          ],
    model="gpt-4o",
)

cypher_script = chat_completion.choices[0].message.content[3:-3]

In [25]:
# The Cypher generated by the LLM 
print(cypher_script)


cypher
MERGE (artist1:Artist {professionalName: "David Hockney"})
MERGE (artist2:Artist {professionalName: "Pablo Picasso"})
MERGE (artwork1:Artwork {artworkKnownAs: "Mr and Mrs Clark and Percy"})
MERGE (artwork2:Artwork {artworkKnownAs: "Red Celia"})
MERGE (artwork3:Artwork {artworkKnownAs: "An Image of Celia"})
MERGE (person1:Person {name: "Celia Birtwell"})
MERGE (person2:Person {name: "Ossie Clark"})

MERGE (artwork1)-[:was_created_by]->(artist1)
MERGE (artwork2)-[:was_created_by]->(artist1)
MERGE (artwork3)-[:was_created_by]->(artist1)

MERGE (artwork1)-[:represents]->(person1)
MERGE (artwork3)-[:represents]->(person1)

MERGE (artist1)-[:influences]->(artist2)

MERGE (person1)-[:worksWith]->(person2)



In [28]:
####### STEP4: WRITE CONTENT TO THE DB ##############################

uri =  "neo4j+s://c3c99623.databases.neo4j.io"     # "bolt://localhost:7687"
user = "neo4j"  
password = "yM-voy_2oCE0cpYaTFr6VrmF-H3YnCREd22KjGijxUk"  
conn = Neo4jConnection(uri, user, password)


In [29]:
# Run the Cypher script and get the results
result = conn.run_cypher(cypher_script)

# Close the connection
conn.close()