In [8]:
from neo4j import GraphDatabase
import openai

import os
import sys
project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
sys.path.append(project_root)
import keys

In [4]:
os.environ["OPENAI_API_KEY"] = keys.OPEN_AI_API_KEY

In [5]:
uri = keys.URI
username = "neo4j"
password = keys.NEO4J_PASS

In [6]:
def get_neo4j_schema():
    cypher_query = """
    MATCH (n)-[r]->(m) 
    RETURN DISTINCT labels(n) AS source_node, type(r) AS relationship, labels(m) AS target_node
    """
    
    driver = GraphDatabase.driver(uri, auth=(username, password))
    with driver.session() as session:
        result = session.run(cypher_query)
        schema = []
        
        for record in result:
            source = ", ".join(record["source_node"]) if record["source_node"] else "Unknown"
            relationship = record["relationship"]
            target = ", ".join(record["target_node"]) if record["target_node"] else "Unknown"
            schema.append(f"- ({source})-[:{relationship}]->({target})")
        
        return "\n".join(schema)

schema_info = get_neo4j_schema()
print("Neo4j Schema:\n", schema_info)

Neo4j Schema:
 - (Chunk)-[:PART_OF]->(Document)
- (Chunk)-[:NEXT_CHUNK]->(Chunk)
- (Chunk)-[:HAS_ENTITY]->(Document, __Entity__, Book)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Organization, Project)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Person)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Date)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Language)
- (Chunk)-[:SIMILAR]->(Chunk)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Title)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Organization, Institution)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Book)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Feature)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Audience)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Purpose)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Institution)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Event)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Entity, Place, City)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Place)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, People, Group)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Society)
- (Chunk)-[:HAS_ENTITY]->(__Entity__, Pe

In [9]:
user_query = "Who were the main figures in THE EXPANSION OF ROME TO THE UNIFICATION OF THE ITALIAN PENINSULA: c. 509-265 B. C."

In [13]:
prompt = f"""
You are an expert in Neo4j and historical knowledge retrieval.
Given the following Neo4j database schema comming from the following Cypher query:

    MATCH (n)-[r]->(m) 
    RETURN DISTINCT labels(n) AS source_node, type(r) AS relationship, labels(m) AS target_node

From that query you'll get an schema directly from Neo4j, but knowing the query it comes from contextaulize so you that you know
how to identify the different entities, and relationships

{schema_info}

Generate an optimized Cypher query to retrieve relevant information for the question:
"{user_query}"

Ensure the query is valid, optimized, and does not use unnecessary clauses, and focus on contextualizing what the user is asking
and what you have available in the the scheme, like the following examples:

What are the most important events that ocurred in rome.

Rome is a location, so you might use that if that is an entity or node, and then you can use the relationships that node has to
further construct on the query the user had.
"""

In [14]:
def generate_cypher_query(prompt):
    """Uses OpenAI to generate a Cypher query based on user input and schema."""
    
    client = openai.OpenAI()

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You generate Cypher queries for a Neo4j historical knowledge graph."},
            {"role": "user", "content": prompt}
        ]
    )

    cypher_query = response.choices[0].message.content
    return cypher_query.strip()

cypher_query = generate_cypher_query(prompt)
print("Generated Cypher Query:\n", cypher_query)

Generated Cypher Query:
 Based on the question, to get the main figures in THE EXPANSION OF ROME TO THE UNIFICATION OF THE ITALIAN PENINSULA: c. 509-265 B. C., we can potentially look for the `Person` entities who had significant `HAS_TITLE`, `HAS_ROLE`, `HELD_POSITION`, `GUIDED` or `INVOLVED_IN` relationships during this `TimePeriod`. We can also focus on the entities (`Person` or `Event`) that `:MENTIONED_IN` the historical period of the expansion of Rome.

Here is the potential Cypher query:

```cypher
MATCH (p:__Entity__, Person)-[r:HAS_TITLE|:HAS_ROLE|:HELD_POSITION|:GUIDED|:INVOLVED_IN|:MENTIONED_IN]->(e:Event)
WHERE EXISTS {
    MATCH (e:Event)<--(:TimePeriod { name: 'THE EXPANSION OF ROME TO THE UNIFICATION OF THE ITALIAN PENINSULA: c. 509-265 B. C.' })
    OPTIONAL MATCH (e:Event)-->(:Location { name: 'Rome' }) 
}
RETURN p.name as Person, type(r) as Relation, e.name as Event
```

This query finds the `Person` entities who had significant roles or were mentioned during the spec

In [15]:
cypher_query="""MATCH (p:__Entity__, Person)-[r:HAS_TITLE|:HAS_ROLE|:HELD_POSITION|:GUIDED|:INVOLVED_IN|:MENTIONED_IN]->(e:Event)
WHERE EXISTS {
    MATCH (e:Event)<--(:TimePeriod { name: 'THE EXPANSION OF ROME TO THE UNIFICATION OF THE ITALIAN PENINSULA: c. 509-265 B. C.' })
    OPTIONAL MATCH (e:Event)-->(:Location { name: 'Rome' }) 
}
RETURN p.name as Person, type(r) as Relation, e.name as Event
"""

In [16]:
def query_neo4j(cypher_query):
    driver = GraphDatabase.driver(uri, auth=(username, password))
    with driver.session() as session:
        result = session.run(cypher_query)
        return [record.data() for record in result]

results = query_neo4j(cypher_query)
print(results)

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input ',': expected a parameter, '&', ')', ':', 'WHERE', '{' or '|' (line 1, column 20 (offset: 19))
"MATCH (p:__Entity__, Person)-[r:HAS_TITLE|:HAS_ROLE|:HELD_POSITION|:GUIDED|:INVOLVED_IN|:MENTIONED_IN]->(e:Event)"
                    ^}

In [None]:
context = f"""
Structured Data:
{results}
"""
# Similar Historical Context:
# {similar_nodes}

final_prompt = f"""
You are a historian assistant. Using the structured data and similar context retrieved from a Neo4j knowledge graph, generate a detailed historical answer for the question:
"{user_query}"

Context:
{context}

Provide a well-structured, factual, and engaging historical explanation.
"""

In [None]:
def generate_historical_response(final_prompt):

    client = openai.OpenAI()

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are a historian AI assistant."},
                  {"role": "user", "content": context}]
    )

    return response.choices[0].message.content.strip()

# Generate a response
historical_answer = generate_historical_response(final_prompt)
print("Final Response:\n", historical_answer)
