In [25]:
from dotenv import load_dotenv
import os


import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain

# Warning control
import warnings
warnings.filterwarnings("ignore")

In [26]:
# Load connection settings and API credentials from the local .env file.
# override=True lets values in .env replace variables already set in the process.
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# An unset or empty NEO4J_DATABASE falls back to 'neo4j'.
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# os.getenv returns None when OPENAI_BASE_URL is unset, and `None + str` raises
# TypeError. Fall back to the public OpenAI API root instead of crashing, and
# strip a trailing slash so the result never contains '//embeddings'.
_openai_base_url = os.getenv('OPENAI_BASE_URL') or 'https://api.openai.com/v1'
OPENAI_ENDPOINT = _openai_base_url.rstrip('/') + '/embeddings'



# Global constants
# Name of the Neo4j vector index, the node label it covers, the property that
# holds the chunk text, and the property that holds the embedding vector.
VECTOR_INDEX_NAME = 'NapoleonOpenAI'
VECTOR_NODE_LABEL = 'Napoleon_Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbeddingOpenAI'




In [27]:
# Graph handle used for every Cypher statement in this notebook.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

In [28]:
# Build the index DDL from the shared VECTOR_* constants so the definition
# cannot drift from the names handed to Neo4jVector elsewhere in the notebook.
# The dimension must equal the length of the stored embedding vectors
# (presumably OpenAI's 1536-dimensional embeddings -- confirm if the model changes).
VECTOR_DIMENSIONS = 1536

# Literal braces of the Cypher map are doubled because this is an f-string.
create_vector_index_cypher = f"""
         CREATE VECTOR INDEX `{VECTOR_INDEX_NAME}` IF NOT EXISTS
          FOR (nc:{VECTOR_NODE_LABEL}) ON (nc.{VECTOR_EMBEDDING_PROPERTY})
          OPTIONS {{ indexConfig: {{
            `vector.dimensions`: {VECTOR_DIMENSIONS},
            `vector.similarity_function`: 'cosine'
         }}}}
"""
kg.query(create_vector_index_cypher)

# Manual cleanup, left disabled:
# kg.query("""DROP INDEX `Napoleon`""")


[]

In [29]:
# List the vector indexes known to the database.
show_vector_indexes_cypher = """
  SHOW VECTOR INDEXES
  """
kg.query(show_vector_indexes_cypher)

[{'id': 4,
  'name': 'Napoleon',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Napoleon_Chunk'],
  'properties': ['textEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 7, 25, 21, 15, 57, 658000000, tzinfo=<UTC>),
  'readCount': 20},
 {'id': 5,
  'name': 'NapoleonOpenAI',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Napoleon_Chunk'],
  'properties': ['textEmbeddingOpenAI'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 7, 29, 18, 22, 39, 777000000, tzinfo=<UTC>),
  'readCount': 70}]

# GraphRAG with Relationship Search

In [36]:
# Prompt text for Cypher generation. `{schema}` and `{question}` are the two
# placeholders filled in when the text is wrapped in a PromptTemplate.
# Each few-shot example must RETURN only variables bound in its own MATCH
# clause; otherwise the model is shown Cypher that cannot execute.
retrieval_qa_chat_prompt = """
Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that
are not provided.
Remember the relationships are like Schema:
{schema}
if question say Talleyrand it means Charles-Maurice de Talleyrand 
and if say Napoleon means Napoleon Bonaparte and if say waterloo is Battle of Waterloo.

Note: Do not include any explanations or apologies in your responses.
Do not include any text except the generated Cypher statement. Remember to correct the typo in names

Example 1: What was the story of napoleon in the battle of waterloo?
MATCH (Napoleon:Person)-[:RELATED_TO]->(waterloo:Event)-[:HAS_General_INFO]->(info:General_info)-[:HAS_Chunk_INFO]->(ChunkInfo:Waterloo_Chunk)
RETURN Napoleon, waterloo, info, ChunkInfo.text

Example 2: What was the story of the battle of waterloo?
MATCH (waterloo:Event)-[:HAS_General_INFO]->(info:General_info)-[:HAS_Chunk_INFO]->(ChunkInfo:Waterloo_Chunk)
RETURN waterloo, info, ChunkInfo.text

Example 3: tell me about Talleyrand and napoleon in 5 lines
MATCH (Talleyrand:Person)-[:RELATED_TO]->(Napoleon:Person)-[:HAS_Career_INFO]->(info:Career_info)-[:HAS_Chunk_INFO]->(ChunkInfo:Napoleon_Chunk)
RETURN Talleyrand, Napoleon

The question is:
{question}

"""

In [37]:
# Wrap the raw prompt text in a PromptTemplate that declares its two
# placeholders, `schema` and `question`.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=retrieval_qa_chat_prompt,
    input_variables=["schema", "question"],
)

# Graph QA chain built on `kg` with the custom Cypher-generation prompt.
# verbose=True makes each run print the generated Cypher and the returned context.
cypherChain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    graph=kg,
    verbose=True,
)

def prettyCypherChain(question: str) -> None:
    """Run ``question`` through ``cypherChain`` and print the answer wrapped at 60 columns.

    Args:
        question: Natural-language question to answer from the graph.

    Returns:
        None. The answer is printed, not returned.
    """
    # `Chain.run` is deprecated; `invoke` takes the chain's input key ("query")
    # and returns a dict whose "result" entry holds the answer text.
    response = cypherChain.invoke({"query": question})
    print(textwrap.fill(response["result"], 60))

# VectorRAG without Relationship Search

In [38]:
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector

# Vector store backed by the existing graph: nodes labelled VECTOR_NODE_LABEL,
# text read from VECTOR_SOURCE_PROPERTY, embeddings kept in
# VECTOR_EMBEDDING_PROPERTY and served through the VECTOR_INDEX_NAME index.
vector_store = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    # Target the same database as `kg`; without this the store uses the
    # driver default even when NEO4J_DATABASE names a different database.
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=[VECTOR_SOURCE_PROPERTY],
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
)


# Stored under its own name: rebinding `retrieval_qa_chat_prompt` (the Cypher
# prompt string) to this prompt object would break a re-run of the
# PromptTemplate cell.
vector_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
combine_docs_chain = create_stuff_documents_chain(ChatOpenAI(temperature=0), vector_qa_chat_prompt)
retrival_chain = create_retrieval_chain(
    retriever=vector_store.as_retriever(), combine_docs_chain=combine_docs_chain
)




In [54]:
# Graph question answering: the chain generates a Cypher query that follows
# relationships between nodes (this is not a vector similarity search).
prettyCypherChain("who was leading battle of waterloo?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (waterloo:Event)-[:HAS_Combatant_INFO]->(combatant:Combatant)
RETURN combatant.frenchCommander[0m
Full Context:
[32;1m[1;3m[{'combatant.frenchCommander': 'Napoleon Bonaparte'}][0m

[1m> Finished chain.[0m
Napoleon Bonaparte was leading the Battle of Waterloo.


In [55]:
# Plain vector-similarity retrieval; relationships between nodes are not considered.
query = {"input": "who was leading battle of waterloo?"}

result = retrival_chain.invoke(query)
print(textwrap.fill(result['answer'], width=60))

Wellington was leading the Battle of Waterloo.
