### **Load environment variables from the `.env` file and configure the LLM**

In [1]:
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown, Image
from neo4j import GraphDatabase
from langchain.vectorstores import FAISS
from langchain.text_splitter import TokenTextSplitter
from langchain.document_loaders import WikipediaLoader
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain.prompts.prompt import PromptTemplate
from langchain.schema.runnable import Runnable
from langchain_openai import ChatOpenAI

# Pull the settings stored in the local .env file into the process environment.
load_dotenv()

# --- Azure OpenAI settings -------------------------------------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
api_version = "2024-02-01"

# --- Neo4j connection settings ---------------------------------------------
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE")

# Chat model shared by every chain in this notebook. The deployment name is
# passed both as the model and as the Azure deployment.
llm = AzureChatOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    openai_api_version=api_version,
    azure_deployment=OPENAI_GPT4_DEPLOYMENT_NAME,
    model=OPENAI_GPT4_DEPLOYMENT_NAME,
)


In [2]:
def call_openAI(text):
    """Send a prompt to the chat model and return the text of its reply.

    ``llm`` is a LangChain ``AzureChatOpenAI`` object, not a raw OpenAI SDK
    client, so it is called through ``invoke`` rather than
    ``chat.completions.create`` (which does not exist on it).

    Args:
        text: The prompt to send. Presumably a list of chat messages (the
            original code passed it as ``messages``); a plain string is also
            accepted by ``llm.invoke``.

    Returns:
        The ``content`` of the message returned by the model.
    """
    # temperature=0.0 is forwarded to the underlying chat-completions request
    # for this call only; the shared ``llm`` object is left untouched.
    response = llm.invoke(text, temperature=0.0)
    return response.content

In [3]:
# Embedding model used to query the FAISS index.
# The credentials and deployment are passed explicitly, the same way as for
# `llm`, rather than relying on the client's implicit environment lookup.
embeddings = AzureOpenAIEmbeddings(
    model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    azure_deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    api_key=OPENAI_API_KEY,
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_version=api_version,
    chunk_size=1,  # NOTE(review): presumably one text per embedding request - confirm this is still required
)

In [4]:
# Connect to the vector store and load it into memory.

# RetrievalQA is otherwise only imported by a later cell, so on a fresh
# "Restart & Run All" this cell would fail with a NameError without this import.
from langchain.chains import RetrievalQA

# NOTE(security): allow_dangerous_deserialization=True permits loading pickled
# data from the index directory. Only load index files you created or trust.
vectorStore = FAISS.load_local("./dbs/documentation/faiss_index", embeddings, allow_dangerous_deserialization=True)

# Retriever that returns the 3 most similar vectors/documents for a query.
retriever = vectorStore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# "Stuff"-style retrieval QA chain over the same retriever; also returns the source documents.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

In [5]:
# Connect to the Neo4j graph database using the NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD and NEO4J_DATABASE settings read from the environment.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    sanitize=True,  # NOTE(review): presumably trims oversized values from query results - confirm in the Neo4jGraph docs
)

# Re-read the database schema held by the graph object.
# NOTE(review): presumably this is the schema the Cypher-generating chain sees - confirm.
graph.refresh_schema()

### Graph + RAG = GraphRAG

In [9]:
from langchain.chains import GraphCypherQAChain
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_core.messages import HumanMessage
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# RAG only: answer a question from the documents FAISS finds by embedding similarity.
def vector_search(question):
    """Answer ``question`` with a retrieval chain backed by the FAISS retriever.

    Args:
        question: The user's question; passed to the chain as ``input``.

    Returns:
        The chain's response. In the recorded run this is a dict holding the
        original ``input`` and the retrieved ``context`` documents; the
        generated answer is expected alongside them (LangChain documents it
        under the ``answer`` key).
    """
    system_instructions = "".join(
        (
            "Use the given context to answer the question. ",
            "If you don't know the answer, say you don't know. ",
            "Use three sentence maximum and keep the answer concise. ",
            "Context: {context}",
        )
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [("system", system_instructions), ("human", "{input}")]
    )

    # The retriever fetches the documents; the "stuff" chain places them in
    # {context} and asks the LLM to answer.
    rag_chain = create_retrieval_chain(
        retriever,
        create_stuff_documents_chain(llm, chat_prompt),
    )
    return rag_chain.invoke({"input": question})

# Graph only: let the LLM write a Cypher query and answer from the Neo4j results.
def graph_search(question):
    """Answer ``question`` using only the Neo4j graph database.

    Args:
        question: The user's question; passed to the chain as ``query``.

    Returns:
        The chain's response. In the recorded run this is a dict with the
        ``query`` and the ``result`` text.
    """
    # verbose=True prints the generated Cypher and the rows it returned.
    cypher_chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, verbose=True)
    return cypher_chain.invoke({"query": question})

def hybrid_search(question):
    """Answer ``question`` by combining the vector (RAG) and graph results.

    Runs ``vector_search`` and ``graph_search`` on the same question, places
    both raw responses into one prompt, and asks the LLM to synthesize a
    final answer.

    Args:
        question: The user's question.

    Returns:
        The message returned by ``llm.invoke`` (an ``AIMessage`` in the
        recorded run).
    """
    v_response = vector_search(question)
    g_response = graph_search(question)

    prompt_template = """You are a helpful question-answering agent. Your task is to analyze 
    and synthesize information from two sources: the top result from a similarity search 
    (unstructured information) and relevant data from a graph database (structured information). 
    Given the user's query: {question}, provide a meaningful and efficient answer based 
    on the insights derived from the following data:

    Unstructured information: {vector_result}. 
    Structured information: {graph_result}.
    """

    prompt = PromptTemplate(
        input_variables=["question", "vector_result", "graph_result"],
        template=prompt_template,
    )

    # Render through the PromptTemplate (previously it was built but never
    # used), so the declared input variables are the ones actually filled in.
    message = HumanMessage(
        content=prompt.format(
            question=question,
            vector_result=v_response,
            graph_result=g_response,
        )
    )
    return llm.invoke([message])

In [16]:
# RAG-only answer: ask the retrieval chain that uses the FAISS retriever and show its full response.
response = vector_search("Describe the families of Harry Potter's best friends.")
display(response)

{'input': "Describe the families of Harry Potter's best friends.",
 'context': [Document(metadata={'title': 'Harry Potter', 'summary': "Harry Potter is a series of seven fantasy novels written by British author J. K. Rowling. The novels chronicle the lives of a young wizard, Harry Potter, and his friends, Hermione Granger and Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. The main story arc concerns Harry's conflict with Lord Voldemort, a dark wizard who intends to become immortal, overthrow the wizard governing body known as the Ministry of Magic, and subjugate all wizards and Muggles (non-magical people).\nThe series was originally published in English by Bloomsbury in the United Kingdom and Scholastic Press in the United States.  A series of many genres, including fantasy, drama, coming-of-age fiction, and the British school story (which includes elements of mystery, thriller, adventure, horror, and romance), the world of Harry Potter explores n

In [18]:
# Graph-only answer: ask the Cypher QA chain. In the recorded run below the
# generated Cypher returned no rows, so the chain answered "I don't know the answer."
response = graph_search("Describe the families of Harry Potter's best friends.")
display(response)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (hp:Person {id: "Harry Potter"})-[:FRIEND_OF]->(bestFriends:Person)-[:CHILD_OF]->(parents:Person)-[:LIVED_WITH]->(family:Family)
RETURN bestFriends.id, COLLECT(parents.id) AS parents, family.id AS familyId
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': "Describe the families of Harry Potter's best friends.",
 'result': "I don't know the answer."}

In [17]:
# Graph + RAG hybrid answer: run both searches and let the LLM synthesize one answer from them.
response = hybrid_search("Describe the families of Harry Potter's best friends.")
display(response)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Document)-[:MENTIONS]->(hp:Character {id: "Harry Potter"})
MATCH (hp)-[:FRIEND_OF]->(friends:Person)-[:CHILD_OF]->(parents:Person)-[:LIVED_WITH]->(family:Family)
RETURN family;
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


AIMessage(content='Based on the provided unstructured information, here\'s a description of the families of Harry Potter\'s best friends:\n\nHermione Granger\'s family:\nHermione Granger is the only child of Muggle parents who are both dentists. Despite not being magical themselves, her parents are very supportive of Hermione\'s magical abilities and her involvement in the wizarding world. They are portrayed as loving and proud of Hermione\'s achievements at Hogwarts.\n\nRon Weasley\'s family:\nRon Weasley comes from a large, pure-blood wizarding family known as the Weasleys. He is the sixth of seven children. The Weasley family is characterized by their red hair, financial modesty, and a warm and loving household. Ron\'s parents are Arthur and Molly Weasley. Arthur works at the Ministry of Magic, and Molly is a homemaker who dedicates her life to raising her children. The Weasleys value traits such as courage, loyalty, and humor. Despite their lack of wealth, they are generous and wel