### **Load environment variables from the .env file and create the LLM client**

In [2]:
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown, Image
from langchain.text_splitter import TokenTextSplitter
from langchain.document_loaders import WikipediaLoader
from langchain.chains import RetrievalQA


# Pull settings from a local .env file into the process environment.
load_dotenv()

# Azure OpenAI credentials and deployment names (None when a variable is unset).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
# API version shared by the Azure OpenAI clients built with it in this notebook.
api_version = "2024-02-01"

# Neo4j connection settings, read in the same way.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.environ.get(name) for name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

# Chat model client; the GPT-4 deployment name is passed as both `model` and
# `azure_deployment`.
llm = AzureChatOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    openai_api_version=api_version,
    azure_deployment=OPENAI_GPT4_DEPLOYMENT_NAME,
    model=OPENAI_GPT4_DEPLOYMENT_NAME,
)


In [3]:
# Download the Wikipedia pages returned for the query "Napoleon Bonaparte".

raw_documents = WikipediaLoader(query="Napoleon Bonaparte").load()
# Show (at most) the first five loaded documents as the cell output.
raw_documents[:5]



  lis = BeautifulSoup(html).find_all('li')


[Document(metadata={'title': 'Napoleon', 'summary': 'Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led a series of successful campaigns across Europe during the Revolutionary Wars and Napoleonic Wars from 1796 to 1815. He was the leader of the French Republic as First Consul from 1799 to 1804, then of the French Empire as Emperor of the French from 1804 to 1814, and briefly again in 1815.\nBorn on the island of Corsica to a family of Italian origin, Napoleon moved to mainland France in 1779 and was commissioned as an officer in the French Army in 1785. He supported the French Revolution in 1789, and promoted its cause in Corsica. He rose rapidly in the ranks after breaking the siege of Toulon in 1793 and firing on royalist insurgents in Paris on 13 Vendémiaire in 1795. In 1796, Napoleon commanded a military campa

In [23]:
# Split the documents into chunks.

# Token-based splitter configured with chunk_size=1024 and chunk_overlap=24.
text_splitter = TokenTextSplitter(chunk_size=1024, chunk_overlap=24)
# Only the first three downloaded documents are chunked; the rest of
# raw_documents is not used by this cell.
documents = text_splitter.split_documents(raw_documents[:3])

## Build the Graph

In [7]:
# Convert the text chunks into graph documents (nodes and relationships) using the LLM.
from langchain_experimental.graph_transformers import LLMGraphTransformer

llm_transformer = LLMGraphTransformer(llm=llm)
# You can filter the nodes and relationships that you want to include in the graph:
# llm_transformer_filtered = LLMGraphTransformer(
#     llm=llm,
#     allowed_nodes=["Person", "Country", "Organization"],
#     allowed_relationships=["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"],
# )

# Each resulting graph document exposes `.nodes` and `.relationships`,
# which the following cells print for the first document.
graph_documents = llm_transformer.convert_to_graph_documents(documents)



In [8]:
# Print every node extracted for the first graph document, one per line.
for extracted_node in graph_documents[0].nodes:
    print(extracted_node)

id='Napoleon Bonaparte' type='Person'
id='Napoleone Di Buonaparte' type='Person'
id='Napoleon I' type='Person'
id='French Revolution' type='Event'
id='Revolutionary Wars' type='Event'
id='Napoleonic Wars' type='Event'
id='French Republic' type='Country'
id='First Consul' type='Position'
id='French Empire' type='Country'
id='Emperor Of The French' type='Position'
id='Corsica' type='Location'
id='French Army' type='Organization'
id='Siege Of Toulon' type='Event'
id='Paris' type='Location'
id='War Of The First Coalition' type='Event'
id='Egypt And Syria' type='Location'
id='Coup Of 18 Brumaire' type='Event'
id='Battle Of Marengo' type='Event'
id='War Of The Second Coalition' type='Event'
id='Louisiana' type='Location'
id='United States' type='Country'
id='Treaty Of Amiens' type='Event'
id='War Of The Third Coalition' type='Event'
id='Battle Of Austerlitz' type='Event'
id='Holy Roman Empire' type='Country'
id='War Of The Fourth Coalition' type='Event'
id='Battle Of Jena–Auerstedt' type='Ev

In [9]:
# Print every relationship extracted for the first graph document, one per line.
for extracted_relationship in graph_documents[0].relationships:
    print(extracted_relationship)

source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='Napoleone Di Buonaparte', type='Person') type='ALIAS'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='Napoleon I', type='Person') type='REGNAL_NAME'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='French Revolution', type='Event') type='INVOLVEMENT'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='Revolutionary Wars', type='Event') type='LEADERSHIP'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='Napoleonic Wars', type='Event') type='LEADERSHIP'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='French Republic', type='Country') type='LEADERSHIP'
source=Node(id='First Consul', type='Position') target=Node(id='French Republic', type='Country') type='POSITION_OF'
source=Node(id='Napoleon Bonaparte', type='Person') target=Node(id='First Consul', type='Position') type='POSITION'
source=Node(id='Napoleon Bonaparte', type='Person'

In [11]:
# Load the graph documents into Neo4j.
from langchain_community.graphs import Neo4jGraph

# NEO4J_URI may hold either a bare host name ("xxxx.databases.neo4j.io") or a
# complete connection URI ("neo4j+s://xxxx.databases.neo4j.io"). Only wrap it in
# the scheme/port template when it is a bare host; wrapping a full URI would
# produce a doubled scheme ("neo4j+s://neo4j+s://...:7687").
if NEO4J_URI and "://" in NEO4J_URI:
    url = NEO4J_URI
else:
    url = "neo4j+s://{}:7687".format(NEO4J_URI)

graph = Neo4jGraph(url=url, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)

# Per the LangChain docs: baseEntityLabel=True adds a common entity label to every
# node, and include_source=True also stores the source chunks and links them to the
# entities they mention. The embeddings cell later indexes nodes labelled "Document".
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)

## Add embeddings

In [12]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer


# Embedding client for the Azure OpenAI embedding deployment.
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    openai_api_version=api_version,
    chunk_size=1,
)

# Build a hybrid-search vector index over the existing "Document" nodes:
# the "text" property is embedded and the vector is stored in "embedding".
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    url=url,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

## Search the graph

In [53]:
from langchain.chains import GraphCypherQAChain

# Natural-language question that the chain turns into a Cypher query.
query = "Count how many persons, locations, or events?"

# verbose=True prints the generated Cypher and the raw query result ("Full Context").
chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)
response = chain.invoke({"query": query})
# The response is a dict holding the original 'query' and the model's 'result'.
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n) WHERE n:Person OR n:Location OR n:Event RETURN COUNT(n);[0m
Full Context:
[32;1m[1;3m[{'COUNT(n)': 60}][0m

[1m> Finished chain.[0m


{'query': 'Count how many persons, locations, or events?',
 'result': 'There are 60 persons, locations, or events.'}

## Search the embeddings

In [64]:
# Call OpenAI to get the answer.
from openai import AzureOpenAI

# Reuse the notebook-wide `api_version` (defined in the configuration cell) so this
# raw client targets the same Azure OpenAI API version as the LangChain clients,
# instead of pinning a second, older version here.
clientOpenAI = AzureOpenAI(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    api_key=OPENAI_API_KEY,
    api_version=api_version,
)

def call_openAI(question, answer):
    """Ask the chat deployment to answer `question` from the supplied information.

    Args:
        question: The user's question; placed after "Question: " in the prompt.
        answer: Supporting text; placed after "Information: " in the prompt.

    Returns:
        The message content of the first completion choice.
    """
    system_message = {
        "role": "system",
        "content": "You are a HELPFUL assistant answering users questions. Answer the question using the provided information and do not add anything else.",
    }
    # The user turn carries the question and the information on two lines.
    user_message = {
        "role": "user",
        "content": "\n".join(
            ["Question: {}".format(question), "Information: {}".format(answer)]
        ),
    }

    completion = clientOpenAI.chat.completions.create(
        model=OPENAI_GPT4_DEPLOYMENT_NAME,
        messages=[system_message, user_message],
        temperature=0.7,
        top_p=0.95,
        max_tokens=800,
        presence_penalty=0,
        frequency_penalty=0,
        stop=None,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


In [67]:
query = "Count how many persons, locations, or events are mentioned"
# Take only the single best match from the vector index (k=1) ...
results = vector_index.similarity_search(query, k=1)
# ... and pass that chunk's text to the model as the only context.
answer = call_openAI(query, results[0].page_content)
# NOTE(review): the model reply is rendered as raw HTML — confirm that is intended.
display(HTML(answer))

In [68]:
query = "Count how many persons, locations, or events are mentioned"
# Retrieve the 10 most similar chunks from the vector index.
results = vector_index.similarity_search(query, k=10)
# Give the model every retrieved chunk, not just the top hit; otherwise raising k
# would have no effect on the answer.
context = "\n\n".join(doc.page_content for doc in results)
answer = call_openAI(query, context)
display(HTML(answer))

####  The answers may change depending on the number of articles retrieved from the vector search

## GraphRAG = Graph + vector search

In [71]:
# First search the whole graph through a generated Cypher query.
query = "list all persons mentioned in the text"

# verbose=True prints the generated Cypher and the raw query result ("Full Context").
chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)
response = chain.invoke({"query": query})
# response['result'] is the natural-language answer; the next cell appends it
# to its vector-search query.
print(response['result'])



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person) RETURN p;[0m
Full Context:
[32;1m[1;3m[{'p': {'id': 'Napoleon Bonaparte'}}, {'p': {'id': 'Napoleone Di Buonaparte'}}, {'p': {'id': 'Napoleon I'}}, {'p': {'id': 'Joseph Bonaparte'}}, {'p': {'id': 'Napoleon Iii'}}, {'p': {'id': 'Louis Napoleon Bonaparte'}}, {'p': {'id': 'Louis Bonaparte'}}, {'p': {'id': 'Hortense De Beauharnais'}}, {'p': {'id': 'Napoleon Ii'}}, {'p': {'id': 'Georges-Eugène Haussmann'}}][0m

[1m> Finished chain.[0m
Napoleon Bonaparte, Napoleone Di Buonaparte, Napoleon I, Joseph Bonaparte, Napoleon III, Louis Napoleon Bonaparte, Louis Bonaparte, Hortense De Beauharnais, Napoleon II, and Georges-Eugène Haussmann are all persons mentioned in the text.


In [72]:
# Use the graph answer to search the vector index.
query = "List all persons, locations or events mentioned related to the following " + response['result']

# Retrieve the 5 most similar chunks for the graph-enriched query.
results = vector_index.similarity_search(query, k=5)
# Pass all retrieved chunks to the model; using only results[0] would discard
# four of the five hits that were just fetched.
context = "\n\n".join(doc.page_content for doc in results)
answer = call_openAI(query, context)
display(HTML(answer))