In [None]:
%pip install python-dotenv
%pip install openai==1.13.3
%pip install -qU langchain-openai
%pip install neo4j
%pip install langchain langchain-community

In [1]:
from dotenv import load_dotenv
import os
from openai import AzureOpenAI
from langchain_community.graphs import Neo4jGraph
from langchain_openai import AzureChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chains import LLMChain

In [2]:
load_dotenv()

# Azure OpenAI variables
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT =os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")

# Neo4j variables
NEO4J_URL = os.getenv("NEO4J_URL")
NEO4J_USERNAME =os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

### Connecting to the graph and making a Cypher query

In [3]:
#Connecting to the graph
graph = Neo4jGraph(
    url=NEO4J_URL,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD
)

In [16]:
#Quering the graph unsing Cypher
result = graph.query("""
MATCH (ner{uri:'tag:stardog:api:kabengele_munanga'})
MATCH p=(thesis)-[r:mentions]->(ner)
RETURN thesis.title, thesis.abstract, thesis.repository
LIMIT 15
""")

In [17]:
result

[{'thesis.title': 'A inserção de alunos imigrantes africanos negros na rede estadual de ensino na cidade de São Paulo (2014-2016)',
  'thesis.abstract': 'This work analyses the insertion of black immigrant students in a secondary school of the state educational system of São Paulo, in São Paulo city, SP, Brazil, among the years of 2014 and 2016. That choice is based on the professional position occupied by the researcher, who works for the same state education system specifically with its racial relations issues. The research aims to describe and evaluate the insertion of those students in the school routine, through identifying the relations among the black African immigrant students, the other students, the teachers, the school administrators and the school staff in general, as well as the relations among cultures in the school environment. To reach those aims, the researcher observed the classes, free times, entrance and leaving of students, how they arrange themselves in the workro

In [6]:
# Printing the graph schema
graph.refresh_schema()
print(graph.schema)

Node properties:
Resource {label: STRING, uri: STRING, family_name: STRING, givenname: STRING, repository: STRING, created: STRING, abstract: STRING, title: STRING, identifier: STRING}
_GraphConfig {_dataTypePropertyLabel: STRING, _subPropertyOfRel: STRING, _classNamePropName: STRING, _handleVocabUris: INTEGER, _applyNeo4jNaming: BOOLEAN, _relNamePropName: STRING, _domainRel: STRING, _keepLangTag: BOOLEAN, _keepCustomDataTypes: BOOLEAN, _handleMultival: INTEGER, _objectPropertyLabel: STRING, _rangeRel: STRING, _classLabel: STRING, _handleRDFTypes: INTEGER, _subClassOfRel: STRING}
Program {uri: STRING, label: STRING}
NamedIndividual {family_name: STRING, label: STRING, uri: STRING, givenname: STRING, acronym: STRING, identifier: STRING, created: STRING, abstract: STRING, title: STRING, repository: STRING}
Department {label: STRING, uri: STRING}
Person {family_name: STRING, label: STRING, givenname: STRING, uri: STRING}
University {label: STRING, acronym: STRING, uri: STRING}
Subject {la

### Using a cypher query to ground the LLM

In [7]:
# Connection to the Azure OpenAI model
client = AzureOpenAI(
  azure_endpoint=AZURE_OPENAI_ENDPOINT, 
  api_key=AZURE_OPENAI_API_KEY,  
  api_version=OPENAI_API_VERSION
)


chat_llm = AzureChatOpenAI(deployment_name=OPENAI_DEPLOYMENT_NAME,temperature=0)

In [22]:
# Using information queried from the knowledge graph
ground_information =  graph.query(""" 
MATCH (ner{uri:'tag:stardog:api:ana_lucia_silva_souza'})
MATCH p=(thesis)-[r:mentions]->(ner)
RETURN thesis.title, thesis.abstract, thesis.repository
LIMIT 10
""")
#ground_information =  graph.query(""" MATCH (n:Wellbore {WellboreName: "31/2-22 S"}) RETURN n;""")


prompt = PromptTemplate(template="""You are a bot assistant having a conversation about academic thesis and dissertations. You should strictly follow the information from your context and always provide the references (title of the document and link for the repository immediately after you mention one fact).

Chat History: {chat_history}
Context: {context}
Question: {question}
""", input_variables=["chat_history", "context", "question"])

In [23]:
memory = ConversationBufferMemory(memory_key="chat_history", input_key="question", return_messages=True)

chat_chain = LLMChain(llm=chat_llm, prompt=prompt, memory=memory) #LLMChain

while True:
    question = input("> ")
    print(question)
    response = chat_chain.invoke({
        "context": ground_information,
        "question": question
        })
    print(response["text"])
    if question == "quit":
        break

Quem foi Ana Lucia Silva Souza?
Ana Lucia Silva Souza é uma autora que desenvolveu a teoria dos Letramentos de Reexistências, utilizada em algumas das teses mencionadas no contexto. Você pode encontrar mais informações sobre ela em: https://repositorio.ufba.br/ri/bitstream/ri/34885/1/Disserta%C3%A7%C3%A3o%20Ana%20L%C3%BAcia%20Silva%20Souza.pdf.
quit
Goodbye! Don't hesitate to ask if you have any more questions.


In [29]:
question = input("> ")
#print (question)


In [6]:
# Connection to the Azure OpenAI model
client = AzureOpenAI(
  azure_endpoint=AZURE_OPENAI_ENDPOINT, 
  api_key=AZURE_OPENAI_API_KEY,  
  api_version=OPENAI_API_VERSION
)


llm = AzureChatOpenAI(deployment_name=OPENAI_DEPLOYMENT_NAME,temperature=0)

### Using LLM to query the graph using GraphCypherQAChain library

In [9]:
# FIRST PROMPT - Transform a natural language query in a Cypher query
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions about the offshore oil and gas industry.
Convert the user's question based on the schema.
Do not include any text except the generated Cypher statement.

Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
The property existence syntax `... exists(variable.property)` is no longer supported. Please use `variable.property IS NOT NULL` instead.

Schema: {schema}
Question: {question}
"""

cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

# Memory for chat conversation
memory = ConversationBufferMemory(memory_key="chat_history", input_key="query", return_messages=True)

# SECOND PROMPT - Use the Cypher query to answer the user query
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    cypher_prompt=cypher_generation_prompt,
    verbose=True,
    memory=memory
)


# Chat loop
while True:
    question = input("> ")
    response = cypher_chain.invoke({
        "query": question
        })
    
    print(response['result'])



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mWhat are the names of the facilities that are responsible for drilling wellbore with NpdidWellbore '5693-L-008'? 

Cypher statement:
MATCH (:Wellbore {NpdidWellbore: '5693-L-008'})<-[:DRILLED]-(f:Facility) WHERE EXISTS((:Company)-[:IS_RESPONSIBLE_FOR]->(f)) RETURN f.Name[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'What': expected
  "ALTER"
  "CALL"
  "CREATE"
  "DEALLOCATE"
  "DELETE"
  "DENY"
  "DETACH"
  "DROP"
  "DRYRUN"
  "ENABLE"
  "FINISH"
  "FOREACH"
  "GRANT"
  "INSERT"
  "LOAD"
  "MATCH"
  "MERGE"
  "NODETACH"
  "OPTIONAL"
  "REALLOCATE"
  "REMOVE"
  "RENAME"
  "RETURN"
  "REVOKE"
  "SET"
  "SHOW"
  "START"
  "STOP"
  "TERMINATE"
  "UNWIND"
  "USE"
  "USING"
  "WITH" (line 1, column 1 (offset: 0))
"What are the names of the facilities that are responsible for drilling wellbore with NpdidWellbore '5693-L-008'?"
 ^}

-   How many facilities belongs to the "GULLFAKS" oil field?
-   Give me the name and FactPage URL of the facilities of the "GULLFAKS" oil field?