# Neo4j DB QA chain

In [4]:
# Uncomment the line below to install the extra packages used by this notebook.
# NOTE(review): langchain, openai and python-dotenv are imported further down but are not
# listed here — confirm they are already installed in the kernel's environment.
# !pip3 install wikipedia tiktoken neo4j

LLM 

In [3]:
import openai
from dotenv import load_dotenv
import os
from langchain.chat_models import ChatOpenAI

# Populate os.environ from a local .env file so secrets stay out of the notebook.
load_dotenv()

# NOTE(review): the variable is spelled OPENAI_AI_KEY (not OPENAI_API_KEY) — confirm this
# matches the name used in the .env file.
api_key = os.environ.get("OPENAI_AI_KEY")
organization = os.environ.get("OPEN_AI_ORG")

# Chat model shared by the chains below; any LangChain-compatible LLM can be used instead.
llm = ChatOpenAI(
    max_tokens=1500,
    openai_api_key=api_key,
    openai_organization=organization,
)

Neo4jGraph

In [4]:
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph

# Connection settings are taken from the environment (NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD) so that real credentials do not have to be committed with the notebook.
# The fallbacks reproduce the original local development setup; override them via the
# environment or the .env file for any non-local database.
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "12345678"),
)

In [5]:
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

## Seeding the database

Assuming your database is empty, you can populate it using the Cypher query language. The following Cypher statement is idempotent, which means the database contents will be the same whether you run it once or multiple times.

In [6]:
# Seed data: one Movie node ("Top Gun"), four Actor nodes, and an ACTED_IN
# relationship from each actor to the movie. MERGE is used throughout, so
# running the cell again leaves the graph unchanged.
seed_cypher = """
MERGE (m:Movie {name:"Top Gun"})
WITH m
UNWIND ["Tom Cruise", "Val Kilmer", "Anthony Edwards", "Meg Ryan"] AS actor
MERGE (a:Actor {name:actor})
MERGE (a)-[:ACTED_IN]->(m)
"""
graph.query(seed_cypher)

[]

## Refresh graph schema information

If the schema of the database changes, you can refresh the schema information needed to generate Cypher statements.

In [7]:
# Re-read the schema from the database so the schema text used for Cypher generation is current.
graph.refresh_schema()
# Print the schema text (node properties, relationship properties, relationships) for inspection.
print(graph.schema)

Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)


## Querying the graph

We can now use the graph Cypher QA chain to ask questions of the graph.

In [8]:
# Basic QA chain: the LLM writes a Cypher query for the question, the query is
# run against `graph`, and the LLM phrases the answer. verbose=True echoes the
# generated Cypher and the retrieved context.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
)

In [9]:
# Ask a natural-language question; the returned answer string is shown as the cell output.
chain.run("Who played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (actor:Actor)-[:ACTED_IN]->(movie:Movie {name: 'Top Gun'})
RETURN actor.name[0m
Full Context:
[32;1m[1;3m[{'actor.name': 'Tom Cruise'}, {'actor.name': 'Val Kilmer'}, {'actor.name': 'Anthony Edwards'}, {'actor.name': 'Meg Ryan'}][0m

[1m> Finished chain.[0m


'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'

## Limit the number of results

You can limit the number of results from the Cypher QA Chain using the top_k parameter. The default is 10.



In [11]:
# Same chain, but top_k=2 caps the number of query results passed on as context.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    top_k=2,
    verbose=True,
)

In [12]:
# With top_k=2 only two records reach the answer step, so the answer lists two actors.
chain.run("Who played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: "Top Gun"})
RETURN a.name[0m
Full Context:
[32;1m[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}][0m

[1m> Finished chain.[0m


'Tom Cruise and Val Kilmer played in Top Gun.'

## Return intermediate results

You can return intermediate steps from the Cypher QA Chain using the return_intermediate_steps parameter

In [14]:
# return_intermediate_steps=True makes the chain output include the generated
# Cypher query and the retrieved context alongside the final answer.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    return_intermediate_steps=True,
    verbose=True,
)

In [15]:
# Call the chain directly (instead of .run) to get a dict with several output keys.
result = chain("Who played in Top Gun?")
# 'intermediate_steps' holds the generated Cypher query and the context it returned.
print(f"Intermediate steps: {result['intermediate_steps']}")
# 'result' is the final natural-language answer.
print(f"Final answer: {result['result']}")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})
RETURN a.name[0m
Full Context:
[32;1m[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}][0m

[1m> Finished chain.[0m
Intermediate steps: [{'query': "MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\nRETURN a.name"}, {'context': [{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]}]
Final answer: The actors who played in Top Gun are Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan.


## Return direct results

You can return direct results from the Cypher QA Chain using the return_direct parameter



In [16]:
# return_direct=True returns the raw query records instead of an LLM-written answer.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    return_direct=True,
    verbose=True,
)

In [17]:
# The cell output is the list of records returned by the generated Cypher query.
chain.run("Who played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})
RETURN a.name[0m

[1m> Finished chain.[0m


[{'a.name': 'Tom Cruise'},
 {'a.name': 'Val Kilmer'},
 {'a.name': 'Anthony Edwards'},
 {'a.name': 'Meg Ryan'}]

## Add examples in the Cypher generation prompt

You can define the Cypher statement you want the LLM to generate for particular questions

In [18]:
from langchain.prompts.prompt import PromptTemplate

# Prompt used for the Cypher-generation step. {schema} and {question} are filled in by the
# chain; literal Cypher braces are doubled ({{ }}) so the template formatter leaves them alone.
# The few-shot example filters on the `name` property, which is the only property Movie nodes
# have in this graph's schema — an example using a property outside the schema would
# contradict the instruction to use only the provided properties.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:
# How many people played in Top Gun?
MATCH (m:Movie {{name:"Top Gun"}})<-[:ACTED_IN]-()
RETURN count(*) AS numberOfActors

The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# Chain that generates Cypher with the custom few-shot prompt defined above.
# A separate ChatOpenAI instance with temperature=0 is used here.
chain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    graph=graph,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True,
)

In [19]:
# Ask the same question that appears as the few-shot example in the custom prompt.
chain.run("How many people played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie {name:"Top Gun"})<-[:ACTED_IN]-(:Actor)
RETURN count(*) AS numberOfActors[0m
Full Context:
[32;1m[1;3m[{'numberOfActors': 4}][0m

[1m> Finished chain.[0m


'Four people played in Top Gun.'

## Use separate LLMs for Cypher and answer generation

You can use the cypher_llm and qa_llm parameters to use different LLMs for Cypher generation and for answer generation.

In [20]:
# One model writes the Cypher query (cypher_llm); another phrases the answer (qa_llm).
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"),
    verbose=True,
)

In [21]:
# Run the question through the chain that uses separate Cypher and answer models.
chain.run("Who played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})
RETURN a.name[0m
Full Context:
[32;1m[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}][0m

[1m> Finished chain.[0m


'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'

## Ignore specified node and relationship types

You can use include_types or exclude_types to ignore parts of the graph schema when generating Cypher statements.

In [23]:
# exclude_types removes the listed types from the schema text used for Cypher generation.
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k"),
    exclude_types=["Movie"],
    verbose=True,
)

In [24]:
# Inspect the schema text held by the chain: with "Movie" excluded, neither Movie nodes
# nor the relationships that involve them are listed.
print(chain.graph_schema)

Node properties are the following:
Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:



## Validate generated Cypher statements

You can use the validate_cypher parameter to validate and correct relationship directions in generated Cypher statements

In [27]:
# validate_cypher=True validates the generated Cypher and corrects relationship
# directions before the query is run.
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    validate_cypher=True,
    verbose=True,
)

In [28]:
# Run the question through the chain that has Cypher validation enabled.
chain.run("Who played in Top Gun?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
WHERE m.name = 'Top Gun'
RETURN a.name[0m
Full Context:
[32;1m[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}][0m

[1m> Finished chain.[0m


'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'