### Install Dependencies

```python
!pip3 install --upgrade --quiet langchain langchain-community langchain-openai langchain_mistralai neo4j
```

In [1]:
!pip show langchain neo4j langchain_mistralai

Name: langchain
Version: 0.1.16
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages
Requires: aiohttp, dataclasses-json, jsonpatch, langchain-community, langchain-core, langchain-text-splitters, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 
---
Name: neo4j
Version: 5.19.0
Summary: Neo4j Bolt driver for Python
Home-page: 
Author: 
Author-email: "Neo4j, Inc." <drivers@neo4j.com>
License: Apache License, Version 2.0
Location: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages
Requires: pytz
Required-by: 
---
Name: langchain-mistralai
Version: 0.1.0
Summary: An integration package connecting Mistral and LangChain
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /Library/Frameworks/Python.f

### Load environment variables

In [2]:
from dotenv import load_dotenv
import os
load_dotenv()

print("NEO4J_URI = " + os.getenv("NEO4J_URI"))
print("NEO4J_USERNAME = " + os.getenv("NEO4J_USERNAME"))
print("NEO4J_PASSWORD = " + os.getenv("NEO4J_PASSWORD"))

NEO4J_URI = bolt://localhost:7687
NEO4J_USERNAME = neo4j
NEO4J_PASSWORD = neo4j123


### Create Neo4jGraph

Make sure the Neo4j instance is running and the environment variables are set correctly.

In [3]:
from langchain_community.graphs import Neo4jGraph
graph = Neo4jGraph()
# graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="neo4j123")


### Check graph schema

In [4]:
graph.schema

'Node properties are the following:\nPerson {name: STRING, tags: STRING, description: STRING},SoftwareSystem {description: STRING, tags: STRING, name: STRING},Container {name: STRING, tags: STRING, description: STRING},Component {name: STRING, tags: STRING, description: STRING}\nRelationship properties are the following:\nUses {description: STRING}\nThe relationships are the following:\n(:Person)-[:Uses]->(:Container),(:Person)-[:Uses]->(:SoftwareSystem),(:Person)-[:Uses]->(:Person),(:SoftwareSystem)-[:Uses]->(:Person),(:SoftwareSystem)-[:Uses]->(:SoftwareSystem),(:SoftwareSystem)-[:Contains]->(:Container),(:Container)-[:Uses]->(:Component),(:Container)-[:Uses]->(:Container),(:Container)-[:Uses]->(:SoftwareSystem),(:Container)-[:Contains]->(:Component),(:Component)-[:Uses]->(:Component),(:Component)-[:Uses]->(:Container),(:Component)-[:Uses]->(:SoftwareSystem)'

In [5]:
graph.structured_schema

{'node_props': {'Person': [{'property': 'name', 'type': 'STRING'},
   {'property': 'tags', 'type': 'STRING'},
   {'property': 'description', 'type': 'STRING'}],
  'SoftwareSystem': [{'property': 'description', 'type': 'STRING'},
   {'property': 'tags', 'type': 'STRING'},
   {'property': 'name', 'type': 'STRING'}],
  'Container': [{'property': 'name', 'type': 'STRING'},
   {'property': 'tags', 'type': 'STRING'},
   {'property': 'description', 'type': 'STRING'}],
  'Component': [{'property': 'name', 'type': 'STRING'},
   {'property': 'tags', 'type': 'STRING'},
   {'property': 'description', 'type': 'STRING'}]},
 'rel_props': {'Uses': [{'property': 'description', 'type': 'STRING'}]},
 'relationships': [{'start': 'Person', 'type': 'Uses', 'end': 'Container'},
  {'start': 'Person', 'type': 'Uses', 'end': 'SoftwareSystem'},
  {'start': 'Person', 'type': 'Uses', 'end': 'Person'},
  {'start': 'SoftwareSystem', 'type': 'Uses', 'end': 'Person'},
  {'start': 'SoftwareSystem', 'type': 'Uses', 'end

### Create Cypher Query

In [6]:
## What are the persons using systems?
graph.query("MATCH (n:Person) RETURN n")

[{'n': {'name': 'Personal Banking Customer',
   'description': 'A customer of the bank, with personal bank accounts.',
   'tags': 'Element,Person,Customer'}},
 {'n': {'name': 'Customer Service Staff',
   'description': 'Customer service staff within the bank.',
   'tags': 'Element,Person,Bank Staff'}},
 {'n': {'name': 'Back Office Staff',
   'description': 'Administration and support staff within the bank.',
   'tags': 'Element,Person,Bank Staff'}}]

In [7]:
## What software systems are used by customers?
graph.query("""
    MATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
    WHERE p.tags CONTAINS "Customer"
    RETURN DISTINCT ss.name
    """)

[{'ss.name': 'ATM'}, {'ss.name': 'Internet Banking System'}]

In [8]:
## What software systems are used by customers to withdraw cash?
graph.query("""
    MATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
    WHERE p.tags CONTAINS "Customer" and ss.description CONTAINS "withdraw cash"
    RETURN DISTINCT ss.name
    """)

[{'ss.name': 'ATM'}]

### Create LLM

In [17]:
# from langchain_openai import ChatOpenAI
# os.environ["OPENAI_API_KEY"] = .......
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

from langchain_mistralai.chat_models import ChatMistralAI
llm = ChatMistralAI(mistral_api_key=os.getenv("MISTRAL_API_KEY"), model=os.getenv("MISTRAL_MODEL"), temperature=0)

print("MISTRAL_MODEL = " + os.getenv("MISTRAL_MODEL"))

MISTRAL_MODEL = open-mistral-7b


### Create GraphCypherQAChain

Please refer to API documentation for more details.

https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.cypher.GraphCypherQAChain.html

In [18]:
from langchain.chains import GraphCypherQAChain
#  input_key: str = "query"  
#  output_key: str = "result" 

chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)
# top_k=2
# return_intermediate_steps=True
# return_direct=True


# class GraphCypherQAChain(Chain):
#     graph: GraphStore = Field(exclude=True)
#     cypher_generation_chain: LLMChain
#     qa_chain: LLMChain
#     graph_schema: str
#     input_key: str = "query"  #: :meta private:
#     output_key: str = "result"  #: :meta private:
#     top_k: int = 10
#     """Number of results to return from the query"""
#     return_intermediate_steps: bool = False
#     """Whether or not to return the intermediate steps along with the final answer."""
#     return_direct: bool = False
#     """Whether or not to return the result of querying the graph directly."""
#     cypher_query_corrector: Optional[CypherQueryCorrector] = None
#     """Optional cypher validation tool"""

# from_llm(
#         cls,
#         llm: Optional[BaseLanguageModel] = None,
#         *,
#         qa_prompt: Optional[BasePromptTemplate] = None,
#         cypher_prompt: Optional[BasePromptTemplate] = None,
#         cypher_llm: Optional[BaseLanguageModel] = None,
#         qa_llm: Optional[BaseLanguageModel] = None,
#         exclude_types: List[str] = [],
#         include_types: List[str] = [],
#         validate_cypher: bool = False,
#         qa_llm_kwargs: Optional[Dict[str, Any]] = None,
#         cypher_llm_kwargs: Optional[Dict[str, Any]] = None,
#         **kwargs: Any,
#     ) -> GraphCypherQAChain:

### Check the default prompts


In [11]:
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT

print("CYPHER_GENERATION_PROMPT = ", flush=True)
print(CYPHER_GENERATION_PROMPT)

print("CYPHER_QA_PROMPT = ", flush=True)
print(CYPHER_QA_PROMPT)

CYPHER_GENERATION_PROMPT = 
input_variables=['question', 'schema'] template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'
CYPHER_QA_PROMPT = 
input_variables=['context', 'question'] template="You are an assistant that helps to form nice and human understandable answers.\nThe information part contains the provided information that you must use to construct an answer.\nThe provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.\nMake the answer sound as a response 

### Ask question to QA chain

The **question** must be passed as a dictionary with a key "query" and the value is the question.

The QA chain will generate a cypher query statement using `CYPHER_GENERATION_PROMPT` and return the answer to the question.


In [19]:
response = chain.invoke({"query": "What software systems are used by Back Office Staff?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person {name: "Back Office Staff"})-[:Uses]->(ss:SoftwareSystem)
RETURN ss.name
[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'Mainframe Banking System'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by Back Office Staff?',
 'result': 'The Back Office Staff uses the Mainframe Banking System.'}

In [20]:
response = chain.invoke({"query": "What software systems are used by Personal Banking Customer?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person {name: "Personal Banking Customer"})-[:Uses]->(ss:SoftwareSystem)
RETURN ss.name
[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM'}, {'ss.name': 'Internet Banking System'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by Personal Banking Customer?',
 'result': 'Personal Banking Customers use ATM and Internet Banking System software systems.'}

In [22]:
response = chain.invoke({"query": "What software systems are used by customers?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
RETURN ss.name
[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM'}, {'ss.name': 'Internet Banking System'}, {'ss.name': 'Mainframe Banking System'}, {'ss.name': 'Mainframe Banking System'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': 'Customers use ATM, Internet Banking System, and Mainframe Banking System software systems.'}

### Promt Refinement

https://python.langchain.com/docs/integrations/graphs/memgraph/#prompt-refinement


In [32]:
from langchain_core.prompts import PromptTemplate

# ## Refine the CYPHER_GENERATION_TEMPLATE by adding examples

MY_CYPHER_GENERATION_TEMPLATE = """
Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Here are a few examples of generated Cypher statements for particular questions:
# What software systems are used by Bank Staff?
MATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
WHERE p.tags CONTAINS "Bank Staff"
RETURN DISTINCT ss.name
# What software systems are used by customers to withdraw cash?
MATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
WHERE p.tags CONTAINS "Customer" and ss.description CONTAINS "withdraw cash"
RETURN DISTINCT ss.name
    
The question is:
{question}
"""

MY_CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=MY_CYPHER_GENERATION_TEMPLATE
)

## Use the default CYPHER_GENERATION_PROMPT
# MY_CYPHER_GENERATION_PROMPT = CYPHER_GENERATION_PROMPT

## Use the default CYPHER_QA_PROMPT
# MY_CYPHER_QA_PROMPT = CYPHER_QA_PROMPT
MY_CYPHER_QA_TEMPLATE = """
You are an assistant that helps to form nice and human understandable answers.
The context part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. 
Do not mention that you based the result on the given context.
Do not make up anything which does not exist in the provided context.

Here is an example:
Question: What software systems are used by customers?
Context: ATM, Internet Banking System
Helpful Answer: ATM and Internet Banking System.

Follow this example when generating answers. If the provided context is empty, say that you don't know the answer.

Context: {context}

Question: {question}
Helpful Answer:
"""

MY_CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=['context', 'question'], template=MY_CYPHER_QA_TEMPLATE
)

chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, qa_prompt=MY_CYPHER_QA_PROMPT,cypher_prompt=MY_CYPHER_GENERATION_PROMPT,verbose=True)

responseresponse = chain.invoke({"query": "What software systems are used by customers?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
WHERE p.tags CONTAINS "Customer"
RETURN DISTINCT ss.name[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM'}, {'ss.name': 'Internet Banking System'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': 'Customers use ATM, Internet Banking System, and Mainframe Banking System software systems.'}

In [33]:
responseresponse = chain.invoke({"query": "What software systems are used by managers?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
WHERE p.tags CONTAINS "Manager"
RETURN DISTINCT ss.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': 'Customers use ATM, Internet Banking System, and Mainframe Banking System software systems.'}

In [29]:
responseresponse = chain.invoke({"query": "What systems can be used by customers to withdraw cash?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:Uses]->(ss:SoftwareSystem)
WHERE p.tags CONTAINS "Customer" and ss.description CONTAINS "withdraw cash"
RETURN DISTINCT ss.name[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': 'Customers use ATM, Internet Banking System, and Mainframe Banking System software systems.'}

### Few-shot examples
    

In [40]:
examples = [
    {
        "question": "who are the users?",
        "query": "MATCH (p:Person) RETURN distinct p",
    },
    {
        "question": "what software systems are used by customers",
        "query": "MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' RETURN DISTINCT s.name,s.description",
    },
    {
        "question": "what software systems are used by staff",
        "query": "MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Bank Staff' RETURN DISTINCT s.name,s.description",
    },
    {
        "question": "what software systems are used by customers to withdraw cash",
        "query": "MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' AND s.description CONTAINS 'withdraw cash' RETURN DISTINCT s.name,s.description",
    }
]

In [35]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [36]:
print(prompt.format(question="What software systems are used by customers to withdraw cash?", schema="***dummy***"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
***dummy***.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: who are the users?
Cypher query: MATCH (p:Person) RETURN distinct p

User input: what software systems are used by customer
Cypher query: MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' RETURN DISTINCT s.name,s.description

User input: what software systems are used by staff
Cypher query: MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Bank Staff' RETURN DISTINCT s.name,s.description

User input: what software systems are used by customer to withdraw cash
Cypher query: MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' AND s.description CONTAINS 'withdraw cash' RETURN DISTINCT s.name,s.description

User input: What software systems are used by customers to withdraw cash?

In [43]:
# chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, cypher_prompt=prompt, verbose=True)
# response = chain.invoke({"query": "What software systems are used by customers?"})
# response