### Check Dependencies

In [1]:
#!pip3 install --upgrade --quiet langchain langchain-community langchain-openai langchain_mistralai neo4j==5.19.0 tiktoken tokenizers

In [2]:
!pip3 show langchain neo4j langchain_mistralai

Name: langchain
Version: 0.1.17
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages
Requires: aiohttp, dataclasses-json, jsonpatch, langchain-community, langchain-core, langchain-text-splitters, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 
---
Name: neo4j
Version: 5.19.0
Summary: Neo4j Bolt driver for Python
Home-page: 
Author: 
Author-email: "Neo4j, Inc." <drivers@neo4j.com>
License: Apache License, Version 2.0
Location: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages
Requires: pytz
Required-by: 
---
Name: langchain-mistralai
Version: 0.1.5
Summary: An integration package connecting Mistral and LangChain
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /Library/Frameworks/Python.f

### Load environment variables

In [3]:
from dotenv import load_dotenv
import os
load_dotenv()

print("NEO4J_URI = " + os.getenv("NEO4J_URI"))
print("NEO4J_USERNAME = " + os.getenv("NEO4J_USERNAME"))
print("NEO4J_PASSWORD = " + os.getenv("NEO4J_PASSWORD"))

NEO4J_URI = bolt://localhost:7687
NEO4J_USERNAME = neo4j
NEO4J_PASSWORD = neo4j123


### Create Neo4jGraph

Make sure the Neo4j instance is running and the environment variables are set correctly.

In [4]:
from langchain_community.graphs import Neo4jGraph
graph = Neo4jGraph()
# graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="neo4j123")


### Check graph schema

In [5]:
graph.schema

'Node properties:\nElement {tags: STRING, name: STRING, type: STRING, source: STRING, parent: STRING, description: STRING}\nRelationship properties:\nUses {technology: STRING, description: STRING}\nThe relationships:\n(:Element)-[:Uses]->(:Element)\n(:Element)-[:Contains]->(:Element)'

In [6]:
graph.structured_schema

{'node_props': {'Element': [{'property': 'tags', 'type': 'STRING'},
   {'property': 'name', 'type': 'STRING'},
   {'property': 'type', 'type': 'STRING'},
   {'property': 'source', 'type': 'STRING'},
   {'property': 'parent', 'type': 'STRING'},
   {'property': 'description', 'type': 'STRING'}]},
 'rel_props': {'Uses': [{'property': 'technology', 'type': 'STRING'},
   {'property': 'description', 'type': 'STRING'}]},
 'relationships': [{'start': 'Element', 'type': 'Uses', 'end': 'Element'},
  {'start': 'Element', 'type': 'Contains', 'end': 'Element'}],
 'metadata': {'constraint': [], 'index': []}}

### Create Cypher Query

In [7]:
## What are the persons using systems?
graph.query("MATCH (n:Element) WHERE n.type='Person' RETURN n")

[{'n': {'parent': '',
   'name': 'Personal Banking Customer',
   'description': 'A customer of the bank, with personal bank accounts.',
   'source': 'Big Bank plc',
   'type': 'Person',
   'tags': 'Element,Person,Customer'}},
 {'n': {'parent': '',
   'name': 'Customer Service Staff',
   'description': 'Customer service staff within the bank.',
   'source': 'Big Bank plc',
   'type': 'Person',
   'tags': 'Element,Person,Bank Staff'}},
 {'n': {'parent': '',
   'name': 'Back Office Staff',
   'description': 'Administration and support staff within the bank.',
   'source': 'Big Bank plc',
   'type': 'Person',
   'tags': 'Element,Person,Bank Staff'}}]

In [9]:
## What software systems are used by customers?
graph.query("""
    MATCH (p:Element)-[:Uses]->(ss:Element)
    WHERE p.type='Person' AND p.tags CONTAINS "Customer" AND ss.type='SoftwareSystem'
    RETURN DISTINCT ss.name, ss.description
    """)

[{'ss.name': 'ATM', 'ss.description': 'Allows customers to withdraw cash.'},
 {'ss.name': 'Internet Banking System',
  'ss.description': 'Allows customers to view information about their bank accounts, and make payments.'}]

In [10]:
## What software systems are used by customers to withdraw cash?
graph.query("""
    MATCH (p:Element)-[:Uses]->(ss:Element)
    WHERE p.type='Person' AND p.tags CONTAINS "Customer" AND ss.type='SoftwareSystem' AND ss.description CONTAINS "withdraw cash"
    RETURN DISTINCT ss.name, ss.description
    """)

[{'ss.name': 'ATM', 'ss.description': 'Allows customers to withdraw cash.'}]

### Create LLM

In [11]:
# from langchain_openai import ChatOpenAI
# os.environ["OPENAI_API_KEY"] = .......
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

from langchain_mistralai.chat_models import ChatMistralAI
llm = ChatMistralAI(mistral_api_key=os.getenv("MISTRAL_API_KEY"), model=os.getenv("MISTRAL_MODEL"), temperature=0)

print("MISTRAL_MODEL = " + os.getenv("MISTRAL_MODEL"))

MISTRAL_MODEL = open-mistral-7b


### Create GraphCypherQAChain

Please refer to API documentation for more details.

https://api.python.langchain.com/en/latest/chains/langchain.chains.graph_qa.cypher.GraphCypherQAChain.html

In [12]:
from langchain.chains import GraphCypherQAChain
#  input_key: str = "query"  
#  output_key: str = "result" 

chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)
# top_k=2
# return_intermediate_steps=True
# return_direct=True


# class GraphCypherQAChain(Chain):
#     graph: GraphStore = Field(exclude=True)
#     cypher_generation_chain: LLMChain
#     qa_chain: LLMChain
#     graph_schema: str
#     input_key: str = "query"  #: :meta private:
#     output_key: str = "result"  #: :meta private:
#     top_k: int = 10
#     """Number of results to return from the query"""
#     return_intermediate_steps: bool = False
#     """Whether or not to return the intermediate steps along with the final answer."""
#     return_direct: bool = False
#     """Whether or not to return the result of querying the graph directly."""
#     cypher_query_corrector: Optional[CypherQueryCorrector] = None
#     """Optional cypher validation tool"""

# from_llm(
#         cls,
#         llm: Optional[BaseLanguageModel] = None,
#         *,
#         qa_prompt: Optional[BasePromptTemplate] = None,
#         cypher_prompt: Optional[BasePromptTemplate] = None,
#         cypher_llm: Optional[BaseLanguageModel] = None,
#         qa_llm: Optional[BaseLanguageModel] = None,
#         exclude_types: List[str] = [],
#         include_types: List[str] = [],
#         validate_cypher: bool = False,
#         qa_llm_kwargs: Optional[Dict[str, Any]] = None,
#         cypher_llm_kwargs: Optional[Dict[str, Any]] = None,
#         **kwargs: Any,
#     ) -> GraphCypherQAChain:

### Check the default prompts


In [13]:
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT

print("CYPHER_GENERATION_PROMPT = ", flush=True)
print(CYPHER_GENERATION_PROMPT)

print("CYPHER_QA_PROMPT = ", flush=True)
print(CYPHER_QA_PROMPT)

CYPHER_GENERATION_PROMPT = 
input_variables=['question', 'schema'] template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'
CYPHER_QA_PROMPT = 
input_variables=['context', 'question'] template="You are an assistant that helps to form nice and human understandable answers.\nThe information part contains the provided information that you must use to construct an answer.\nThe provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.\nMake the answer sound as a response 

### Ask question to QA chain

The **question** must be passed as a dictionary with a key "query" and the value is the question.

The QA chain will generate a cypher query statement using `CYPHER_GENERATION_PROMPT` and return the answer to the question.


In [15]:
response = chain.invoke({"query": "What software systems are used by Back Office Staff?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (e:Element {type: "SoftwareSystem", parent: "Back Office Staff"})-[:Uses]->(ss:Element)
RETURN ss.name AS SoftwareSystem
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by Back Office Staff?',
 'result': "I'm unable to provide an answer as the given information does not mention any software systems used by Back Office Staff."}

In [16]:
response = chain.invoke({"query": "What software systems are used by Personal Banking Customer?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Element {type: "Personal Banking Customer"})-[:Uses]->(s:Element)
RETURN s.name as Software_System
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by Personal Banking Customer?',
 'result': "I'm unable to provide an answer as the given information does not mention any specific software systems used by Personal Banking Customers."}

In [17]:
response = chain.invoke({"query": "What software systems are used by customers?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (e1:Element)-[:Uses]->(e2:Element)
WHERE e1.type = 'Software' AND e2.type = 'Customer'
RETURN e1.name AS Software_Name, e2.name AS Customer_Name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': "I don't have information on the specific software systems used by customers."}

### Promt Refinement

https://python.langchain.com/docs/integrations/graphs/memgraph/#prompt-refinement


In [18]:
from langchain_core.prompts import PromptTemplate

# ## Refine the CYPHER_GENERATION_TEMPLATE by adding examples

MY_CYPHER_GENERATION_TEMPLATE = """
Task: Generate a syntactically correct cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Here are a few examples of generated Cypher statements for particular questions:
# What software systems are used by Bank Staff?
MATCH (p:Element)-[:Uses]->(ss:Element)
WHERE p.type="Person" AND p.tags CONTAINS "Bank Staff" AND ss.type="SoftwareSystem"
RETURN DISTINCT ss.name, ss.description
# What software systems are used by customers to withdraw cash?
MATCH (p:Element)-[:Uses]->(ss:Element)
WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem" AND ss.description CONTAINS "withdraw cash"
RETURN DISTINCT ss.name, ss.description
    
The question is:
{question}
"""

MY_CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=MY_CYPHER_GENERATION_TEMPLATE
)

## Use the default CYPHER_GENERATION_PROMPT
# MY_CYPHER_GENERATION_PROMPT = CYPHER_GENERATION_PROMPT

## Use the default CYPHER_QA_PROMPT
# MY_CYPHER_QA_PROMPT = CYPHER_QA_PROMPT
MY_CYPHER_QA_TEMPLATE = """
You are an assistant that helps to form nice and human understandable answers.
The context part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. 
Do not mention that you based the result on the given context.
Do not make up anything which does not exist in the provided context.

Here is an example:
Question: What software systems are used by customers?
Context: ATM, Internet Banking System
Helpful Answer: ATM and Internet Banking System.

Follow this example when generating answers. If the provided context is empty, say that you don't know the answer.

Context: {context}

Question: {question}
Helpful Answer:
"""

MY_CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=['context', 'question'], template=MY_CYPHER_QA_TEMPLATE
)

chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, qa_prompt=MY_CYPHER_QA_PROMPT,cypher_prompt=MY_CYPHER_GENERATION_PROMPT,verbose=True)



In [19]:
responseresponse = chain.invoke({"query": "What software systems are used by customers?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem"
RETURN DISTINCT ss.name, ss.description[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM', 'ss.description': 'Allows customers to withdraw cash.'}, {'ss.name': 'Internet Banking System', 'ss.description': 'Allows customers to view information about their bank accounts, and make payments.'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': "I don't have information on the specific software systems used by customers."}

In [20]:
responseresponse = chain.invoke({"query": "What systems can be used by customers to withdraw cash?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem" AND ss.description CONTAINS "withdraw cash"
RETURN DISTINCT ss.name, ss.description[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM', 'ss.description': 'Allows customers to withdraw cash.'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': "I don't have information on the specific software systems used by customers."}

### Few-shot examples
    

In [21]:
examples = [
    {
        "question": "who are the users",
        "query": "MATCH (p:Element) WHERE p.type='Person' RETURN distinct p",
    },
    {
        "question": "what software systems are used by customers",
        "query": """MATCH (p:Element)-[:Uses]->(ss:Element) WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem" RETURN DISTINCT ss.name,s.description""",
    },
    {
        "question": "what software systems are used by customers to withdraw cash",
        # "query": "MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' AND s.description CONTAINS 'withdraw cash' RETURN DISTINCT s.name,s.description",
        "query": """MATCH (p:Element)-[:Uses]->(ss:Element) WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem" AND ss.description CONTAINS 'withdraw cash' RETURN DISTINCT ss.name,ss.description""",
    },
    {
        "question": "what software systems are available",
        # "query": "MATCH (s:SoftwareSystem) RETURN DISTINCT s.name,s.description",
        "query": "MATCH (s:Element) WHERE s.type='SoftwareSystem' RETURN DISTINCT s.name,s.description",
    }
]

In [22]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)

promt_prefix = """
Task: Generate a syntactically correct cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Here are a few examples of generated Cypher statements for particular questions:
"""

promt_suffix="""
User input: {question}
Cypher query:
"""

fewShotPrompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=promt_prefix,
    suffix=promt_suffix,
    input_variables=["question", "schema"],
)

chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, cypher_prompt=fewShotPrompt, verbose=True)


In [23]:
print(fewShotPrompt.format(question="What software systems are used by customers to withdraw cash?", schema=graph.structured_schema))


Task: Generate a syntactically correct cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{'node_props': {'Element': [{'property': 'tags', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}, {'property': 'type', 'type': 'STRING'}, {'property': 'source', 'type': 'STRING'}, {'property': 'parent', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}]}, 'rel_props': {'Uses': [{'property': 'technology', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}]}, 'relationships': [{'start': 'Element', 'type': 'Uses', 'end': 'Element'}, {'start': 'Element', 'type': 'Contains', 'end': 'Element'}], 'metadata': {'constraint': [], 'index': []}}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher

### Ask a predefined question

Ask a predefined question and the predefined query should be used to generate the answer.

```json
    {
        "question": "what software systems are used by customers",
        "query": "MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Customer' RETURN DISTINCT s.name,s.description",
    }
```

In [24]:
response = chain.invoke("What software systems are used by customers?")
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
 WHERE p.type="Person" AND p.tags CONTAINS "Customer"
 AND ss.type="SoftwareSystem"
 RETURN DISTINCT ss.name,ss.description[0m
Full Context:
[32;1m[1;3m[{'ss.name': 'ATM', 'ss.description': 'Allows customers to withdraw cash.'}, {'ss.name': 'Internet Banking System', 'ss.description': 'Allows customers to view information about their bank accounts, and make payments.'}][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by customers?',
 'result': 'Customers use ATM software and Internet Banking System.'}

### Ask a question that is not in the predefined list

The QA Chain should be able to find the similar question in the predefined list and use the corresponding query to generate the answer.

In [25]:
response = chain.invoke("What software systems are used by bank staff?")
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
 WHERE p.type="Person" AND p.tags CONTAINS "BankStaff" AND ss.type="SoftwareSystem"
 RETURN DISTINCT ss.name,ss.description[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What software systems are used by bank staff?',
 'result': "I'm unable to provide an answer as the information provided does not mention any specific software systems used by bank staff."}

In [26]:
response = chain.invoke("what software systems can be used to withdraw cash?")
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
 WHERE p.tags CONTAINS "SoftwareSystem" AND ss.description CONTAINS 'withdraw cash'
 RETURN DISTINCT ss.name,ss.description[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'what software systems can be used to withdraw cash?',
 'result': "I'm unable to provide an answer to that question based on the given information."}

### Semantic Search

The cypher statements generated for the above examples are using simple matching against keywords, e.g. `p.tags contains 'Bank Staff'` 
Question :
`What software systems are used by bank staff?`

Generated Cypher:
`MATCH (p:Person)-[:Uses]->(s:SoftwareSystem) WHERE p.tags contains 'Bank Staff' RETURN DISTINCT s.name,s.description`

Result: 
`The Mainframe Banking System is used by bank staff to store all of the core banking information about customers, accounts, and transactions.`

if we ask the question in another way, QA Chain will not be able to answer correctly, it should use semnatic search instead to resolve the problem. 

https://python.langchain.com/docs/use_cases/graph/semantic/


In [27]:
response = chain.invoke("what software systems can be used to store customer information?")
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
 WHERE p.type="Person" AND p.tags CONTAINS "Customer"
 AND ss.type="SoftwareSystem" AND ss.description CONTAINS "store customer information"
 RETURN DISTINCT ss.name,ss.description[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'what software systems can be used to store customer information?',
 'result': 'There are several software systems that can be used to store customer information. Some common options include Customer Relationship Management (CRM) systems such as Salesforce, Microsoft Dynamics, or HubSpot. Other options include database management systems like MySQL or Oracle, or cloud storage services like Google Drive or Amazon S3. The specific system used may depend on the size and needs of the business.'}

In [28]:
response = chain.invoke("what software systems can be used by customer to view their banking information?")
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Element)-[:Uses]->(ss:Element)
 WHERE p.type="Person" AND p.tags CONTAINS "Customer" AND ss.type="SoftwareSystem" AND ss.description CONTAINS 'banking information'
 RETURN DISTINCT ss.name,ss.description[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'what software systems can be used by customer to view their banking information?',
 'result': 'The provided information does not specify which software systems can be used by customers to view their banking information. Therefore, I cannot provide a definitive answer.'}

#### Create Neo4j Vector Store from an existing graph database

Create embedding vector for the SoftwareSystem node from existing neo4j graph database. A new **embedding** property will be added to the SoftwareSystem node.

- [MistralAIEmbeddings](https://python.langchain.com/docs/integrations/text_embedding/mistralai/)

- [Neo4j Vector](https://python.langchain.com/docs/integrations/vectorstores/neo4jvector/)

- [Create Neo4jVector from an existing graph](https://api.python.langchain.com/en/latest/_modules/langchain_community/vectorstores/neo4j_vector.html#Neo4jVector.from_existing_graph)

In [29]:
from langchain_community.vectorstores import Neo4jVector
from langchain_mistralai import MistralAIEmbeddings

# # Initialize and return a Neo4jVector instance from an existing graph.
# retrieval_query = """
# RETURN "\n\nname: "+node.name+"\ntype: "+node.type+"\ndescription: "+node.description AS text, score, {tags: node.tags, name: node.name, type: node.type, parent: node.parent, source: node.source} AS metadata
# """
vectorestore = Neo4jVector.from_existing_graph(
    embedding=MistralAIEmbeddings(mistral_api_key=os.getenv("MISTRAL_API_KEY")),
    node_label="Element",
    embedding_node_property="embedding",
    text_node_properties=["name", "type", "description"], #combime properties to create a single text property
    index_name="Element_index",
    # retrieval_query=retrieval_query, #default retrival query will contain all the text node properties
    url=os.getenv("NEO4J_URI"), username=os.getenv("NEO4J_USERNAME"), password=os.getenv("NEO4J_PASSWORD")
)



In [30]:
# results = vectorestore.similarity_search_with_score("what software systems can be used to store customer information?", k=1)

# print(f"{results[0].metadata.type} : {results[0].metadata.name} - {results[0].page_content}",flush=True)
results = vectorestore.similarity_search("what software systems can be used to store customer information?", k=1)
results

[Document(page_content='\nname: Mainframe Banking System\ntype: SoftwareSystem\ndescription: Stores all of the core banking information about customers, accounts, transactions, etc.', metadata={'tags': 'Element,Software System,Existing System', 'source': 'Big Bank plc', 'parent': ''})]

In [31]:
results = vectorestore.similarity_search("Mainframe Banking System", k=1)
results

[Document(page_content='\nname: Mainframe Banking System\ntype: SoftwareSystem\ndescription: Stores all of the core banking information about customers, accounts, transactions, etc.', metadata={'tags': 'Element,Software System,Existing System', 'source': 'Big Bank plc', 'parent': ''})]

In [32]:
from langchain_core.prompts import ChatPromptTemplate, format_document

# DEFAULT_DOCUMENT_PROMPT = ChatPromptTemplate.from_template(template="\nname: {name}\ntype: {type}{page_content}")
DEFAULT_DOCUMENT_PROMPT = ChatPromptTemplate.from_template(template="{page_content}")
def _extract_context_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    context_content = document_separator.join(doc_strings)
    
    return context_content

print(_extract_context_documents(results))

Human: 
name: Mainframe Banking System
type: SoftwareSystem
description: Stores all of the core banking information about customers, accounts, transactions, etc.


In [33]:
results = vectorestore.similarity_search_with_score("what software systems can be used by customer to view their banking information?", k=1)
results

[(Document(page_content='\nname: Internet Banking System\ntype: SoftwareSystem\ndescription: Allows customers to view information about their bank accounts, and make payments.', metadata={'tags': 'Element,Software System', 'source': 'Big Bank plc', 'parent': ''}),
  0.8941677808761597)]

#### Create Neo4j Vector Store from an existing embedding index

- [MistralAIEmbeddings](https://python.langchain.com/docs/integrations/text_embedding/mistralai/)

- [Neo4j Vector](https://python.langchain.com/docs/integrations/vectorstores/neo4jvector/)

- [Create Neo4jVector from an existing index](https://api.python.langchain.com/en/latest/_modules/langchain_community/vectorstores/neo4j_vector.html#Neo4jVector.from_existing_index)

In [51]:
# # First we create sample data and index in graph
# vectorestore.query(
#     "MERGE (p1)-[:Uses {description:'example text', embedding:$embedding}]->(p2)",
#     params={"embedding": MistralAIEmbeddings(mistral_api_key=os.getenv("MISTRAL_API_KEY")).embed_query("example text")},
# )
# # Create a vector index
# relationship_index = "relationship_vector"
# vectorestore.query(
#     """
# CREATE VECTOR INDEX $relationship_index
# IF NOT EXISTS
# FOR ()-[r:Uses]-() ON (r.embedding)
# OPTIONS {indexConfig: {
#  `vector.dimensions`: 1536,
#  `vector.similarity_function`: 'cosine'
# }}
# """,
#     params={"relationship_index": relationship_index},
# )

# relationship_vector = Neo4jVector.from_existing_relationship_index(
#     MistralAIEmbeddings(mistral_api_key=os.getenv("MISTRAL_API_KEY")),
#     url=os.getenv("NEO4J_URI"), username=os.getenv("NEO4J_USERNAME"), password=os.getenv("NEO4J_PASSWORD"),
#     index_name=relationship_index,
#     text_node_property="description",
# )
# relationship_vector.similarity_search("Example")

In [34]:
# Create Neo4jVector instance from an existing index.
from langchain_community.vectorstores import Neo4jVector
from langchain_mistralai import MistralAIEmbeddings

embedding=MistralAIEmbeddings(mistral_api_key=os.getenv("MISTRAL_API_KEY"))

# retrieval_query = """
# RETURN node.text AS text, score, {tags: node.`metadata.tags`, name: node.`metadata.name`} AS metadata
# """

retrieval_query = """
RETURN "\n\nname: "+node.name+"\ntype: "+node.type+"\ndescription: "+node.description AS text, score, {tags: node.tags, name: node.name, type: node.type, parent: node.parent, source: node.source} AS metadata
"""

vectorestore = Neo4jVector.from_existing_index(
    embedding=embedding,
    url=os.getenv("NEO4J_URI"), username=os.getenv("NEO4J_USERNAME"), password=os.getenv("NEO4J_PASSWORD"),
    index_name="Element_index",
    retrieval_query=retrieval_query,
    embedding_node_property="embedding",
)

In [37]:

embedding_dimension, index_type = vectorestore.retrieve_existing_index()

print(f"embedding_dimension: {embedding_dimension}\nindex_type: {index_type}")


embedding_dimension: 1024
index_type: NODE


In [38]:
results = vectorestore.similarity_search("what software systems can be used to store customer information?", k=1)
results

[Document(page_content='\n\nname: Mainframe Banking System\ntype: SoftwareSystem\ndescription: Stores all of the core banking information about customers, accounts, transactions, etc.', metadata={'tags': 'Element,Software System,Existing System', 'source': 'Big Bank plc', 'name': 'Mainframe Banking System', 'parent': '', 'type': 'SoftwareSystem'})]

In [40]:
results = vectorestore.similarity_search("what software systems can be used to withdraw cash?", k=1)
results

[Document(page_content='\n\nname: ATM\ntype: SoftwareSystem\ndescription: Allows customers to withdraw cash.', metadata={'tags': 'Element,Software System,Existing System', 'source': 'Big Bank plc', 'name': 'ATM', 'parent': '', 'type': 'SoftwareSystem'})]

In [41]:
retriever = vectorestore.as_retriever()

# from langchain_core.runnables import RunnableParallel, RunnablePassthrough
# retrieval = RunnableParallel(
#     {"question": RunnablePassthrough() | retriever | _extract_context_documents}
# )

In [42]:
from langchain.chains import RetrievalQAWithSourcesChain
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm, chain_type="stuff", retriever=retriever
)

In [43]:
chain.invoke(
    {"question": "what software systems can be used to store customer information?"},
    return_only_outputs=True,
)

{'answer': 'The following software systems can be used to store customer information: a Mainframe Banking System, an Internet Banking System, an ATM, and a Database.\n\n',
 'sources': 'Big Bank plc.'}

In [44]:
chain.invoke(
    {"question": "what software systems can be used to withdraw cash?"},
    return_only_outputs=True,
)

{'answer': 'The software systems that can be used to withdraw cash are ATM systems.\n\n',
 'sources': 'Big Bank plc.'}

In [45]:

chain.invoke(
    {"question": "what can provide a summary of a customer's bank accounts"},
    return_intermediate_steps=True
)

{'question': "what can provide a summary of a customer's bank accounts",
 'answer': 'The "Accounts Summary Controller" component in the "Big Bank plc" software system provides a summary of a customer\'s bank accounts.\n\n',
 'sources': ''}

In [57]:
results = vectorestore.similarity_search_with_score("what can provide a summary of a customer's bank accounts?")
results

[(Document(page_content='\n\nname: Accounts Summary Controller\ntype: Component\ndescription: Provides customers with a summary of their bank accounts.', metadata={'tags': 'Element,Component', 'source': 'Big Bank plc', 'name': 'Accounts Summary Controller', 'parent': 'API Application', 'type': 'Component'}),
  0.8886620402336121),
 (Document(page_content='\n\nname: Personal Banking Customer\ntype: Person\ndescription: A customer of the bank, with personal bank accounts.', metadata={'tags': 'Element,Person,Customer', 'source': 'Big Bank plc', 'name': 'Personal Banking Customer', 'parent': '', 'type': 'Person'}),
  0.8539392352104187),
 (Document(page_content='\n\nname: Internet Banking System\ntype: SoftwareSystem\ndescription: Allows customers to view information about their bank accounts, and make payments.', metadata={'tags': 'Element,Software System', 'source': 'Big Bank plc', 'name': 'Internet Banking System', 'parent': '', 'type': 'SoftwareSystem'}),
  0.8382985591888428),
 (Docum