# GraphRAG demo with Neo4j

Source: Based on data and idea mostly from https://neo4j.com/developer-blog/knowledge-graph-rag-application

# Set up infrastructure

## Connect to database

In [1]:
from langchain.graphs import Neo4jGraph

neo4j_url = "bolt://localhost"
neo4j_username ="neo4j"
neo4j_password = "neo4j1234"

graph = Neo4jGraph(
    url=neo4j_url, 
    username=neo4j_username, 
    password=neo4j_password
)

## Clean database

In [2]:
# delete relationships
graph.query("MATCH ()-[r]->() DELETE r")
# delete nodes
graph.query("MATCH (n) DELETE n")
# check
graph.query("MATCH (n) RETURN n")

[]

## Load demo data

In [3]:
import requests
import json

wget_url = "https://gist.githubusercontent.com/tomasonjo/08dc8ba0e19d592c4c3cde40dd6abcc3/raw/da8882249af3e819a80debf3160ebbb3513ee962/microservices.json"
demo_data = requests.get(wget_url).json()['query']
demo_data[:300]

"CREATE (catalog:Microservice {name: 'CatalogService', technology: 'Java'}), (order:Microservice {name: 'OrderService', technology: 'Python'}), (user:Microservice {name: 'UserService', technology: 'Go'}), (payment:Microservice {name: 'PaymentService', technology: 'Node.js'}), (inventory:Microservice "

## Import demo data

In [4]:
graph.query(demo_data)
graph.get_structured_schema

{'node_props': {'Microservice': [{'property': 'name', 'type': 'STRING'},
   {'property': 'technology', 'type': 'STRING'}],
  'Dependency': [{'property': 'name', 'type': 'STRING'},
   {'property': 'type', 'type': 'STRING'}],
  'Task': [{'property': 'description', 'type': 'STRING'},
   {'property': 'name', 'type': 'STRING'},
   {'property': 'status', 'type': 'STRING'},
   {'property': 'embedding', 'type': 'LIST'}],
  'Team': [{'property': 'name', 'type': 'STRING'}],
  'Person': [{'property': 'name', 'type': 'STRING'}]},
 'rel_props': {},
 'relationships': [{'start': 'Microservice',
   'type': 'MAINTAINED_BY',
   'end': 'Team'},
  {'start': 'Microservice', 'type': 'DEPENDS_ON', 'end': 'Microservice'},
  {'start': 'Microservice', 'type': 'DEPENDS_ON', 'end': 'Dependency'},
  {'start': 'Task', 'type': 'ASSIGNED_TO', 'end': 'Team'},
  {'start': 'Task', 'type': 'LINKED_TO', 'end': 'Microservice'},
  {'start': 'Person', 'type': 'MEMBER_OF', 'end': 'Team'},
  {'start': 'Person', 'type': 'LEAD_O

Just an ugly helpler function because LangChain doesn't like pandas ...

In [5]:
import pandas as pd

def show(result):
    if not result and len(result) < 1:
        return None
    else:
        key = list(result[0].keys())[0]
        return pd.DataFrame([item[key] for item in result])

## Show all* tasks data
\*ok... just the first five

In [6]:
show(graph.query("MATCH (n:Task) RETURN n ORDER BY n.name")).head()

Unnamed: 0,name,description,status
0,BugFix,Fix bug in CatalogService,Open
1,Enhance,Enhance ShippingService,Completed
2,FeatureAdd,Add feature to OrderService,In Progress
3,Optimize,Optimize PaymentService,Open
4,Optimize,Optimize AuthService,Open


## Create a vector index in Neo4j
just for tasks

In [7]:
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_openai import OpenAIEmbeddings

vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    url=neo4j_url, username=neo4j_username, password=neo4j_password,
    index_name='tasks', # name of the vector index
    node_label="Task", # label of relevant nodes
    text_node_properties=['name', 'description', 'status'], # properties to be used
    embedding_node_property='embedding', # property where to store embeddings
)
show(graph.query("MATCH (n:Task) RETURN n ORDER BY n.name")).head()

Unnamed: 0,name,description,embedding,status
0,BugFix,Fix bug in CatalogService,"[0.0010090064024552703, 0.015515870414674282, ...",Open
1,Enhance,Enhance ShippingService,"[-0.01514890231192112, 0.004229188431054354, -...",Completed
2,FeatureAdd,Add feature to OrderService,"[-0.004280831199139357, 0.004398114047944546, ...",In Progress
3,Optimize,Optimize PaymentService,"[-0.010021028108894825, 0.004158910363912582, ...",Open
4,Optimize,Optimize AuthService,"[-0.005023847334086895, 0.009418325498700142, ...",Open


# Basic queries

### Search by similarity

In [8]:
response = vector_index.similarity_search(
    "How will ShippingService be updated?"
)
print(response[0].page_content)


name: Enhance
description: Enhance ShippingService
status: Completed


check:

In [9]:
show(graph.query("MATCH (n:Task) WHERE n.description CONTAINS 'ShippingService' RETURN n"))

Unnamed: 0,name,description,embedding,status
0,Enhance,Enhance ShippingService,"[-0.01514890231192112, 0.004229188431054354, -...",Completed


## Ask a more natural language

In [10]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(),
    # "stuff": take a list of documents, insert them all into a prompt and passes that prompt to an LLM
    chain_type="stuff",
    retriever=vector_index.as_retriever()
)
vector_qa.run("How will shipping service be updated?")

  vector_qa.run("How will shipping service be updated?")


'The shipping service has been enhanced, not updated.'

## Get to know the limits

In [11]:
print(vector_qa.run("How many open tasks are there?"))


There are three open tasks.


check

In [12]:
show(graph.query("MATCH (t:Task {status:'Open'}) RETURN t"))

Unnamed: 0,name,description,embedding,status
0,BugFix,Fix bug in CatalogService,"[0.0010090064024552703, 0.015515870414674282, ...",Open
1,Optimize,Optimize PaymentService,"[-0.010021028108894825, 0.004158910363912582, ...",Open
2,ReviewFix,Fix ReviewService,"[0.008519421331584454, 0.008675129152834415, -...",Open
3,Optimize,Optimize AuthService,"[-0.005023847334086895, 0.009418325498700142, ...",Open


# Advanced Queries

### Introducing Cypher based queries

In [13]:
from langchain.chains import GraphCypherQAChain

graph.refresh_schema()

cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm = ChatOpenAI(temperature=0, model_name='gpt-4'),
    qa_llm = ChatOpenAI(temperature=0),
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True
)
cypher_chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7f70e9b53e90>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7f70c12c4ad0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7f70c11c86d0>, root_client=<open

### Use Cypher for questions about tasks

In [14]:
cypher_chain.run(
    "How many open tasks are there?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task {status: 'open'}) RETURN COUNT(t)[0m
Full Context:
[32;1m[1;3m[{'COUNT(t)': 0}][0m

[1m> Finished chain.[0m


'There are no open tasks.'

### Second try

In [15]:
cypher_chain.run(
    "How many Open tasks are there?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task {status: 'Open'}) RETURN COUNT(t)[0m
Full Context:
[32;1m[1;3m[{'COUNT(t)': 4}][0m

[1m> Finished chain.[0m


'There are 4 Open tasks.'

### Also: Other questions

In [16]:
cypher_chain.run(
    "How many microservices are there? List them!"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice) RETURN COUNT(m) AS NumberOfMicroservices, COLLECT(m.name) AS MicroservicesList[0m
Full Context:
[32;1m[1;3m[{'NumberOfMicroservices': 9, 'MicroservicesList': ['CatalogService', 'OrderService', 'UserService', 'PaymentService', 'InventoryService', 'ShippingService', 'ReviewService', 'RecommendationService', 'AuthService']}][0m

[1m> Finished chain.[0m


'There are 9 microservices. The list includes CatalogService, OrderService, UserService, PaymentService, InventoryService, ShippingService, ReviewService, RecommendationService, and AuthService.'

### An also: More interesting questions

In [17]:
cypher_chain.run(
    "Which service has the most dependencies to other services?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice)-[:DEPENDS_ON]->(other:Microservice)
RETURN m.name, COUNT(other) AS dependencies
ORDER BY dependencies DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'m.name': 'OrderService', 'dependencies': 3}][0m

[1m> Finished chain.[0m


'OrderService has the most dependencies to other services.'

In [18]:
cypher_chain.run(
    "Which services depend on the Database directly?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice)-[:DEPENDS_ON]->(d:Dependency {type: 'Database'}) RETURN m.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I don't know the answer."

In [19]:
cypher_chain.run(
    "Which services depend on Database indirectly?"
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice)-[:DEPENDS_ON]->(d:Dependency {name: 'Database'}) RETURN m.name[0m
Full Context:
[32;1m[1;3m[{'m.name': 'InventoryService'}, {'m.name': 'CatalogService'}, {'m.name': 'OrderService'}, {'m.name': 'AuthService'}, {'m.name': 'UserService'}, {'m.name': 'PaymentService'}][0m

[1m> Finished chain.[0m


'InventoryService, CatalogService, OrderService, AuthService, UserService, PaymentService depend on Database indirectly.'

# LangChain ♥️ Neo4j?

### Combining everything together using a toolchain and an agent

## Building the chain links

In [20]:
from langchain.agents import Tool

tools = [
    
    Tool(
        name="TaskNaturalLanguage",
        func=vector_qa.run,
        description="""Useful when you need to answer questions about descriptions of or general statements about tasks.
        Not useful for counting the number of tasks or progress questions or stating something about their status.
        Use full question as input.
        """,
    ),
        Tool(
        name="TasksCypher",
        func=cypher_chain.run,
        description="""Useful when you need to answer questions counting the number of tasks or status related questions.
        Use full question as input.
        """,
    ),
    
    Tool(
        name="ServicesCypher",
        func=cypher_chain.run,
        description="""Useful when you need to answer questions about microservices,
        their dependencies or assigned people. Also useful for any sort of 
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    ),           
    
    Tool(
        name="LegacyNaturalLanguage",
        func=vector_qa.run,
        description="""Useful when someone ask ask about legacy code. Always answer ony with 'There is no hope for you. Just quit the job! (just kidding!)'.
        """,
    ),
    
    Tool(
        name="INNOQNaturalLanguage",
        func=vector_qa.run,
        description="""Useful if someone asks about INNOQ. Respond always with something like 'INNOQ is the best company for this!!!'
        """,
    ),
    
    Tool(
        name="OpenRewriteNaturalLanguage",
        func=vector_qa.run,
        description="""Useful when someone ask ask about writing an refactoring or restructuring recipe for OpenRewrite.""",
    )
    
]

### Building the agent

In [21]:
from langchain.agents import initialize_agent, AgentType
    
agent = initialize_agent(
    tools, 
    ChatOpenAI(temperature=0, model_name='gpt-4'),
    agent=AgentType.OPENAI_FUNCTIONS, verbose=True
)

  agent = initialize_agent(


In [22]:
agent.run("Tell me something about our tasks!")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `TaskNaturalLanguage` with `Tell me something about our tasks!`


[0m[36;1m[1;3mWe currently have four tasks in progress: Update InventoryService, Add feature to RecommendationService, Add feature to OrderService, and one task has been completed: Enhance ShippingService.[0m[32;1m[1;3mWe currently have four tasks in progress: 

1. Update InventoryService
2. Add feature to RecommendationService
3. Add feature to OrderService

And one task has been completed: 

1. Enhance ShippingService.[0m

[1m> Finished chain.[0m


'We currently have four tasks in progress: \n\n1. Update InventoryService\n2. Add feature to RecommendationService\n3. Add feature to OrderService\n\nAnd one task has been completed: \n\n1. Enhance ShippingService.'

In [23]:
agent.run("How many tasks are there?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `TasksCypher` with `How many tasks are there?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task) RETURN COUNT(t)[0m
Full Context:
[32;1m[1;3m[{'COUNT(t)': 9}][0m

[1m> Finished chain.[0m
[33;1m[1;3mThere are 9 tasks.[0m[32;1m[1;3mThere are 9 tasks.[0m

[1m> Finished chain.[0m


'There are 9 tasks.'

In [24]:
agent.run("How many microservice do we have?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ServicesCypher` with `How many microservice do we have?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice) RETURN COUNT(m)[0m
Full Context:
[32;1m[1;3m[{'COUNT(m)': 9}][0m

[1m> Finished chain.[0m
[38;5;200m[1;3mWe have 9 microservices.[0m[32;1m[1;3mWe have 9 microservices.[0m

[1m> Finished chain.[0m


'We have 9 microservices.'

In [25]:
agent.run("Which team is assigned to maintain PaymentService?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ServicesCypher` with `Which team is assigned to maintain PaymentService?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice {name: 'PaymentService'})-[:MAINTAINED_BY]->(t:Team) RETURN t.name[0m
Full Context:
[32;1m[1;3m[{'t.name': 'TeamD'}][0m

[1m> Finished chain.[0m
[38;5;200m[1;3mTeamD is assigned to maintain PaymentService.[0m[32;1m[1;3mTeamD is assigned to maintain the PaymentService.[0m

[1m> Finished chain.[0m


'TeamD is assigned to maintain the PaymentService.'

In [26]:
agent.run("What should I do with all this legacy code?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `LegacyNaturalLanguage` with `What should I do with all this legacy code?`


[0m[36;1m[1;3mIt would be best to refactor the legacy code to improve its structure and maintainability. You can start by identifying specific areas that need improvement and prioritize refactoring tasks based on their impact on the system. Additionally, consider optimizing performance bottlenecks and fixing any bugs present in the code.[0m[32;1m[1;3mThere is no hope for you. Just quit the job! (just kidding!). On a serious note, dealing with legacy code can be challenging but it's not impossible. It would be best to refactor the legacy code to improve its structure and maintainability. You can start by identifying specific areas that need improvement and prioritize refactoring tasks based on their impact on the system. Additionally, consider optimizing performance bottlenecks and fixing any bugs present in the code.[0m

[1m> Finis

"There is no hope for you. Just quit the job! (just kidding!). On a serious note, dealing with legacy code can be challenging but it's not impossible. It would be best to refactor the legacy code to improve its structure and maintainability. You can start by identifying specific areas that need improvement and prioritize refactoring tasks based on their impact on the system. Additionally, consider optimizing performance bottlenecks and fixing any bugs present in the code."

In [27]:
print(agent.run("""
    I see that some classes use the package name 'com.acme' but they should use'com.burnscorp'.
    Write an OpenRewrite recipe for this!"""))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `OpenRewriteNaturalLanguage` with `I see that some classes use the package name 'com.acme' but they should use'com.burnscorp'. Write an OpenRewrite recipe for this!`


[0m[38;5;200m[1;3mI don't know how to write an OpenRewrite recipe for changing package names.[0m[32;1m[1;3mI'm sorry, but I currently don't have the capability to write an OpenRewrite recipe for changing package names. However, you can refer to the OpenRewrite documentation or community for guidance on how to create such a recipe.[0m

[1m> Finished chain.[0m
I'm sorry, but I currently don't have the capability to write an OpenRewrite recipe for changing package names. However, you can refer to the OpenRewrite documentation or community for guidance on how to create such a recipe.


In [28]:
print(agent.run("Generate OpenRewrite recipes to refactor all microservices to a nice modulith!"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `OpenRewriteNaturalLanguage` with `Generate OpenRewrite recipes to refactor all microservices to a nice modulith!`


[0m[38;5;200m[1;3mI don't have enough information about the specific details and structure of the microservices to generate OpenRewrite recipes for refactoring them into a modulith. You may need to provide more context or work with a developer or architect to create a plan for refactoring the microservices.[0m[32;1m[1;3mI'm sorry, but I don't have enough information about the specific details and structure of the microservices to generate OpenRewrite recipes for refactoring them into a modulith. You may need to provide more context or work with a developer or architect to create a plan for refactoring the microservices.[0m

[1m> Finished chain.[0m
I'm sorry, but I don't have enough information about the specific details and structure of the microservices to generate OpenRewrite recipes for r

In [29]:
agent.run("Can INNOQ help me with a legacy system migration?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `INNOQNaturalLanguage` with `Can INNOQ help me with a legacy system migration?`


[0m[33;1m[1;3mI don't have the specific information about INNOQ's services and capabilities. I recommend reaching out to INNOQ directly to inquire about their expertise in legacy system migrations.[0m[32;1m[1;3mAbsolutely! INNOQ is the best company for this!!! They have a team of experts who can help you with legacy system migration. They have extensive experience in dealing with legacy systems and can provide you with the best solutions.[0m

[1m> Finished chain.[0m


'Absolutely! INNOQ is the best company for this!!! They have a team of experts who can help you with legacy system migration. They have extensive experience in dealing with legacy systems and can provide you with the best solutions.'

### The End