Step 1: Install Required Packages

In [1]:
# Install the libraries this notebook relies on. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
# NOTE(review): versions are unpinned, so a later re-run may resolve different
# releases than the ones recorded in the output below.
# NOTE(review): none of the listed packages is an obvious provider of the
# `Groq` class used in Step 5 — confirm whether an extra integration package
# has to be added here.
%pip install langchain langchain-community langchain-openai neo4j llama_index python-dotenv

Collecting neo4j
  Using cached neo4j-5.23.1-py3-none-any.whl (293 kB)
Collecting llama_index
  Using cached llama_index-0.10.65-py3-none-any.whl (6.8 kB)
Collecting llama-index-question-gen-openai<0.2.0,>=0.1.2
  Using cached llama_index_question_gen_openai-0.1.3-py3-none-any.whl (2.9 kB)
Collecting llama-index-core<0.11.0,>=0.10.65
  Using cached llama_index_core-0.10.67-py3-none-any.whl (15.5 MB)
Collecting llama-index-readers-file<0.2.0,>=0.1.4
  Using cached llama_index_readers_file-0.1.33-py3-none-any.whl (38 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5
  Using cached llama_index_embeddings_openai-0.1.11-py3-none-any.whl (6.3 kB)
Collecting llama-index-llms-openai<0.2.0,>=0.1.27
  Using cached llama_index_llms_openai-0.1.29-py3-none-any.whl (11 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4
  Using cached llama_index_agent_openai-0.2.9-py3-none-any.whl (13 kB)
Collecting llama-index-cli<0.2.0,>=0.1.2
  Using cached llama_index_cli-0.1.13-py3-none-any.whl (27 

Step 2: Import Libraries

In [3]:
import os
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain

# Load environment variables from the .env file before reading them.
load_dotenv()

# Read the Neo4j connection settings and the Groq API key in one pass.
# A variable that is not set comes back as None.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, GROQ_API_KEY = (
    os.environ.get(setting_name)
    for setting_name in (
        "NEO4J_URI",
        "NEO4J_USERNAME",
        "NEO4J_PASSWORD",
        "GROQ_API_KEY",
    )
)

Step 3: Connect to Neo4j and Create a Graph Instance

In [4]:
# Connection settings for the graph wrapper, taken from the environment
# values read in Step 2.
neo4j_settings = {
    "url": NEO4J_URI,
    "username": NEO4J_USERNAME,
    "password": NEO4J_PASSWORD,
    "enhanced_schema": True,
}

# Create the Neo4jGraph instance used by every later cell.
# NOTE(review): the recorded run of this cell failed with "Could not connect
# to Neo4j database" — check that NEO4J_URI is reachable before continuing.
graph = Neo4jGraph(**neo4j_settings)

Unable to retrieve routing information


ValueError: Could not connect to Neo4j database. Please ensure that the url is correct

Step 4: Seed the Database with Initial Data

In [None]:
# Seed the database with one movie and its cast.
# The Cypher text uses $parameters instead of inlined literals, so the data
# lives in a plain Python dict and the statement can be reused for other
# movies without editing the query string.
# MERGE matches an existing node/relationship or creates it, so running this
# cell more than once does not duplicate the data.
seed_query = """
MERGE (m:Movie {name: $movie_name, runtime: $runtime})
WITH m
UNWIND $actors AS actor
MERGE (a:Actor {name: actor})
MERGE (a)-[:ACTED_IN]->(m)
"""

# Values bound to the $parameters above.
seed_params = {
    "movie_name": "Top Gun",
    "runtime": 120,
    "actors": ["Tom Cruise", "Val Kilmer", "Anthony Edwards", "Meg Ryan"],
}

# Last expression of the cell, so the query result is displayed as its output.
graph.query(seed_query, params=seed_params)

In [None]:
# Refresh the schema held by the graph wrapper (the seed query above may have
# added new labels and relationships), then print it.
graph.refresh_schema()
schema_text = graph.schema
print(schema_text)

Step 5: Initialize the LLM and Graph Q&A Chain

In [None]:
# Initialize the Groq LLM with the API key read from the environment in Step 2.
# NOTE(review): `Groq` is not imported in any cell shown in this notebook, so
# on a fresh kernel (Restart & Run All) this line raises NameError. Since
# `llm` is handed to GraphCypherQAChain.from_llm below, presumably a
# LangChain-compatible Groq chat model is intended — confirm which class is
# meant, add its import to the Step 2 import cell and its package to the
# Step 1 install cell.
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)

# Options for the question-answering chain, kept together so they are easy
# to tweak while experimenting.
chain_options = {
    "verbose": True,
    "top_k": 2,
    "return_direct": True,
    "validate_cypher": True,
}

# Build the GraphCypherQAChain from the LLM and the Neo4j graph.
chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, **chain_options)

Step 6: Perform a Query Using the Chain

In [None]:
# Ask the chain a natural-language question about the seeded data.
query = dict(query="Who played in Top Gun?")
response = chain.invoke(query)

# Show the question and the chain's answer on consecutive lines.
print(f"Query: {query['query']}", f"Result: {response}", sep="\n")

Step 7 (Optional): Customize Cypher Generation

In [None]:
from langchain_core.prompts.prompt import PromptTemplate

# Custom prompt text for the Cypher-generation step.
# `{schema}` and `{question}` are the two placeholders declared in
# `input_variables` below. The doubled braces in the example query
# (`{{name:"Top Gun"}}`) appear to be escapes for literal `{` / `}` so that the
# Cypher property map is not read as a placeholder.
# NOTE(review): this relies on str.format-style templating — confirm against
# PromptTemplate's default template format.
CYPHER_GENERATION_TEMPLATE = """Task: Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:
# How many people played in Top Gun?
MATCH (m:Movie {{name:"Top Gun"}})<-[:ACTED_IN]-()
RETURN count(*) AS numberOfActors

The question is:
{question}"""

# Wrap the template text in a PromptTemplate, naming the variables it expects.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# Rebuild the chain so that Cypher generation uses the custom prompt.
# This rebinds `chain`, replacing the chain created in Step 5.
custom_chain_options = {
    "verbose": True,
    "cypher_prompt": CYPHER_GENERATION_PROMPT,
}
chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, **custom_chain_options)

# Run a counting question through the chain that uses the custom prompt.
query = dict(query="How many people played in Top Gun?")
response = chain.invoke(query)

# Show the question and the chain's answer on consecutive lines.
print(f"Query: {query['query']}", f"Result: {response}", sep="\n")
