In [1]:
from os import getenv
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


#### You will need [Neo4j APOC](https://neo4j.com/docs/apoc/current/installation/) for langchain_community.graph.Neo4jGraph to function.
Follow the instructions in the link and then, in neo4j.conf, set 
`dbms.security.procedures.unrestricted=apoc.*`

In [2]:
load_dotenv()

True

In [3]:
neo4j_url= getenv("NEO4J_URL")
neo4j_user = getenv("NEO4J_USER")
neo4j_password = getenv("NEO4J_PW")
neo4j_database = getenv("NEO4J_DB")

In [4]:
graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password, neo4j_database)

In [5]:
print(graph.schema)

Node properties are the following:
Movie {id: INTEGER, year: INTEGER, certificate: STRING, rating: FLOAT, tagline: STRING, runtime: STRING, title: STRING, box_office: STRING, budget: STRING, name: STRING, run_time: STRING, rank: INTEGER},Person {name: STRING},Genre {name: STRING},Certificate {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Movie)-[:HAS_GENRE]->(:Genre),(:Movie)-[:HAS_CERTIFICATE]->(:Certificate),(:Movie)-[:HAS_CAST]->(:Person),(:Person)-[:ACTED_IN]->(:Movie),(:Person)-[:DIRECTED]->(:Movie),(:Person)-[:WROTE]->(:Movie)


#### Gemini 

In [6]:
gemini_api = getenv("GEMINI_API_KEY")

In [7]:
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key = gemini_api ,temperature = 0)
chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)

### Prompts

- #### Simple prompts.

In [8]:
questions = [
    "Which are the best movies released before 1960?",
    "Which are the 3 best crime dramas?",
    "Who is the director with highest average rating for their movies?",
    "In what movies have Robert DeNiro and Al Pacino together?",
    "Which director has Brad Pitt worked with the most?",
    "Best 5 thrillers made before 2000."
]

for question in questions:
    print("\n\n" + question)
    print(chain.invoke(question)["result"])



Which are the best movies released before 1960?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Movie)
WHERE m.year < 1960
RETURN m.title, m.rating
ORDER BY m.rating DESC
LIMIT 10
[0m
Full Context:
[32;1m[1;3m[{'m.title': '12 Angry Men', 'm.rating': 9.0}, {'m.title': "It's a Wonderful Life", 'm.rating': 8.6}, {'m.title': 'Seven Samurai', 'm.rating': 8.6}, {'m.title': 'Rear Window', 'm.rating': 8.5}, {'m.title': 'Modern Times', 'm.rating': 8.5}, {'m.title': 'City Lights', 'm.rating': 8.5}, {'m.title': 'Casablanca', 'm.rating': 8.5}, {'m.title': 'The Great Dictator', 'm.rating': 8.4}, {'m.title': 'Paths of Glory', 'm.rating': 8.4}, {'m.title': 'Sunset Blvd.', 'm.rating': 8.4}][0m

[1m> Finished chain.[0m
Here are some of the best movies released before 1960, according to their ratings: 12 Angry Men (9.0), It's a Wonderful Life (8.6), Seven Samurai (8.6), Rear Window (8.5), Modern Times (8.5), City Lights (8.5), Casablanca (8.5),

- #### Some prompts need clarification. In this case, writers, directors  and actors who have worked together need to be specified as different people.

In [9]:
questions = [
    "Which actor and director have worked together the most?",
    "Which writer and director have worked together the most?",

    "Which actor and director have worked together the most? Remember they cannot be the same person.",
    "Which writer and director have worked together the most? Remember they cannot be the same person."
]

for question in questions:
    print("\n\n" + question)
    print(chain.invoke(question)["result"])



Which actor and director have worked together the most?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(p2:Person)
WITH p1, p2, COUNT(m) AS collaboration_count
WHERE p1 <> p2
RETURN p1.name AS Actor, p2.name AS Director, collaboration_count
ORDER BY collaboration_count DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'Actor': 'Michael Caine', 'Director': 'Christopher Nolan', 'collaboration_count': 6}][0m

[1m> Finished chain.[0m
The actor Michael Caine and the director Christopher Nolan have worked together the most, with a collaboration count of 6.



Which writer and director have worked together the most?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (writer:Person)-[:WROTE]->(movie:Movie)<-[:DIRECTED]-(director:Person)
RETURN writer.name AS Writer, director.name AS Director, count(movie) AS CollaborationCount
ORDER BY CollaborationC

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 17
}
].


Generated Cypher:
[32;1m[1;3mcypher
MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(p2:Person)
WHERE p1 <> p2
RETURN p1.name AS Actor, p2.name AS Director, count(m) AS CollaborationCount
ORDER BY CollaborationCount DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'Actor': 'Michael Caine', 'Director': 'Christopher Nolan', 'CollaborationCount': 6}][0m


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 13
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 11
}
].
Retrying langchain_google_gena


[1m> Finished chain.[0m
The actor Michael Caine and the director Christopher Nolan have worked together the most, with 6 collaborations.



Which writer and director have worked together the most? Remember they cannot be the same person.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (writer:Person)-[:WROTE]->(movie:Movie)<-[:DIRECTED]-(director:Person)
WHERE writer <> director
RETURN writer.name AS Writer, director.name AS Director, count(movie) AS CollaborationCount
ORDER BY CollaborationCount DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'Writer': 'Jonathan Nolan', 'Director': 'Christopher Nolan', 'CollaborationCount': 5}][0m

[1m> Finished chain.[0m
The writer Jonathan Nolan and the director Christopher Nolan have worked together 5 times, which is the most collaborations between a writer and director in the data provided.



- #### Sometimes the LLM generates proper Cypher queries, which return the proper results, but the LLM is unable to infer the required information from these results.
- #### Adding another line that motivates it to understand helps in these cases.

In [10]:
questions = [
    "Who is the actor who has starred in most of the best rated movies.",
    
    "Who is the actor who has starred in most of the best rated movies." \
    "You know the answer. Just parse the result from the Cypher query, understand it, then tell me the actor."
]

for question in questions:
    print("\n" + question)
    print(chain.invoke(question)["result"])


Who is the actor who has starred in most of the best rated movies.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
WITH p, avg(m.rating) AS avgRating, count(m) AS movieCount
ORDER BY avgRating DESC, movieCount DESC
LIMIT 1
RETURN p.name AS ActorName
[0m
Full Context:
[32;1m[1;3m[{'ActorName': 'Tim Robbins'}][0m

[1m> Finished chain.[0m
The actor who has starred in most of the best rated movies is Tim Robbins.


Who is the actor who has starred in most of the best rated movies.You know the answer. Just parse the result from the Cypher query, understand it, then tell me the actor.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
WITH p, avg(m.rating) AS avgRating, count(m) AS movieCount
ORDER BY avgRating DESC, movieCount DESC
LIMIT 1
RETURN p.name AS Actor
[0m
Full Context:
[32;1m[1;3m[{'Actor': 'Tim Robbins'}][0m

### Enter your own prompts -

In [11]:
while True:
    question = input("\n\nEnter your question (Type 'EXIT' to exit) : ")
    
    if question == "EXIT":
        print("Exiting")
        break
    else:
        print("\n" + question)
        print(chain.invoke(question)["result"])


drama movies before 2007


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre)
WHERE g.name = "Drama" AND m.year < 2007
RETURN m
[0m
Full Context:
[32;1m[1;3m[{'m': {'year': 1994, 'certificate': 'R', 'rating': 9.3, 'tagline': 'Fear can hold you prisoner. Hope can set you free.', 'runtime': '2h 22m', 'id': 1, 'title': 'The Shawshank Redemption', 'box_office': '28884504', 'budget': '25000000'}}, {'m': {'year': 1972, 'certificate': 'R', 'rating': 9.2, 'tagline': "An offer you can't refuse.", 'runtime': '2h 55m', 'id': 2, 'title': 'The Godfather', 'box_office': '250341816', 'budget': '6000000'}}, {'m': {'year': 1974, 'certificate': 'R', 'rating': 9.0, 'tagline': "All the power on earth can't change destiny.", 'runtime': '3h 22m', 'id': 4, 'title': 'The Godfather Part II', 'box_office': '47961919', 'budget': '13000000'}}, {'m': {'year': 1957, 'certificate': 'Approved', 'rating': 9.0, 'tagline': 'Life Is In Thei

ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 's': expected 'FOREACH', 'ALTER', 'ORDER BY', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'LIMIT', 'MATCH', 'MERGE', 'NODETACH', 'OFFSET', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'SKIP', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 2, column 1 (offset: 7))
"s"
 ^}