In [55]:
# Load all the libraries we need
from langchain_community.graphs import Neo4jGraph
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI, OpenAI
from langchain_community.chat_models import ChatMlflow

import os
import textwrap

In [56]:
# Connection settings and API credentials are read from the process
# environment so that no secret is stored in the notebook itself.
# A variable that is not set resolves to None.
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# An unset or empty database name falls back to 'neo4j'.
NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE') or 'neo4j'

In [57]:
# Open the connection to the Neo4j knowledge graph.
# The database name resolved in the configuration cell is passed explicitly so
# the connection targets the configured database instead of depending on
# whatever default the library would pick on its own.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

In [58]:
# Reload the schema from the database, then show it wrapped at 60 columns
# so the long single-line description stays readable in the cell output.
kg.refresh_schema()
print("\n".join(textwrap.wrap(kg.schema, width=60)))

Node properties are the following: Movie {duration: STRING,
listed_in: STRING, country: STRING, cast: STRING,
date_added: STRING, director: STRING, release_year: STRING,
rating: STRING, description: STRING, id: STRING, title:
STRING, type: STRING},Person {name: STRING},Category {name:
STRING},Type {type: STRING},Country {name: STRING},Year
{value: INTEGER} Relationship properties are the following:
The relationships are the following: (:Movie)-[:TYPED_AS]->(
:Type),(:Movie)-[:IN_CATEGORY]->(:Category),(:Movie)-[:WHERE
]->(:Country),(:Movie)-[:CREATED_ON]->(:Year),(:Person)-[:AC
TED_IN]->(:Movie),(:Person)-[:WORK_WITH]->(:Person),(:Person
)-[:DIRECTED]->(:Movie),(:Year)-[:NEXT]->(:Year)


In [59]:
# Prompt text used to turn a natural-language question into a Cypher query.
# {schema} and {question} are the two placeholders filled in at run time.
# The doubled braces in the examples ({{ ... }}) escape literal Cypher braces
# so they survive the template's format-style substitution.
# The few-shot examples only use labels, relationship types and properties
# that appear in the schema printed earlier in this notebook.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# How many people starred in Top Gun?
MATCH (m:Movie {{title:"Top Gun"}})<-[:ACTED_IN]-()
RETURN count(*) AS numberOfActors

# What are the most popular movie categories
MATCH (m:Movie)-[:IN_CATEGORY]->(c:Category)
WITH c.name AS Category, COUNT(m) AS MoviesCount
RETURN Category, MoviesCount
ORDER BY MoviesCount DESC

# All the movies Keanu Reeves acted in
MATCH (p:Person {{name: "Keanu Reeves"}})-[:ACTED_IN]->(m:Movie)
RETURN m.title AS Movie, m.release_year AS ReleaseYear
ORDER BY m.release_year DESC

# all persons who have acted in at least one movie with Keanu Reeves
MATCH (p:Person {{name: "Keanu Reeves"}})-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(coActors)
WHERE p.name <> coActors.name
RETURN DISTINCT coActors.name AS CoActor

# Find pairs of actors who have worked together in more than one movie
MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(p2:Person)
WHERE p1.name < p2.name
WITH p1, p2, COUNT(m) AS sharedMovies
WHERE sharedMovies > 1
RETURN p1.name AS Actor1, p2.name AS Actor2, sharedMovies
ORDER BY sharedMovies DESC

# Retrieve movies along with their categories.
MATCH (m:Movie)-[:IN_CATEGORY]->(c:Category)
RETURN m.title AS Movie, collect(c.name) AS Categories

# Find Movies Directed by Actors
MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m2:Movie)
RETURN p.name AS Person, m.title AS DirectedMovie, m2.title AS ActedInMovie

# Find movies released in the United States grouped by their release year
MATCH (m:Movie)-[:WHERE]->(c:Country {{name: "United States"}})
RETURN m.release_year AS ReleaseYear, collect(m.title) AS Movies
ORDER BY m.release_year

# Which actors have worked with Keanu Reeves in more than 2 movies and list the movies
MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(p2:Person)
WHERE p1.name = "Keanu Reeves" AND p1.name <> p2.name
WITH p1, p2, collect(m.title) AS Movies, COUNT(m) AS sharedMovies
WHERE sharedMovies > 2
RETURN p2.name AS Actor, Movies
ORDER BY sharedMovies DESC

The question is:
{question}"""

In [60]:
# Wrap the raw template text in a prompt object and declare the two
# placeholders that are supplied each time a question is asked.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

In [67]:
# LLM handed to the chain; it is used to write the Cypher query for each
# question. temperature=0 keeps generation as repeatable as the model allows:
# a high sampling temperature makes the generated Cypher vary between runs and
# raises the chance of queries that do not match the schema.
chat_llm = OpenAI(temperature=0)

# Uncomment to use the AI gateway instead of OpenAI; change the name of the
# endpoint as required.
# chat_llm = ChatMlflow(
#     target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
#     endpoint="chat-gpt35turbo-sm",
# )

# Question-answering chain: generates Cypher from the custom prompt, runs it
# against the graph, and phrases an answer from the returned rows.
cypherChain = GraphCypherQAChain.from_llm(
    chat_llm,
    graph=kg,
    verbose=True,  # print the generated Cypher and the retrieved context
    validate_cypher=True,  # let the chain validate the generated Cypher before it is run
    cypher_prompt=CYPHER_GENERATION_PROMPT,
)

In [79]:
# Aggregation question; the dict returned by the chain is the cell's output.
question = "Who has acted in the most number of movies, give me three listed in alphabetic order?"
cypherChain.invoke(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
RETURN p.name AS Actor, count(*) AS Movies
ORDER BY Movies DESC, Actor
LIMIT 3[0m
Full Context:
[32;1m[1;3m[{'Actor': '', 'Movies': 570}, {'Actor': 'Anupam Kher', 'Movies': 33}, {'Actor': 'Shah Rukh Khan', 'Movies': 30}][0m

[1m> Finished chain.[0m


{'query': 'Who has acted in the most number of movies, give me three listed in alphabetic order?',
 'result': ' Anupam Kher, Shah Rukh Khan'}

In [81]:
# Co-acting question about two named people; the returned dict is the cell's output.
question = "Which movies have Shah Rukh Khan and Anupam Kher worked in together and which year were they released?"
cypherChain.invoke(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (s:Person {name: "Shah Rukh Khan"})-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(a:Person {name: "Anupam Kher"})
RETURN m.title AS Movie, m.release_year AS ReleaseYear
ORDER BY m.release_year ASC[0m
Full Context:
[32;1m[1;3m[{'Movie': 'Oh Darling Yeh Hai India', 'ReleaseYear': '1995'}, {'Movie': 'Chaahat', 'ReleaseYear': '1996'}, {'Movie': 'Paheli', 'ReleaseYear': '2005'}][0m

[1m> Finished chain.[0m


{'query': 'Which movies have Shah Rukh Khan and Anupam Kher worked in together and which year were they released?',
 'result': ' Shah Rukh Khan and Anupam Kher worked in Oh Darling Yeh Hai India, Chaahat and Paheli, which were released in 1995, 1996, and 2005, respectively.'}

In [90]:
# Category lookup for a single title; the returned dict is the cell's output.
question = "What genre is Stranger Things?"
cypherChain.invoke(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (m:Movie {title: "Stranger Things"})-[:IN_CATEGORY]->(c:Category)
RETURN c.name AS Genre[0m
Full Context:
[32;1m[1;3m[{'Genre': 'TV Mysteries'}, {'Genre': 'TV Horror'}, {'Genre': 'TV Sci-Fi & Fantasy'}][0m

[1m> Finished chain.[0m


{'query': 'What genre is Stranger Things?',
 'result': ' Stranger Things is a combination of TV Mysteries, TV Horror, and TV Sci-Fi & Fantasy.\n'}

In [96]:
# Similarity question based on shared categories; the returned dict is the cell's output.
question = "Find movies that share at least two genres with Stranger Things"
cypherChain.invoke(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m

MATCH (s:Movie {title: "Stranger Things"})-[:IN_CATEGORY]->(c1:Category),
      (s)-[:IN_CATEGORY]->(c2:Category),
      (m:Movie)-[:IN_CATEGORY]->(c1),
      (m)-[:IN_CATEGORY]->(c2)
WHERE m.title <> s.title
RETURN m.title AS Movie, collect(c1.name) AS SharedGenres
ORDER BY Movie[0m
Full Context:
[32;1m[1;3m[{'Movie': 'American Horror Story', 'SharedGenres': ['TV Horror', 'TV Mysteries']}, {'Movie': 'Anjaan: Special Crimes Unit', 'SharedGenres': ['TV Horror', 'TV Mysteries']}, {'Movie': 'Beyond Stranger Things', 'SharedGenres': ['TV Sci-Fi & Fantasy', 'TV Mysteries']}, {'Movie': 'Chambers', 'SharedGenres': ['TV Horror', 'TV Mysteries']}, {'Movie': 'Chilling Adventures of Sabrina', 'SharedGenres': ['TV Horror', 'TV Sci-Fi & Fantasy', 'TV Mysteries', 'TV Sci-Fi & Fantasy', 'TV Mysteries', 'TV Horror']}, {'Movie': 'Haven', 'SharedGenres': ['TV Horror', 'TV Mysteries']}, {'Movie': 'Helix', 'SharedGenr

{'query': 'Find movies that share at least two genres with Stranger Things',
 'result': ' American Horror Story, Chilling Adventures of Sabrina, Haven, Hemlock Grove, Helix'}

In [99]:
# Cast lookup for a single title; the returned dict is the cell's output.
question = "Who acts in Stranger Things?"
cypherChain.invoke(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m

MATCH (m:Movie {title: "Stranger Things"})<-[:ACTED_IN]-(p:Person)
RETURN p.name AS Actor[0m
Full Context:
[32;1m[1;3m[{'Actor': 'Sean Astin'}, {'Actor': 'David Harbour'}, {'Actor': 'Finn Wolfhard'}, {'Actor': 'Millie Bobby Brown'}, {'Actor': 'Gaten Matarazzo'}, {'Actor': 'Caleb McLaughlin'}, {'Actor': 'Natalia Dyer'}, {'Actor': 'Charlie Heaton'}, {'Actor': 'Noah Schnapp'}, {'Actor': 'Joe Keery'}][0m

[1m> Finished chain.[0m


{'query': 'Who acts in Stranger Things?',
 'result': ' Sean Astin, David Harbour, Finn Wolfhard, Millie Bobby Brown, Gaten Matarazzo, Caleb McLaughlin, Natalia Dyer, Charlie Heaton, Noah Schnapp, Joe Keery'}