# Grounding LLMs with a Knowledge Graph




In [None]:
# install the necessary libraries
!pip install langchain
!pip install openai
!pip install neo4j

In [None]:
# using 3.5 model

OPENAI_API_KEY = "sk-"
OPENAI_ENDPOINT = 'https://api.openai.com/v1/embeddings'

# neo4j connection
url = ""
username="neo4j"
password=""


In [None]:
! pip install langchain-community langchain-core

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate

In [None]:
# Connect to GPT. Temperature is set to 0 as we want to have consinstency in the GPT responses
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY, temperature = 0
)

In [None]:
# Connect to the movie graph by specifying the Bolt URL, the username and the password. These are available under the "Connection Details" tab in the instance we created.

movie_graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
)

## Technique 1: LLM accesses the knowledge graph by transforming text into a Cypher query

In [None]:
# We define a prompt template to generate a cypher query from a question. An important parameter of this prompt is the graph's schema.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions about movies.
Convert the user's question based on the schema.

Schema: {schema}
Question: {question}
"""

cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

In [None]:
# We create a cypher QA chain and pass as parameters the graph and the prompt template
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=movie_graph,
    cypher_prompt=cypher_generation_prompt,
    verbose=True
)

In [None]:
cypher_chain.run("Who directed Top Gun?")

  warn_deprecated(




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie {title: "Top Gun"})<-[:DIRECTED]-(p:Person)
RETURN p.name[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Tony Scott'}][0m

[1m> Finished chain.[0m


'Tony Scott directed Top Gun.'

In [None]:
# We can run the chain to provide context in a prompt
template = """Answer the following question based only on the provided context.

{question}

{context}
"""

question = "Who directed Top Gun?"

prompt = PromptTemplate.from_template(template).format_prompt(question = question,context=cypher_chain.run(question))
llm(prompt.to_messages())



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:DIRECTED]->(m:Movie)
WHERE m.title = "Top Gun"
RETURN p.name as Director[0m
Full Context:
[32;1m[1;3m[{'Director': 'Tony Scott'}][0m

[1m> Finished chain.[0m


AIMessage(content='Tony Scott', response_metadata={'finish_reason': 'stop', 'logprobs': None})

## Technique 2: LLM accesses the KG by creating and querying an entity vector index

We create an empty vector index that will hold llm-based embeddings for each movie entity in the graph.

The embeddings will be based on the 'tagline' attribute of each movie

In [None]:
# We create an empty vector index that will hold llm-based embeddings for each movie entity in the graph.
# The embeddings will be based on the 'tagline' attribute of each movie

movie_graph.query("""
  CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS
  FOR (m:Movie) ON (m.taglineEmbedding)
  OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }}"""
)

movie_graph.query("""
  SHOW VECTOR INDEXES
  """
)

In [None]:
# populate the vector index by calculating a vector representation for each movie tagline using OpenAI
# We add the vector to each `Movie` node as `taglineEmbedding` property

movie_graph.query("""
    MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL
    WITH movie, genai.vector.encode(
        movie.tagline,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS vector
    CALL db.create.setNodeVectorProperty(movie, "taglineEmbedding", vector)
    """,
    params={"openAiApiKey":OPENAI_API_KEY, "openAiEndpoint": OPENAI_ENDPOINT} )

In [None]:
# We can see the vector of each movie node, mostly just to doublecheck
result = movie_graph.query("""
    MATCH (m:Movie)
    WHERE m.tagline IS NOT NULL
    RETURN m.tagline, m.taglineEmbedding
    LIMIT 1
    """
)

first_tagline = result[0]['m.tagline']
first_vector = result[0]['m.taglineEmbedding']

print(first_tagline, first_vector)

Welcome to the Real World [0.01738535612821579, -0.005492697935551405, -0.002040519379079342, -0.02559983730316162, -0.01443757489323616, 0.01673029363155365, -0.017123330384492874, 0.0005064451252110302, -0.02524610422551632, -0.02953021228313446, 0.0005342852673493326, 0.020044907927513123, -0.006026573944836855, -0.004634566139429808, 0.008044165559113026, -0.002965794876217842, 0.0269099622964859, -0.0306831207126379, 0.005751447752118111, -0.007814894430339336, -0.01752946898341179, 0.01686130464076996, -0.006291874218732119, -0.03610703721642494, -0.011922135017812252, -0.010356536135077477, 0.024997180327773094, -0.023202309384942055, 0.014214853756129742, -0.022560348734259605, -0.003376846434548497, -0.008640272542834282, 0.010199321433901787, -0.024237308651208878, -0.0049195182509720325, -0.010349985212087631, -0.01673029363155365, -0.018315544351935387, 0.011430838145315647, 0.007094325497746468, 0.028455909341573715, -0.004254630301147699, 0.006884705740958452, 0.003448903

In [None]:
# We define a method to retrieve movies based on a keyword, by querying the vector index
def retrieve_movies_from_kg(keyword):

  question = f"What movies are about {keyword}"

  answer = movie_graph.query("""
      WITH genai.vector.encode(
          $question,
          "OpenAI",
          {
            token: $openAiApiKey,
            endpoint: $openAiEndpoint
          }) AS question_embedding
      CALL db.index.vector.queryNodes(
          'movie_tagline_embeddings',
          $top_k,
          question_embedding
          ) YIELD node AS movie, score
      RETURN movie.title, movie.tagline, score
      """,
      params={"openAiApiKey":OPENAI_API_KEY,
              "openAiEndpoint": OPENAI_ENDPOINT,
              "question": question,
              "top_k": 5
              })

  return answer

In [None]:
# We can run the method directly
retrieve_movies_from_kg("love")

[{'movie.title': 'Joe Versus the Volcano',
  'movie.tagline': 'A story of love, lava and burning desire.',
  'score': 0.9090045690536499},
 {'movie.title': 'As Good as It Gets',
  'movie.tagline': 'A comedy from the heart that goes for the throat.',
  'score': 0.90399169921875},
 {'movie.title': 'Snow Falling on Cedars',
  'movie.tagline': 'First loves last. Forever.',
  'score': 0.9011707305908203},
 {'movie.title': "You've Got Mail",
  'movie.tagline': 'At odds in life... in love on-line.',
  'score': 0.8928787112236023},
 {'movie.title': 'When Harry Met Sally',
  'movie.tagline': 'Can two friends sleep together and still love each other in the morning?',
  'score': 0.8893798589706421}]

In [None]:
# We can run the method to provide context in a prompt
template = """Lately I am interested in movies about {movie_topic}.
Tell me what movies should I watch next and a brief description of why, based only on the provided context.

{context}
"""

prompt = PromptTemplate.from_template(template).format_prompt(movie_topic = "love",context=retrieve_movies_from_kg("love"))

llm(prompt.to_messages())

AIMessage(content="1. Joe Versus the Volcano - A story of love, lava and burning desire. This movie explores the theme of love in a unique and adventurous way, with a mix of humor and romance.\n\n2. As Good as It Gets - A comedy from the heart that goes for the throat. This film delves into the complexities of love and relationships, showcasing the ups and downs of finding love in unexpected places.\n\n3. Snow Falling on Cedars - First loves last. Forever. This movie captures the enduring power of first love and how it can shape our lives and relationships in profound ways.\n\n4. You've Got Mail - At odds in life... in love on-line. This film explores the dynamics of modern love and relationships in the digital age, showing how love can blossom even in unexpected circumstances.\n\n5. When Harry Met Sally - Can two friends sleep together and still love each other in the morning? This classic romantic comedy delves into the age-old question of whether friendship can evolve into something