In [2]:
!pip install langchain google-cloud-aiplatform google-auth neo4j > /dev/null

In [3]:
import os

from neo4j import GraphDatabase

# Connection settings are read from the environment so credentials do not have
# to live in the notebook. The fallbacks are the values this notebook was
# originally written with, so behaviour is unchanged when nothing is set.
host = os.environ.get("NEO4J_URI", 'bolt://34.16.150.220:7687')
user = os.environ.get("NEO4J_USERNAME", 'neo4j')
password = os.environ.get("NEO4J_PASSWORD", 'password')

# Single shared driver used by every query helper below.
driver = GraphDatabase.driver(host, auth=(user, password))

In [4]:
def run_query(query, params=None):
    """Run a Cypher query on the shared ``driver`` and return the rows.

    Args:
        query: Cypher statement to execute.
        params: Optional mapping of query parameters. ``None`` (the default)
            is treated as "no parameters".

    Returns:
        Whatever ``Result.to_df()`` produces for the query result.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}
    with driver.session() as session:
        # The result is converted while the session is still open.
        return session.run(query, params).to_df()

In [5]:
# Build the text description for a single movie: title, release year and
# tagline on the first line, then one line per relationship type
# (ACTED_IN / DIRECTED) listing the connected names.
single_movie_context_query = """
MATCH (m:Movie)
MATCH (m)-[r:ACTED_IN|DIRECTED]-(t)
WITH m, type(r) as type, collect(t.name) as names
WITH m, type+": "+reduce(s="", n IN names | s + n + ", ") as types
WITH m, collect(types) as contexts
WITH m, "Movie title: "+ m.title + " year: "+coalesce(m.released,"") +" plot: "+ coalesce(m.tagline,"")+"\n" +
       reduce(s="", c in contexts | s + substring(c, 0, size(c)-2) +"\n") as context
RETURN context LIMIT 1
"""
context_01 = run_query(single_movie_context_query)

In [6]:
# Show the generated description of the first returned row.
context_01.loc[0, "context"]

'Movie title: The Matrix year: 1999 plot: Welcome to the Real World\nACTED_IN: Emil Eifrem, Hugo Weaving, Laurence Fishburne, Carrie-Anne Moss, Keanu Reeves\nDIRECTED: Lana Wachowski, Lilly Wachowski\n'

In [14]:
# For every Movie node, rebuild its text description and store the Vertex AI
# embedding of that text on m.embedding. apoc.periodic.iterate handles one
# movie per batch (batchSize:1) with up to 3 retries.
embed_movies_query = """
CALL apoc.periodic.iterate(
  'MATCH (m:Movie) RETURN id(m) AS id',
  'MATCH (m:Movie)
   WHERE id(m) = id
   MATCH (m)-[r:ACTED_IN|DIRECTED]-(t)
   WITH m, type(r) as type, collect(t.name) as names
   WITH m, type+": "+reduce(s="", n IN names | s + n + ", ") as types
   WITH m, collect(types) as contexts
   WITH m, "Movie title: "+ m.title + " year: "+coalesce(m.released,"") +" plot: "+ coalesce(m.tagline,"")+"\n" +
        reduce(s="", c in contexts | s + substring(c, 0, size(c)-2) +"\n") as context
   CALL apoc.ml.vertexai.embedding([context], $access_token, $project) YIELD embedding
   SET m.embedding = embedding',
  {batchSize:1, retries:3, params: {access_token: $access_token,
  project: $project}})
"""
# NOTE(review): both values are blank here (presumably redacted before
# sharing); real Vertex AI credentials are needed for the call to succeed.
vertexai_params = dict(access_token="", project="")
run_query(embed_movies_query, vertexai_params)

Unnamed: 0,batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics
0,38,38,4,38,0,0,0,{},"{'total': 38, 'committed': 38, 'failed': 0, 'e...","{'total': 38, 'committed': 38, 'failed': 0, 'e...",False,{},"{'nodesDeleted': 0, 'labelsAdded': 0, 'relatio..."


In [8]:
# Instruction text for the answer-generating model.
# NOTE(review): no cell visible in this notebook references this variable.
system_prompt = "\n".join(
    [
        "",
        "You are an assistant that helps to generate text to form nice and human understandable answers based.",
        "The latest prompt contains the information, and you need to generate a human readable response based on the given information.",
        "Make the answer sound as a response to the question. Do not mention that you based the result on the given information.",
        "Do not add any additional information that is not explicitly provided in the latest prompt.",
        "I repeat, do not add any information that is not explicitly given.",
        "",
    ]
)

In [9]:
def generate_user_prompt(question, context):
    """Build the user prompt that pairs a question with its supporting context.

    Args:
        question: The question to answer.
        context: Supporting information. A string is inserted as-is; a list or
            tuple of snippets is joined with newlines so the prompt contains
            plain text rather than the Python repr of the sequence.

    Returns:
        The formatted prompt string.
    """
    if isinstance(context, (list, tuple)):
        context = "\n".join(str(snippet) for snippet in context)
    return f"""
   The question is {question}
   Answer the question by using the provided information:
   {context}
   """

In [16]:
def retrieve_context(question, k=3, access_token="", project=""):
    """Return text descriptions of the movies most similar to ``question``.

    The question is embedded with ``apoc.ml.vertexai.embedding``, movies are
    ranked by cosine similarity against their stored ``embedding`` property,
    and each of the top ``k`` movies is rendered as a text description
    (title, year, tagline, cast/directors and up to three similar movies).

    Args:
        question: Natural-language question. It is sent as a query parameter,
            so quotes or other special characters cannot break the Cypher.
        k: Number of top-scoring movies to keep (default 3).
        access_token: Vertex AI access token passed to the embedding call.
        project: Google Cloud project passed to the embedding call.

    Returns:
        A list with one context string per retrieved movie.
    """
    # NOTE(review): the aggregations after LIMIT do not re-apply the score
    # ordering, so the order of the returned rows may not follow similarity.
    data = run_query("""
CALL apoc.ml.vertexai.embedding([$question],
                                $access_token,
                                $project)
YIELD embedding
MATCH (m:Movie)
WITH m, gds.similarity.cosine(embedding, m.embedding) AS score
ORDER BY score DESC
LIMIT $k
MATCH (m)--()--(m1:Movie)
WITH m,m1, count(*) AS count
ORDER BY count DESC
WITH m, apoc.text.join(collect(m1.title)[..3], ", ") AS similarMovies
MATCH (m)-[r:ACTED_IN|DIRECTED]-(t)
WITH m, similarMovies, type(r) as type, collect(t.name) as names
WITH m, similarMovies, type+": "+reduce(s="", n IN names | s + n + ", ") as types
WITH m, similarMovies, collect(types) as contexts
WITH m, "Movie title: "+ m.title + " year: "+coalesce(m.released,"") +" plot: "+ coalesce(m.tagline,"")+"\n" +
reduce(s="", c in contexts | s + substring(c, 0, size(c)-2) +"\n") + "similar movies:" + similarMovies + "\n" as context
RETURN context
  """,
        {
            "question": question,
            "k": int(k),
            "access_token": access_token,
            "project": project,
        },
    )
    return data["context"].to_list()

In [12]:
def generate_answer(question):
    """Answer ``question`` using context retrieved from the graph.

    Retrieves the context with ``retrieve_context``, prints each snippet, then
    sends the combined prompt to ``apoc.ml.vertexai.completion``.

    Relies on the module-level names ``access_token`` and ``project`` being
    defined before the call.

    Args:
        question: Natural-language question to answer.

    Returns:
        The ``answer`` value of the first row returned by the completion query.
    """
    # Retrieve context
    context = retrieve_context(question)
    # Print context so the reader can see what the answer is based on
    for snippet in context:
        print(snippet)
    # Generate answer. The Vertex AI completion value carries the generated
    # text under `content`; `choices[0].message.content` is the OpenAI chat
    # response shape and is not present here.
    response = run_query("""
CALL apoc.ml.vertexai.completion($user,$apiKey,$project)
YIELD value
RETURN value.content AS answer
  """,
        {
            "user": generate_user_prompt(question, context),
            "apiKey": access_token,
            "project": project,
        },
    )
    return response["answer"][0]

In [18]:
# Smoke test: embed one sample sentence through the Vertex AI procedure.
# NOTE(review): token and project arguments are blank (presumably redacted).
sample_embedding_query = """
CALL apoc.ml.vertexai.embedding(['Who played matrix'],
                                '',
                                '')
YIELD embedding"""
run_query(sample_embedding_query)

Unnamed: 0,embedding
0,"[0.015150133520364761, -0.026424551382660866, ..."


In [19]:
# Inspect the contexts retrieved for a sample question.
retrieve_context(question='Who played in the Matrix?')

['Movie title: The Matrix year: 1999 plot: Welcome to the Real World\nACTED_IN: Emil Eifrem, Hugo Weaving, Laurence Fishburne, Carrie-Anne Moss, Keanu Reeves\nDIRECTED: Lana Wachowski, Lilly Wachowski\nsimilar movies:The Matrix Revolutions, The Matrix Reloaded, V for Vendetta\n',
 'Movie title: The Matrix Reloaded year: 2003 plot: Free your mind\nDIRECTED: Lana Wachowski, Lilly Wachowski\nACTED_IN: Hugo Weaving, Laurence Fishburne, Carrie-Anne Moss, Keanu Reeves\nsimilar movies:The Matrix Revolutions, The Matrix, V for Vendetta\n',
 'Movie title: The Matrix Revolutions year: 2003 plot: Everything that has a beginning has an end\nDIRECTED: Lana Wachowski, Lilly Wachowski\nACTED_IN: Hugo Weaving, Laurence Fishburne, Carrie-Anne Moss, Keanu Reeves\nsimilar movies:The Matrix Reloaded, The Matrix, V for Vendetta\n']

In [None]:
# End-to-end run: retrieve context for the sample question and generate an answer.
output = generate_answer(question="Who played in the Matrix?")