In [1]:
from langchain_community.graphs import Neo4jGraph
import os
from typing import List
from openai import OpenAI
import os

**Create the OpenAI client and the embedding helper**

In [2]:
# OpenAI API client shared by the embedding call and the chat-completion calls
# later in the notebook. No key is passed here, so the client has to obtain its
# credentials elsewhere (presumably the OPENAI_API_KEY environment variable —
# confirm for the installed openai version).
client = OpenAI()
# Chat model used for the final answer-generation step.
model_name = "gpt-3.5-turbo"

def embed_text(text: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Embed the given text with an OpenAI embedding model.

    Parameters:
        text (str): The text to be embedded.
        model (str): Name of the embedding model to request. Defaults to
            "text-embedding-ada-002", the model this helper previously
            hard-coded, so existing one-argument calls behave as before.

    Returns:
        List[float]: The embedding vector for ``text``.
    """
    # NOTE(review): a query vector is only comparable with vectors produced by
    # the same model — presumably the Neo4j vector index was built with the
    # default model; confirm before passing a different `model`.
    response = client.embeddings.create(input=text, model=model)
    # Only one input string is sent, so the first entry of `data` is used.
    return response.data[0].embedding

**Add Neo4j credentials (this information needs to be kept secret)**

In [3]:
# Connection settings are read from the environment so that no secret is stored
# in the notebook or its version history. The non-secret settings fall back to
# the local-development values this notebook originally used.
NEO4J_URL = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# Deliberately no fallback for the password: it is None when NEO4J_PASSWORD is
# unset, so a missing secret surfaces when connecting instead of a baked-in
# value being used silently. Export NEO4J_PASSWORD before starting the kernel.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

In [4]:
# Graph handle used for every Cypher query in the rest of the notebook,
# built from the NEO4J_* settings in the previous cell.
graph = Neo4jGraph(
    url=NEO4J_URL,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

**Sample question for RAG:**

In [5]:
# Natural-language question that the following cells embed, search for, and answer.
question = "What movies are about love?"

**Get the question's embedding:**

In [6]:
# Turn the question into a vector with the embed_text helper defined earlier.
question_embedding = embed_text(question)
# Preview just the first 10 components rather than displaying the whole vector.
question_embedding[:10]

[-0.004103008657693863,
 -0.030376773327589035,
 0.0007277242839336395,
 -0.027677176520228386,
 -0.00803571566939354,
 0.0019726611208170652,
 -0.006635457742959261,
 -0.02149585634469986,
 -0.0020720036700367928,
 -0.007903259247541428]

**Perform Similarity Search using the question's embedding on the vector index of the graph database and get the results**

In [10]:
# Cypher for the nearest-neighbour lookup; the `//` annotations are part of the
# query text that is sent to the database.
tagline_search_cypher = """
    with $question_embedding as question_embedding      // Use the provided question embedding as 'question_embedding'
    CALL db.index.vector.queryNodes(                    // Call the vector index query function
        'movie_tagline_embeddings',                     // Name of the vector index to query against
        $top_k,                                         // Number of top results to retrieve
        question_embedding                              // The question embedding to compare against
        ) YIELD node AS movie, score                    // Yield each matched node and its similarity score
    RETURN movie.title, movie.tagline, score            // Return the title, tagline, and similarity score of each movie
    """

# Values bound to the $question_embedding and $top_k placeholders above.
search_params = {
    "question_embedding": question_embedding,
    "top_k": 3,
}

result = graph.query(tagline_search_cypher, params=search_params)
result

[{'movie.title': 'Heat',
  'movie.tagline': 'A Los Angeles crime saga',
  'score': 0.8907528519630432},
 {'movie.title': 'Grumpier Old Men',
  'movie.tagline': 'Still Yelling. Still Fighting. Still Ready for Love.',
  'score': 0.8872848749160767},
 {'movie.title': 'Tom and Huck',
  'movie.tagline': 'The Original Bad Boys.',
  'score': 0.8832330107688904}]

**Pass the results to an LLM for the final answer**

In [11]:
# Instruction for the system message: answer the question from the search results.
system_instruction = (
    "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer."
)

# User message: the question followed by the raw search results.
prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"

messages = [
    dict(role="system", content=system_instruction),
    dict(role="user", content=prompt),
]

response = client.chat.completions.create(messages=messages, model=model_name)

# Print the text of the first returned choice.
answer_text = response.choices[0].message.content
print(answer_text)

Some movies about love are:
1. **Heat**: A Los Angeles crime saga
2. **Grumpier Old Men**: Still Yelling. Still Fighting. Still Ready for Love.
3. **Tom and Huck**: The Original Bad Boys.


-----------------------

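**Note: In this use case there is a higher chance of hallucination, because the search results do not give the LLM enough evidence to exercise its own judgment (only one of the three retrieved taglines mentions love, yet all three movies are presented as answers). Improving the contents of the vector DB and tightening the system prompt (the system-role message) can address this issue to some extent.**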

-----------------------

**Second example (in one go):**

In [13]:
def search_movie_taglines(question_embedding, top_k=5):
    """
    Find the movies whose tagline embeddings are closest to a question embedding.

    Parameters:
        question_embedding (List[float]): Embedding of the user's question.
        top_k (int): Number of nearest matches to request from the index.

    Returns:
        The rows returned by ``graph.query``; each row carries the keys
        'movie.title', 'movie.tagline' and 'score'.
    """
    return graph.query(
        """
        with $question_embedding as question_embedding
        CALL db.index.vector.queryNodes(
            'movie_tagline_embeddings',
            $top_k,
            question_embedding
        ) YIELD node AS movie, score
        RETURN movie.title, movie.tagline, score
        """,
        params={"question_embedding": question_embedding, "top_k": top_k},
    )


def build_rag_messages(question, search_results):
    """
    Build the chat messages that ask the LLM to answer from the search results.

    Parameters:
        question (str): The user's question.
        search_results: Rows returned by the graph similarity search.

    Returns:
        list: A system message with the instructions, followed by a user
        message containing the question and the search results.
    """
    user_prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{search_results}"
    return [
        {
            "role": "system",
            "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer.",
        },
        {"role": "user", "content": user_prompt},
    ]


# Same retrieve-then-generate flow as the first example, expressed through the
# helpers above so that only the question and top_k differ.
question = "What movies are about adventure?"
question_embedding = embed_text(question)
result = search_movie_taglines(question_embedding, top_k=5)

messages = build_rag_messages(question, result)
# Keep `prompt` defined at notebook level, as the original cell did.
prompt = messages[-1]["content"]

response = client.chat.completions.create(
    model=model_name,
    messages=messages
)

print(response.choices[0].message.content)

Here are some movies about adventure:
1. Toy Story - "The adventure takes off!"
2. Cutthroat Island - "The Course Has Been Set. There Is No Turning Back. Prepare Your Weapons. Summon Your Courage. Discover the Adventure of a Lifetime!"
3. Tom and Huck - "The Original Bad Boys."
4. Jumanji - "Roll the dice and unleash the excitement!"
