In [2]:
# Setup and imports
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph
from langchain_openai import ChatOpenAI

# Load environment variables (python-dotenv) before any of them are read below.
load_dotenv()

# Optional setting: resolves to None when the variable is absent.
AURA_INSTANCENAME = os.getenv("AURA_INSTANCENAME")

# Required Neo4j connection settings: indexing os.environ raises KeyError if one is missing.
NEO4J_URI = os.environ["NEO4J_URI"]
NEO4J_USERNAME = os.environ["NEO4J_USERNAME"]
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
AUTH = NEO4J_USERNAME, NEO4J_PASSWORD

# OpenAI settings: the API key is required, the endpoint is optional (None when unset).
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
OPENAI_ENDPOINT = os.getenv("OPENAI_ENDPOINT")

# Chat model client, configured with the API key only.
chat = ChatOpenAI(api_key=OPENAI_API_KEY)

# Graph client used by every query cell; no `database` argument is passed.
kg = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)


In [None]:
# One-time setup: define a vector index named health_providers_embeddings over
# HealthcareProvider.comprehensiveEmbedding (1536 dimensions, cosine similarity).
# The statement carries IF NOT EXISTS, so the cell can be executed again.
create_index_cypher = """
    CREATE VECTOR INDEX health_providers_embeddings IF NOT EXISTS
    FOR (hp:HealthcareProvider) ON (hp.comprehensiveEmbedding)
    OPTIONS {
      indexConfig: {
        `vector.dimensions`: 1536,
        `vector.similarity_function`: 'cosine'
      }
    }
    """
kg.query(create_index_cypher)

# List the vector indexes and display them as the cell output for a visual check.
show_indexes_cypher = """
  SHOW VECTOR INDEXES
  """
res = kg.query(show_indexes_cypher)
res


In [None]:
# Populate embeddings (run once)
#
# For every HealthcareProvider that has a bio and at least one TREATS relationship
# to a Patient, encode the bio with the "OpenAI" provider of genai.vector.encode
# and store the resulting vector in the `comprehensiveEmbedding` property.
#
# The MATCH pattern produces one row per (provider, patient) pair. `WITH DISTINCT hp`
# collapses those rows to one per provider, so the embedding call and the property
# write happen once per provider rather than once per treated patient.
kg.query(
    """
    MATCH (hp:HealthcareProvider)-[:TREATS]->(:Patient)
    WHERE hp.bio IS NOT NULL
    WITH DISTINCT hp
    WITH hp, genai.vector.encode(
        hp.bio,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS vector
    WITH hp, vector
    WHERE vector IS NOT NULL
    CALL db.create.setNodeVectorProperty(hp, "comprehensiveEmbedding", vector)
    """,
    params={
        "openAiApiKey": OPENAI_API_KEY,
        # May be None: OPENAI_ENDPOINT is read with os.environ.get in the setup cell.
        "openAiEndpoint": OPENAI_ENDPOINT,
    },
)


In [None]:
# Spot check: fetch up to five providers that have a bio and return name, bio and
# the stored comprehensiveEmbedding so the populated property can be eyeballed.
sample_providers_cypher = """
    MATCH (hp:HealthcareProvider)
    WHERE hp.bio IS NOT NULL
    RETURN hp.bio AS bio, hp.name AS name, hp.comprehensiveEmbedding AS embedding
    LIMIT 5
    """
result = kg.query(sample_providers_cypher)
result


In [None]:
# Vector query demo: embed a natural-language question inside the database, look up
# the closest nodes in the health_providers_embeddings index, and print each hit.
question = "give me a list of healthcare providers in the area of cardiology"

similarity_search_cypher = """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'health_providers_embeddings',
        $top_k,
        question_embedding
        ) YIELD node AS healthcare_provider, score
    RETURN healthcare_provider.name AS name, healthcare_provider.bio AS bio, score AS score
    """

# Query parameters: OpenAI credentials, the question text, and how many hits to return.
search_params = {
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
    "question": question,
    "top_k": 3,
}

result = kg.query(similarity_search_cypher, params=search_params)

# Pretty print: name, bio and score on separate lines, then a dashed separator.
for record in result:
    print(
        f"Name: {record['name']}",
        f"Bio: {record['bio']}",
        f"Score: {record['score']}",
        "---",
        sep="\n",
    )


Name: Dr. Jessica Lee
Bio: Dr. Jessica Lee is a dermatologist focused on skin cancer treatment and prevention.
Score: 0.896636962890625
---
Name: Dr. Sarah Johnson
Bio: Dr. Sarah Johnson is a pediatrician known for her compassionate care.
Score: 0.8798370361328125
---
Name: Dr. Emily Davis
Bio: Dr. Emily Davis specializes in neurology and has published numerous research papers.
Score: 0.872283935546875
---
