In [1]:
# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI

import textwrap

In [2]:
# Load from environment
from dotenv import load_dotenv
import os

load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

OPENAI_ENDPOINT = os.getenv('OPENAI_BASE_URL') + '/embeddings'

In [3]:

# connect to graph using langchain
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD
)


## Create question chain using Neo4j vectorstore

In [31]:
kg.schema

'Node properties are the following:\nItem {id: STRING, type: STRING, item_refs: LIST, chapter_refs: LIST, textEmbedding: LIST, text: STRING},Section {id: STRING, type: STRING, text: STRING},Chapter {id: STRING, type: STRING, text: STRING}\nRelationship properties are the following:\n\nThe relationships are the following:\n(:Item)-[:PART_OF]->(:Section),(:Item)-[:PART_OF]->(:Chapter),(:Item)-[:PART_OF]->(:Item),(:Item)-[:REFERENCES]->(:Section),(:Item)-[:REFERENCES]->(:Item),(:Section)-[:PART_OF]->(:Chapter),(:Section)-[:PART_OF]->(:Section)'

In [152]:
retrieval_query_window = """
MATCH (queried_item:Item)
WITH queried_item, score
LIMIT 3 // 
MATCH (queried_item)-[:REFERENCES]->(reference)
WITH queried_item, reference
ORDER BY reference.id 
OPTIONAL MATCH path=(reference)<-[:PART_OF*0..]-(lowestLevel:Item)
WHERE NOT EXISTS((lowestLevel)-[:PART_OF]->()) OR reference:Item
WITH queried_item, COLLECT(queried_item) + COLLECT(reference) + COLLECT(DISTINCT lowestLevel) AS allItems
UNWIND allItems AS result
RETURN apoc.text.join(COLLECT(DISTINCT result.text), "\n") as text,
{ 
      source: queried_item.id
    } as metadata,
    score


"""

VECTOR_INDEX_NAME = "ACI318-19_items"
VECTOR_SOURCE_PROPERTY = 'text'
# VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

vector_store_window = Neo4jVector.from_existing_index(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database="neo4j",
    index_name=VECTOR_INDEX_NAME,
    text_node_property=[VECTOR_SOURCE_PROPERTY],
    retrieval_query=retrieval_query_window, # NEW!!!
)

# Create a retriever from the vector store
retriever = vector_store_window.as_retriever()

# Create a chatbot Question & Answer chain from the retriever
struct_info_chain = RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0), 
    chain_type="stuff", 
    retriever=retriever
)

Failed to write data to connection ResolvedIPv4Address(('34.121.155.65', 7687)) (ResolvedIPv4Address(('34.121.155.65', 7687)))
Failed to write data to connection IPv4Address(('3202934d.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.121.155.65', 7687)))


In [96]:
kg.query("SHOW INDEXES")

[{'id': 4,
  'name': 'ACI318-19_items',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Item'],
  'properties': ['textEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 27, 19, 55, 30, 167000000, tzinfo=<UTC>),
  'readCount': 36},
 {'id': 0,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 27, 19, 49, 59, 957000000, tzinfo=<UTC>),
  'readCount': 165},
 {'id': 1,
  'name': 'index_f7700477',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'RELATIONSHIP',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.

In [112]:
def neo4j_vector_search(question):
  """Search for similar nodes using the Neo4j vector index"""
  vector_search_query = """
    WITH genai.vector.encode(
      $question, 
      "OpenAI", 
      {
        token: $openAiApiKey,
        endpoint: $openAiEndpoint
      }) AS question_embedding
    CALL db.index.vector.queryNodes($index_name, $top_k, question_embedding) yield node, score
    RETURN score, node.text AS text
  """
  
  similar = kg.query(vector_search_query, 
                     params={
                      'question': question, 
                      'openAiApiKey':OPENAI_API_KEY,
                      'openAiEndpoint': OPENAI_ENDPOINT,
                      'index_name':VECTOR_INDEX_NAME, 
                      'top_k': 10})
  return similar

In [155]:
print(neo4j_vector_search("how do i determine loads"))

[{'score': 0.9022544622421265, 'text': 'The total test load  T t , including dead load already in place, shall be at least the greatest of (a), (b), and (c):    (a) T t  = 1.0 D w  + 1.1 D s  + 1.6 L  + 0.5( L r or S or R )  (27.4.6.2a)        (b) T t  = 1.0 D w  + 1.1 D s  + 1.0 L  + 1.6( L r or S or R )  (27.4.6.2b)        (c) T t = 1.3( D w  + D s )  (27.4.6.2c)   '}, {'score': 0.8994762301445007, 'text': 'The effect of one or more loads not acting simultaneously shall be investigated.'}, {'score': 0.8988335132598877, 'text': 'Test load arrangements shall be selected to maximize the load effects in the critical regions of the members being evaluated.'}, {'score': 0.8966745734214783, 'text': 'If the effect of a strength deficiency is not well understood or it is not practical to measure the dimensions and determine the material properties of the members required for analysis, a load test is required in accordance with 27.4 .'}, {'score': 0.8963295221328735, 'text': 'Required strength

## Simple Query


In [5]:
retrieval_query_window = """
MATCH (queried_item:Item)
WITH queried_item, score
    MATCH (queried_item)-[REFERENCES]->(ref_item:Item)
RETURN queried_item.text + ref_item.text as text,
{ 
      source: queried_item.id
    } as metadata,
    score
LIMIT 5
"""

VECTOR_INDEX_NAME = "ACI318-19_items"
VECTOR_SOURCE_PROPERTY = 'text'
# VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

vector_store_window = Neo4jVector.from_existing_index(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database="neo4j",
    index_name=VECTOR_INDEX_NAME,
    text_node_property=[VECTOR_SOURCE_PROPERTY],
    retrieval_query=retrieval_query_window, # NEW!!!
)

# Create a retriever from the vector store
retriever = vector_store_window.as_retriever()

# Create a chatbot Question & Answer chain from the retriever
struct_info_chain = RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0), 
    chain_type="stuff", 
    retriever=retriever
)

In [None]:
def add_to_unique_ref_list(ref_list: list):

In [12]:
question = "what requirements should I look at when designing a concrete seismic-froce resisting system?"

struct_info_chain(
    {"question": question},
    return_only_outputs=True,
)

{'answer': 'When designing a concrete seismic-force resisting system, you should look at the requirements for reinforcement in uncased cast-in-place drilled or augered concrete piles, as well as the design of anchors and shear lugs. Additionally, consider the spacing and amount of transverse reinforcement, as well as the maximum spacing of shear reinforcement. \n',
 'sources': '10.3.1.5, 18.13.5.7.1, 26.7.1, 18.4.3.5'}

In [13]:
# compare simple chain with neo 4j vector search
question = "I want to design a concrete column. How do i figure out how much rebar I need to add and how I should space out the rebar?"
print("Struct info Chain:")
print(struct_info_chain(
    {"question": question},
    return_only_outputs=True,
))
print("\n Neo4J Vector Search:")
print(neo4j_vector_search(question))



Struct info Chain:
{'answer': "I don't know. \n", 'sources': ''}

 Neo4J Vector Search:


NameError: name 'neo4j_vector_search' is not defined