In [None]:
# See --> https://blog.stackademic.com/using-neo4j-and-langchain-for-knowledge-graph-creation-a-detailed-guide-84e7a74495eb

In [22]:
from dotenv import load_dotenv
import os

# Common data processing
import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader

In [23]:
# Load PDF file
loader = PyPDFLoader("data/MIL-STD-1289D.pdf")
pages = loader.load_and_split()

In [26]:
# Split pages into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_documents(pages)
# chunks

In [27]:
# Warning control
import warnings
warnings.filterwarnings("ignore")

# Set api key
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

# Load from environment from the credentials file
NEO4J_URI = "bolt://10.16.0.100:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "testpassword"
NEO4J_DATABASE = 'neo4j'

# Global constants
VECTOR_INDEX_NAME = 'pdf_chunks'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

In [28]:
# Create Neo4j vector store
neo4j_vector_store = Neo4jVector.from_documents(
    embedding=OpenAIEmbeddings(),
    documents=chunks,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name=VECTOR_INDEX_NAME,
    text_node_property=VECTOR_SOURCE_PROPERTY,
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [29]:
# Construct Relationships: We establish relationships between the chunks within the graph, 
# indicating their sequential order and their association with the parent PDF document.

# Create a PDF node
cypher = """
MERGE (p:PDF {name: $pdfName})
RETURN p
"""
kg.query(cypher, params={'pdfName': "data/MIL-STD-1289D.pdf"})

[{'p': {'name': 'data/MIL-STD-1289D.pdf'}}]

In [30]:
# Connect chunks to their parent PDF with a PART_OF relationship
cypher = """
MATCH (c:Chunk), (p:PDF)
WHERE p.name = $pdfName
MERGE (c)-[newRelationship:PART_OF]->(p)
RETURN count(newRelationship)
"""
kg.query(cypher, params={'pdfName': "data/MIL-STD-1289D.pdf"})

[{'count(newRelationship)': 84}]

In [31]:
# Create a NEXT relationship between subsequent chunks
cypher = """
MATCH (c1:Chunk), (c2:Chunk)
WHERE c1.chunkSeqId = c2.chunkSeqId - 1
MERGE (c1)-[r:NEXT]->(c2)
RETURN count(r)
"""
kg.query(cypher)

[{'count(r)': 0}]

In [32]:
# Create a retriever from the vector store
retriever = neo4j_vector_store.as_retriever()

In [33]:
# Create a chatbot Question & Answer chain from the retriever
chain = RetrievalQAWithSourcesChain.from_chain_type(
    OpenAI(temperature=0), 
    chain_type="stuff",
    retriever=retriever
)

In [34]:
# Ask a question
question = "What is the main topic of this PDF document?"
answer = chain(
    {"question": question},
    return_only_outputs=True,
)
print(textwrap.fill(answer["answer"]))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


 The main topic of this PDF document is MIL-STD-1289D, which is a
standard for support equipment for aeronautical and airborne stores.


In [35]:
# Returns the node count
kg.query("""
         MATCH (n)
         RETURN count(n) as nodeCount
         """)

[{'nodeCount': 85}]

In [36]:
kg.refresh_schema()
print(kg.schema)

Node properties:
Chunk {source: STRING, text: STRING, textEmbedding: LIST, page: INTEGER, id: STRING, embedding: LIST}
PDF {name: STRING}
Relationship properties:

The relationships:
(:Chunk)-[:PART_OF]->(:PDF)


In [37]:
# Show the indexes
kg.query("SHOW INDEXES")

[{'id': 4,
  'name': 'constraint_1dc138a',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'RANGE',
  'entityType': 'NODE',
  'labelsOrTypes': ['Chunk'],
  'properties': ['id'],
  'indexProvider': 'range-1.0',
  'owningConstraint': 'constraint_1dc138a',
  'lastRead': neo4j.time.DateTime(2024, 6, 12, 17, 32, 36, 226000000, tzinfo=<UTC>),
  'readCount': 287},
 {'id': 1,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 6, 12, 17, 35, 49, 639000000, tzinfo=<UTC>),
  'readCount': 592},
 {'id': 2,
  'name': 'index_f7700477',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'RELATIONSHIP',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo

In [38]:
# Ask a question
question = "What is mission engineering?"
answer = chain(
    {"question": question},
    return_only_outputs=True,
)
print(textwrap.fill(answer["answer"]))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


 Mission engineering is the process of ensuring that a system will
perform according to the requirements of the entire test program. It
involves testing for factors such as accessibility, alignment,
armament compatibility, and electrical function. It is defined in MIL-
STD-1289D, which can be accessed online or through SAE Customer
Service.


In [41]:
# Ask a question
question = "List the steps do I need to perform when doing missing engineering and provide a refence page or table from the MIL-STD-1289D?"
answer = chain(
    {"question": question},
    return_only_outputs=True,
)
print(textwrap.fill(answer["answer"]))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


 The steps to perform when doing missing engineering are: 1) Verify
the currency of the information using the ASSIST Online database, 2)
Ensure safety measures are in place to prevent inadvertent release or
function of store, 3) Safely any parts that may work loose and create
a hazard, 4) Contact reinforced area in accordance with MIL-A-8591, 5)
Test the release system electrical devices and wiring to meet SAE-
AS50881 and EMC/EMI criteria of MIL-STD-464, 6) Ensure no mechanical
strain due to airstream forces exists, 7) Conduct satisfactory
function tests of the complete system, 8) Conduct electrical function
tests, 9) Conduct store functional tests, 10) Conduct armament control
system tests, 11) Verify compatibility with armament weapons support
equipment, 12) Ensure all necessary tools are available, and 13)
Provide a common area for store cradling/handling.
