In [1]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from dotenv import load_dotenv

# Load the environment variables from the specified .env file
dotenv_path = '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/.env'
load_dotenv(dotenv_path)

# Load the OpenAI API key from the environment variable
api_key = os.getenv('GRAPHRAG_API_KEY')
assert api_key is not None, "OpenAI API key not found in environment variables."


documents = SimpleDirectoryReader("/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input").load_data()
print(documents)
print(documents[0].text)

[Document(id_='ab6850c6-e271-4a35-8e31-ce801e67df70', embedding=None, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input/Sec124.txt', 'file_name': 'Sec124.txt', 'file_type': 'text/plain', 'file_size': 136812, 'creation_date': '2024-07-19', 'last_modified_date': '2024-07-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Introduction \nand Overview of \nManufacturing \n \n \n \n \nChapter Contents \n1.1 What Is Manufacturing? \n1.1.1 Manufacturing Defined \n1.1.2 Manufacturing Industries and \nProducts \n1.1.3 Manufacturing Capability \n1.2 Materials in Manufacturing \n1.2.1 Metals \n1.2.2 Ceramics \n1.2.3 Polymers \n1.2.4 Composites \n1.3 Manufacturing Processes \n1.3.1 Processing Operations \n1.3.2 Assem

In [2]:
os.environ["OPENAI_API_KEY"] = api_key
# index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist() # Save the index to disk
# index

In [3]:
# ## To reload you can run 
from llama_index.core import StorageContext, load_index_from_storage

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# load index
index = load_index_from_storage(storage_context)

## This section below can virtualize the chunk size

In [2]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=300, chunk_overlap=100)
nodes = text_splitter.get_nodes_from_documents(documents=documents)


In [3]:
nodes

[TextNode(id_='b5429916-1466-47a7-af8f-af8217f05132', embedding=None, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input/Sec124.txt', 'file_name': 'Sec124.txt', 'file_type': 'text/plain', 'file_size': 136812, 'creation_date': '2024-07-19', 'last_modified_date': '2024-07-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='cc53946f-a0a8-43dc-acee-53fc5c09129e', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input/Sec124.txt', 'file_name': 'Sec124.txt', 'file_type': 'text/plain', 'file_size': 136812, 'creation_date': '2024-07-19', 'last_modified_date': '2024-07-19'}, hash='e8096f0

In [4]:
print(len(documents))
print(len(nodes))

1
177


## Single result test

In [4]:
retriever = index.as_retriever()

In [5]:
retriever.retrieve("noncrystalline structure")

[NodeWithScore(node=TextNode(id_='b30af281-c625-4c31-a03b-872829a3ac79', embedding=None, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input/Sec124.txt', 'file_name': 'Sec124.txt', 'file_type': 'text/plain', 'file_size': 136812, 'creation_date': '2024-07-19', 'last_modified_date': '2024-07-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0c20e6ff-0d16-4336-8785-d5fb64141c74', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input/Sec124.txt', 'file_name': 'Sec124.txt', 'file_type': 'text/plain', 'file_size': 136812, 'creation_date': '2024-07-19', 'last_modified_date': '2024-07-

In [10]:
llm = OpenAI(model="gpt-4o")
query_engine = index.as_query_engine(llm=llm)

In [None]:
query_engine.query("Define manufacturing.")

In [13]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x7f80eec532e0>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.

## Continue

In [6]:
from llama_index.core import PromptTemplate


new_summary_tmpl_str = (
    "Answer the question based only on the following context:\n"
    "{context_str}\n"
    "Answer the following question:"
    "Query: {query_str}\n"
    "Answer: "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

In [11]:
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": new_summary_tmpl}
)

In [12]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Answer the question based only on the following context:\n{context_str}\nAnswer the following question:Query: {query_str}\nAnswer: '), 'response_synthesizer:refine_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="The original query is as follows: {query_str}\nWe have provided an existing answer: {existing_answer}\nWe h

In [None]:
## Single Test
result = query_engine.query("Define manufacturing.")

In [13]:
import pandas as pd

# Define the file path
csv_file_path = '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/TestResult.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)
questions = df['Question'].tolist()
results = []
for question in questions:
    result = query_engine.query(question)
    print(result)
    results.append(result.response)

Outputs_df = pd.DataFrame(results, columns=['LangChain'])
Outputs_df

The elements listed in the Periodic Table can be divided into three categories: metals, nonmetals, and metalloids (or semimetals). Examples of each category are:

- Metals: Iron (Fe)
- Nonmetals: Nitrogen (N)
- Metalloids (or Semimetals): Silicon (Si)
Some materials that possess a noncrystalline structure include glass (e.g., window glass) and fused silica.
The noble metals are copper, silver, and gold.
Primary bonding involves strong atom-to-atom attractions that include the exchange or sharing of valence electrons, forming bonds such as ionic, covalent, and metallic. These bonds are generally associated with the formation of molecules and are much stronger than secondary bonds. In contrast, secondary bonding involves attraction forces between molecules (intermolecular forces) without the transfer or sharing of electrons, making these bonds weaker. Examples of secondary bonding include dipole forces, London forces, and hydrogen bonding.
Ionic bonding works by the transfer of electrons

Unnamed: 0,LangChain
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,


In [34]:
response = results[-45:]


response

['The elements listed in the Periodic Table can be divided into three categories: metals, nonmetals, and metalloids (or semimetals). Examples of each category are:\n\n- Metals: Iron (Fe)\n- Nonmetals: Nitrogen (N)\n- Metalloids (or Semimetals): Silicon (Si)',
 'Some materials that possess a noncrystalline structure include glass (e.g., window glass) and fused silica.',
 'The noble metals are copper, silver, and gold.',
 'Primary bonding involves strong atom-to-atom attractions that include the exchange or sharing of valence electrons, forming bonds such as ionic, covalent, and metallic. These bonds are generally associated with the formation of molecules and are much stronger than secondary bonds. In contrast, secondary bonding involves attraction forces between molecules (intermolecular forces) without the transfer or sharing of electrons, making these bonds weaker. Examples of secondary bonding include dipole forces, London forces, and hydrogen bonding.',
 'Ionic bonding works by the

In [35]:
Outputs_df = pd.DataFrame(response, columns=['LlamaIndex'])
Outputs_df

Unnamed: 0,LlamaIndex
0,The elements listed in the Periodic Table can ...
1,Some materials that possess a noncrystalline s...
2,"The noble metals are copper, silver, and gold."
3,Primary bonding involves strong atom-to-atom a...
4,Ionic bonding works by the transfer of electro...
5,Crystalline structures in materials are charac...
6,Some common point defects in a crystal lattice...
7,Elastic deformation in a crystal lattice struc...
8,Grain boundaries contribute to the strain hard...
9,Some materials that have a crystalline structu...


In [36]:
df = pd.concat([df, Outputs_df], axis=1)
df

Unnamed: 0,Question,Answer,GraphRag,LangChain,LlamaIndex
0,The elements listed in the Periodic Table can ...,Answer. The three categrories of elements are ...,The elements in the Periodic Table can be divi...,The elements listed in the Periodic Table can ...,The elements listed in the Periodic Table can ...
1,Identify some materials that possess a noncrys...,Answer. Materials typically having a noncrysta...,### Noncrystalline Materials\n\nNoncrystalline...,Some materials that possess a noncrystalline s...,Some materials that possess a noncrystalline s...
2,Which elements are the noble metals?,"Answer. The noble metals are copper, silver, a...",### Noble Metals\n\nNoble metals are a group o...,"The noble metals are copper, silver, and gold.","The noble metals are copper, silver, and gold."
3,What is the difference between primary and sec...,Answer. Primary bonding is strong bonding betw...,### Differences Between Primary and Secondary ...,Primary bonding involves strong atom-to-atom a...,Primary bonding involves strong atom-to-atom a...
4,Describe how ionic bonding works?,"Answer. In ionic bonding, atoms of one element...",### Understanding Ionic Bonding\n\nIonic bondi...,Ionic bonding involves the transfer of outer e...,Ionic bonding works by the transfer of electro...
5,What is the difference between crystalline and...,Answer. The atoms in a crystalline structure a...,### Differences Between Crystalline and Noncry...,The difference between crystalline and noncrys...,Crystalline structures in materials are charac...
6,What are some common point defects in a crysta...,Answer. The common point defects are (1) vacan...,### Common Point Defects in a Crystal Lattice ...,Some common point defects in a crystal lattice...,Some common point defects in a crystal lattice...
7,Define the difference between elastic and plas...,Answer. Elastic deformation involves a tempora...,### Elastic Deformation\n\nElastic deformation...,Elastic deformation involves a temporary tilti...,Elastic deformation in a crystal lattice struc...
8,How do grain boundaries contribute to the stra...,Answer. Grain boundaries block the continued m...,### Introduction\n\nGrain boundaries play a cr...,Grain boundaries contribute to the strain hard...,Grain boundaries contribute to the strain hard...
9,Identify some materials that have a crystallin...,Answer. Materials typically possessing a cryst...,### Crystalline Structures in Engineering Mate...,Some materials that have a crystalline structu...,Some materials that have a crystalline structu...


In [37]:
df.to_csv(csv_file_path, index=False)