In [1]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from dotenv import load_dotenv

# Load the environment variables from the specified .env file
dotenv_path = '/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/.env'
load_dotenv(dotenv_path)

# Load the OpenAI API key from the environment variable
api_key = os.getenv('GRAPHRAG_API_KEY')

# Validate with an explicit check rather than `assert`: assertions are removed
# when Python runs with -O, and an empty value is just as unusable as a missing
# one, so both cases fail here instead of later inside an API call.
if not api_key:
    raise RuntimeError("OpenAI API key not found in environment variables.")

In [2]:
# Restrict ingestion to plain-text files.
required_exts = [".txt"]

# Walk the input directory (including sub-directories) and load every matching file.
documents = SimpleDirectoryReader(
    input_dir="/home/zihan/Desktop/Manufacturing_QA/Experimental_and_Test/GraphRag/input",
    recursive=True,
    required_exts=required_exts,
).load_data()

print(f"Loaded {len(documents)} docs")

Loaded 1 docs


Since only a single file was uploaded, we define a chunk size so that it is split into smaller pieces.

In [3]:
# Re-export the key loaded from GRAPHRAG_API_KEY under OPENAI_API_KEY
# (presumably the variable the OpenAI client looks up; verify against its docs).
os.environ["OPENAI_API_KEY"] = api_key

# One-time index build, left disabled; a later cell reloads the index from "./storage".
# NOTE(review): confirm that chunk_size / chunk_overlap passed to from_documents are
# actually honored and not silently ignored before re-enabling this.
# # Build this from documents to create the index
# index = VectorStoreIndex.from_documents(documents, chunk_size=3000, chunk_overlap=300)
# index.storage_context.persist() # Save the index to disk
# index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x77dc22cdb450>

In [2]:
# Reload a previously persisted index instead of rebuilding it.
from llama_index.core import StorageContext, load_index_from_storage

# The key must be exported before the index is loaded in a fresh kernel.
os.environ["OPENAI_API_KEY"] = api_key

# Point a storage context at the on-disk persistence directory ...
storage_context = StorageContext.from_defaults(
    persist_dir="./storage",
)
# ... and materialize the index from it.
index = load_index_from_storage(
    storage_context,
)

## Single result test

In [10]:
# Build a retriever over the loaded index; similarity_top_k=5 requests the
# five best-matching chunks per query (one of them is printed in the cell below).
retriever = index.as_retriever(similarity_top_k=5)

In [12]:
# Retrieval-only sanity check: show the raw text of every chunk returned for a
# sample query, each preceded by a 100-character '#' separator line.
results = retriever.retrieve("noncrystalline structure")
for result in results:
    print("#" * 100, result.text, sep="\n")

####################################################################################################
The pattern may be replicated millions of times within 
2.3 
Section 2.3/Crystalline Structures 
42 
 
 
 
FIGURE 2.7 Body-centered cubic (BCC) crystal structure: (a) unit cell with atoms 
indicated as point locations in a three-dimensional axis system; (b) unit cell model 
showing closely packed atoms (sometimes called the hard-ball model); and 
(c) repeated pattern of the BCC structure. 
 
a given crystal. The structure can be viewed in the form of a unit cell, which is the 
basic geometric grouping of atoms that is repeated. To illustrate, consider the unit 
cell for the body-centered cubic (BCC) crystal structure shown in Figure 2.7, one of 
the common structures found in metals. The simplest model of the BCC unit cell is 
illustrated in Figure 2.7(a). Although this model clearly depicts the locations of the 
atoms within the cell, it does not indicate the close packing of the atoms

In [13]:
# LLM used to synthesize answers from the retrieved context.
llm = OpenAI(model="gpt-4o")
# Query engine over the same index; note it uses similarity_top_k=3, whereas the
# standalone retriever above was created with similarity_top_k=5.
query_engine = index.as_query_engine(llm=llm, similarity_top_k=3)

In [14]:
from llama_index.core import PromptTemplate


# Custom question-answering prompt for the response synthesizer.
# {context_str} and {query_str} are the placeholders filled in per query.
# Each instruction ends with "\n" so that "Answer the following question:" and
# "Query: ..." land on separate lines; previously they were fused together as
# "Answer the following question:Query: ..." because adjacent string literals
# are concatenated without any separator.
new_summary_tmpl_str = (
    "You are an expert in manufacturing domain. You need to answer the question regarding manufacturing process and materials correctly. Answer the question based only on the following context:\n"
    "{context_str}\n"
    "Answer the following question:\n"
    "Query: {query_str}\n"
    "Answer: "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

# Swap the default text-QA template of the query engine for the custom one.
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": new_summary_tmpl}
)

# Print the template now registered on the engine to confirm the update took effect.
prompts_dict = query_engine.get_prompts()
print(prompts_dict['response_synthesizer:text_qa_template'].template)

You are an expert in manufacturing domain. You need to answer the question regarding manufacturing process and materials correctly. Answer the question based only on the following context:
{context_str}
Answer the following question:Query: {query_str}
Answer: 


In [16]:
# Single end-to-end test: run one question through the query engine and
# print only the response text of the returned result.
result = query_engine.query("Define manufacturing.")
print(result.response)

Manufacturing can be defined in two ways: technologically and economically.

**Technologically**, manufacturing is the application of physical and chemical processes to alter the geometry, properties, and/or appearance of a given starting material to make parts or products. It also includes the assembly of multiple parts to make products. The processes involved in manufacturing typically require a combination of machinery, tools, power, and labor, and are usually carried out as a sequence of operations, each bringing the material closer to the desired final state.

**Economically**, manufacturing is the transformation of materials into items of greater value by means of one or more processing and/or assembly operations. The key point is that manufacturing adds value to the material by changing its shape or properties, or by combining it with other materials that have been similarly altered. This added value is what makes the material more valuable through the manufacturing operations p

In [15]:
import pandas as pd

# Location of the multiple-choice question sheet; the same path is written back
# to at the end of the notebook.
csv_file_path = "/home/zihan/Desktop/Manufacturing_QA/Test/ch24MCQ.csv"

# Load the sheet and display it as the cell output.
df = pd.read_csv(filepath_or_buffer=csv_file_path)
df

Unnamed: 0,Question,Answer,LangChain
0,Approximately how many different elements have...,Answer. (c),The correct answer is (c) 100.
1,"In the Periodic Table, the elements can be div...","Answer. (d), (e), and (g).","In the Periodic Table, the elements can be div..."
2,The element with the lowest density and smalle...,Answer. (d),The element with the lowest density and smalle...
3,Which of the following bond types are classifi...,"Answer. (a), (c), and (d).",The bond types classified as primary bonds are...
4,How many atoms are there in the unit cell of t...,Answer. (e),The number of atoms in the unit cell of the fa...
5,Which of the following are not point defects i...,"Answer. (b), (c), (d)","Based on the provided context, the defects tha..."
6,Which one of the following crystal structures ...,Answer. (c),The crystal structure with the fewest slip dir...
7,Grain boundaries are an example of which one o...,Answer. (e),Grain boundaries are an example of surface def...
8,Twinning is which of the following (more than ...,"Answer. (b), (c), and (d).",Twinning is:\n(b) a mechanism of plastic defor...
9,Polymers are characterized by which of the fol...,Answer. (b) and (f).,"Based on the provided context, polymers are ch..."


In [16]:
# Ask the query engine every question from the sheet, echoing each full result
# as it arrives and keeping only the response text for the table.
questions = df["Question"].tolist()

results = []
for question in questions:
    result = query_engine.query(question)
    print(result)
    results += [result.response]

# One-column frame of answers, in the same order as the questions.
Outputs_df = pd.DataFrame(data=results, columns=["llamaindex"])
Outputs_df

(c) 100
In the Periodic Table, the elements can be divided into the following categories:

(b) gases,
(c) liquids,
(d) metals,
(e) nonmetals,
(g) semi-metals.
Based on the provided context, the element with the lowest density and smallest atomic weight is (d) hydrogen.
Based on the provided context, the bond types classified as primary bonds are:

(a) covalent bonding,
(c) ionic bonding, and
(d) metallic bonding.
The context provided does not explicitly state the number of atoms in the unit cell of the face-centered cubic (FCC) structure. However, based on standard knowledge in materials science, the correct answer is:

(e) 14

This is because the FCC unit cell has atoms at each of the eight corners of the cube and one atom at the center of each of the six faces. Each corner atom is shared among eight adjacent unit cells, and each face-centered atom is shared between two adjacent unit cells, leading to a total of 4 atoms per unit cell.
Based on the provided context, the following are n

Unnamed: 0,LangChain
0,(c) 100
1,"In the Periodic Table, the elements can be div..."
2,"Based on the provided context, the element wit..."
3,"Based on the provided context, the bond types ..."
4,The context provided does not explicitly state...
5,"Based on the provided context, the following a..."
6,(c) HCP
7,Grain boundaries are an example of surface def...
8,"Based on the provided context, twinning is:\n\..."
9,Polymers are characterized by the following bo...


In [18]:
# Attach the new answers next to the existing columns (column-wise concat,
# aligned on the row index).
# Columns of `df` whose names collide with those in `Outputs_df` are dropped
# first, so re-running this cell, or starting from a CSV that already holds a
# 'llamaindex' column, replaces the old answers instead of producing duplicate
# column labels.
df = pd.concat(
    [df.drop(columns=Outputs_df.columns, errors="ignore"), Outputs_df],
    axis=1,
)
df

Unnamed: 0,Question,Answer,LangChain,llamaindex
0,Approximately how many different elements have...,Answer. (c),The correct answer is (c) 100.,(c) 100
1,"In the Periodic Table, the elements can be div...","Answer. (d), (e), and (g).","In the Periodic Table, the elements can be div...","In the Periodic Table, the elements can be div..."
2,The element with the lowest density and smalle...,Answer. (d),The element with the lowest density and smalle...,"Based on the provided context, the element wit..."
3,Which of the following bond types are classifi...,"Answer. (a), (c), and (d).",The bond types classified as primary bonds are...,"Based on the provided context, the bond types ..."
4,How many atoms are there in the unit cell of t...,Answer. (e),The number of atoms in the unit cell of the fa...,The context provided does not explicitly state...
5,Which of the following are not point defects i...,"Answer. (b), (c), (d)","Based on the provided context, the defects tha...","Based on the provided context, the following a..."
6,Which one of the following crystal structures ...,Answer. (c),The crystal structure with the fewest slip dir...,(c) HCP
7,Grain boundaries are an example of which one o...,Answer. (e),Grain boundaries are an example of surface def...,Grain boundaries are an example of surface def...
8,Twinning is which of the following (more than ...,"Answer. (b), (c), and (d).",Twinning is:\n(b) a mechanism of plastic defor...,"Based on the provided context, twinning is:\n\..."
9,Polymers are characterized by which of the fol...,Answer. (b) and (f).,"Based on the provided context, polymers are ch...",Polymers are characterized by the following bo...


In [19]:
# Write the combined table back to csv_file_path, i.e. the same file the
# questions were read from, so the original sheet is overwritten in place.
# index=False keeps the row index out of the file.
df.to_csv(csv_file_path, index=False)