In [1]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings


# Load the OpenAI API key from the environment variable
api_key=os.getenv("OPENAI_API_KEY")

In [3]:
required_exts = [".txt"]

documents = SimpleDirectoryReader("/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality",
                                    required_exts=required_exts,
                                    recursive=True).load_data()
print(f"Loaded {len(documents)} docs")

Loaded 36 docs


Since this is a single file uploaded, so we define the chunk size to make files smaller

In [4]:
os.environ["OPENAI_API_KEY"] = api_key
persist_dir = "/home/zihan/Desktop/Manufacturing_QA/Embedd_Vector_storage/High_Quality_WholeBook_storage"
# # Build this from documents to create the index
index = VectorStoreIndex.from_documents(documents, chunk_size=600, chunk_overlap=100)
index.storage_context.persist(persist_dir=persist_dir) # Save the index to disk
index


<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x732eef275fd0>

In [56]:
# ## To reload you can run 
from llama_index.core import StorageContext, load_index_from_storage

os.environ["OPENAI_API_KEY"] = api_key

persist_dir = "/Users/zihan/Desktop/Manufacturing_QA/1Book/4.High_Quality_WholeBook_storage"
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
# load index
index = load_index_from_storage(storage_context)

## Single result test

In [3]:
retriever = index.as_retriever(similarity_top_k=10)

In [43]:
results = retriever.retrieve("noncrystalline structure")
for result in results:
    print("#" * 100)
    print(result.text)

####################################################################################################
An amorphous material exhibits quite different behavior than that of a pure  metal when it changes from solid to liquid, as shown in Figure 2.15. The process  is again reversible, but observe the behavior of the amorphous material during  

![](images/776cfa33091e51e101c8ebed9d862ed874c77977fdbda411f161f224a002419f.jpg)  
FIGURE 2.15  Characteristic change in  volume for a pure metal (a crystalline  structure), compared to the same  volumetric changes in glass   (a   noncrystalline   structure).  

cooling from the liquid state, rather than during melting from the solid, as before.  Glass (silica, $\mathrm{SiO}_{2}$ ) is used to illustrate. At high temperatures, glass is a true liquid,  and the molecules are free to move about as in the usual definition of a liquid. As  the glass cools, it gradually transforms into the solid state, going through a transi-  tion phase, called a  supercoo

In [44]:
results

[NodeWithScore(node=TextNode(id_='9c87e5be-2d5e-4faa-8a39-ee0f274534dd', embedding=None, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec12456.txt', 'file_name': 'sec12456.txt', 'file_type': 'text/plain', 'file_size': 309052, 'creation_date': '2024-08-24', 'last_modified_date': '2024-08-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0988b1b5-6b63-485d-8796-2f574c1aabc3', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec12456.txt', 'file_name': 'sec12456.txt', 'file_type': 'text/plain', 'file_size': 309052, 'creation_date': '2024-08-24', 'last_modified_date': '2024-08-24'}, 

In [5]:
llm = OpenAI(model="gpt-4o")
query_engine = index.as_query_engine(llm=llm, similarity_top_k=2)

In [57]:
from llama_index.core import PromptTemplate


# new_summary_tmpl_str = (
#     """You are an top student in manufacturing major. You are in an exam and you need to answering the following MCQ based on the context provided and what you know regarding manufacturing process and materials correctly. 
# Please think step by step then provide the choose the correct answer(s) carefully as yout finnal answer, remember you need to provide all correct answer in the letter choice. for the context choice will result a zero even it is correct:\n


# You must provide answer in the following format:

# "Explanation": "The capital of France is Paris, which is a major European city and a global center for art, fashion, and culture.", "YourChoice": "A"

# {context_str}\n
# Answer the following question:
# Query: {query_str}\n
# Answer: """
# )


new_summary_tmpl_str = ("You are a top student in the manufacturing major, and you are taking an exam. You need to solve the following math question based on the context provided and your knowledge of manufacturing calculations. Make sure to clearly show all your steps and explain your reasoning for each step. Provide the final answer at the end.\n\n{context_str}\n\nSolve the following question:\n{query_str}\nAnswer:")

new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": new_summary_tmpl}
)

prompts_dict = query_engine.get_prompts()
print(prompts_dict['response_synthesizer:text_qa_template'].template)

You are a top student in the manufacturing major, and you are taking an exam. You need to solve the following math question based on the context provided and your knowledge of manufacturing calculations. Make sure to clearly show all your steps and explain your reasoning for each step. Provide the final answer at the end.

{context_str}

Solve the following question:
{query_str}
Answer:


In [58]:
## Single Test
result = query_engine.query("Approximately how many different elements have been identified.")
print(result.response)

Based on the information provided in the excerpt from the book, the periodic table of elements lists 103 elements. Therefore, approximately 103 different elements have been identified.


In [59]:
nodes = result.source_nodes
scores = [node.score for node in nodes]
scores

[0.8037183077062213, 0.7822655391937483]

In [60]:
file_paths = [node.metadata['file_path'] for node in nodes if 'file_path' in node.metadata]
file_paths

['/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec12456.txt',
 '/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec36.txt']

In [49]:
# result.get_formatted_sources
result.metadata
file_paths = [entry['file_path'] for entry in result.metadata.values()]
file_paths
# result.response
# result.source_nodes
# texts = [node.text for node in result.source_nodes]
# texts

['/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec12456.txt',
 '/home/zihan/Desktop/Manufacturing_QA/BookData/FullBook_HighQuality/sec12456.txt']

In [55]:
## Single Test
result = query_engine.query("23.03 A series of turning tests were conducted using a cemented carbide tool, and flank wear data were collected. The feed was 0.010 in/rev and the depth was 0.125 in. At a speed of 350 ft/min, flank wear = 0.005 in at 1 min, 0.008 in at 5 min, 0.012 in at 11 min, 0.0.015 in at 15 min, 0.021 in at 20 min, and 0.040 in at 25 min. At a speed of 450 ft/min, flank wear = 0.007 in at 1 min, 0.017 in at 5 min, 0.027 in at 9 min, 0.033 in at 11 min, and 0.040 in at 13 min. The last value in each case is when final tool failure occurred. (a) On a single piece of linear graph paper, plot flank wear as a function of time. Using 0.020 in of flank wear as the criterion of tool failure, determine the tool lives for the two cutting speeds. (b) On a piece of natural log-log paper, plot your results determined in the previous part. From the plot, determine the values of n and C in the Taylor Tool Life Equation. (c) As a comparison, calculate the values of n and C in the Taylor equation solving simultaneous equations. Are the resulting n and C values the same?")
print(result.response)


To solve this problem, we will follow the steps outlined in the question. We'll start by plotting the flank wear data, determine the tool lives, and then use the Taylor Tool Life Equation to find the parameters \( n \) and \( C \).

### (a) Plot Flank Wear as a Function of Time and Determine Tool Lives

1. **Plotting Flank Wear:**
   - For 350 ft/min:
     - At 1 min: 0.005 in
     - At 5 min: 0.008 in
     - At 11 min: 0.012 in
     - At 15 min: 0.015 in
     - At 20 min: 0.021 in
     - At 25 min: 0.040 in (failure)
   - For 450 ft/min:
     - At 1 min: 0.007 in
     - At 5 min: 0.017 in
     - At 9 min: 0.027 in
     - At 11 min: 0.033 in
     - At 13 min: 0.040 in (failure)

2. **Determine Tool Lives:**
   - Using 0.020 in as the criterion for tool failure:
     - For 350 ft/min, the tool life is approximately 19 minutes (interpolating between 15 min and 20 min).
     - For 450 ft/min, the tool life is approximately 10.5 minutes (interpolating between 9 min and 11 min).

### (b) Pl

In [14]:
import pandas as pd
import glob
import re

# Define the file path pattern to match all CSV files ending with -MCQ.csv
csv_file_pattern = '/home/zihan/Desktop/Manufacturing_QA/QuestionsData/QA_Pairs/*-MCQ.csv'

# Use glob to find all files matching the pattern
csv_files = glob.glob(csv_file_pattern)

# Read all matching CSV files into a list of DataFrames
dfs = [pd.read_csv(file) for file in csv_files]

# Concatenate all DataFrames into one
df = pd.concat(dfs, ignore_index=True)

# Display the combined DataFrame
df




Unnamed: 0,Question,Answer
0,Reasons why workparts must be cleaned include ...,"Answer. (a), (c), (d), and (e)."
1,Which of the following chemicals is associated...,"Answer. (a), (b)."
2,Shot peening is a mechanical cleaning method u...,Answer. (b) Principal function is to cold work...
3,"In sand blasting, which one of the following a...",Answer. (e)
4,"The abrasive media used in mass finishing, suc...","Answer. (a), (b), (c), (d), and (e)."
...,...,...
457,Steel cutting grades of cemented carbide are t...,Answer. (c) and (d).
458,If you had to select a cemented carbide for an...,Answer. (d)
459,Which of the following processes are used to p...,Answer. (a) and (c).
460,Which of the following materials has the highe...,Answer. (b)


In [15]:
questions = df['Question'].tolist()
results = []
for question in questions:
    result = query_engine.query(question)
    print(result)
    results.append(result.response)

Outputs_df = pd.DataFrame(results, columns=['llamaindex'])
Outputs_df

"Explanation": "Based on the provided text, the reasons why workparts must be cleaned include: (a) to enhance appearance and performance of the product, (c) to improve hygiene conditions for workers and customers, (d) to prepare the surface for subsequent industrial processing, and (e) to remove contaminants that might chemically react with the surface. Enhancing mechanical properties of the surface is not mentioned as a reason for cleaning.", "YourChoice": "A, C, D, E"
"Explanation": "Alkaline cleaning employs an alkali to remove oils, grease, wax, and various types of particles from a metallic surface. The alkaline cleaning solutions consist of low-cost, water-soluble salts such as sodium and potassium hydroxide (NaOH, KOH), sodium carbonate (Na2CO3), borax (Na2B4O7), phosphates, and silicates of sodium and potassium.", "YourChoice": "A, B"
"Explanation": "Shot peening is not primarily used for removing surface scale from metallic parts. Instead, it is a mechanical process that invol

Unnamed: 0,llamaindex
0,"""Explanation"": ""Based on the provided text, th..."
1,"""Explanation"": ""Alkaline cleaning employs an a..."
2,"""Explanation"": ""Shot peening is not primarily ..."
3,"""Explanation"": ""Sand blasting, also known as a..."
4,"""Explanation"": ""The abrasive media used in mas..."
...,...
457,"""Explanation"": ""Steel-cutting grades of cement..."
458,"""Explanation"": ""For finish turning of steel, a..."
459,"""Explanation"": ""The source material indicates ..."
460,"""Explanation"": ""Based on the provided text and..."


In [16]:
results

['"Explanation": "Based on the provided text, the reasons why workparts must be cleaned include: (a) to enhance appearance and performance of the product, (c) to improve hygiene conditions for workers and customers, (d) to prepare the surface for subsequent industrial processing, and (e) to remove contaminants that might chemically react with the surface. Enhancing mechanical properties of the surface is not mentioned as a reason for cleaning.", "YourChoice": "A, C, D, E"',
 '"Explanation": "Alkaline cleaning employs an alkali to remove oils, grease, wax, and various types of particles from a metallic surface. The alkaline cleaning solutions consist of low-cost, water-soluble salts such as sodium and potassium hydroxide (NaOH, KOH), sodium carbonate (Na2CO3), borax (Na2B4O7), phosphates, and silicates of sodium and potassium.", "YourChoice": "A, B"',
 '"Explanation": "Shot peening is not primarily used for removing surface scale from metallic parts. Instead, it is a mechanical process 

In [17]:
import re

# Function to parse the Explanation and YourChoice
def parse_row(row):
    explanation_match = re.search(r'"Explanation": "(.*?)"', row)
    choice_match = re.search(r'"YourChoice": "(.*?)"', row)
    
    explanation = explanation_match.group(1) if explanation_match else ""
    choice = choice_match.group(1) if choice_match else ""
    
    return pd.Series([explanation, choice])

# Apply the parsing function to each row
Outputs_df[['Explanation', 'YourChoice']] = Outputs_df['llamaindex'].apply(parse_row)

In [18]:
result_df = pd.concat([df, Outputs_df], axis=1)
result_df

Unnamed: 0,Question,Answer,llamaindex,Explanation,YourChoice
0,Reasons why workparts must be cleaned include ...,"Answer. (a), (c), (d), and (e).","""Explanation"": ""Based on the provided text, th...","Based on the provided text, the reasons why wo...","A, C, D, E"
1,Which of the following chemicals is associated...,"Answer. (a), (b).","""Explanation"": ""Alkaline cleaning employs an a...",Alkaline cleaning employs an alkali to remove ...,"A, B"
2,Shot peening is a mechanical cleaning method u...,Answer. (b) Principal function is to cold work...,"""Explanation"": ""Shot peening is not primarily ...",Shot peening is not primarily used for removin...,B
3,"In sand blasting, which one of the following a...",Answer. (e),"""Explanation"": ""Sand blasting, also known as a...","Sand blasting, also known as abrasive blasting...",E
4,"The abrasive media used in mass finishing, suc...","Answer. (a), (b), (c), (d), and (e).","""Explanation"": ""The abrasive media used in mas...",The abrasive media used in mass finishing proc...,"A, B, D, E"
...,...,...,...,...,...
457,Steel cutting grades of cemented carbide are t...,Answer. (c) and (d).,"""Explanation"": ""Steel-cutting grades of cement...",Steel-cutting grades of cemented carbide typic...,"A, C, D, E"
458,If you had to select a cemented carbide for an...,Answer. (d),"""Explanation"": ""For finish turning of steel, a...","For finish turning of steel, a cemented carbid...",C
459,Which of the following processes are used to p...,Answer. (a) and (c).,"""Explanation"": ""The source material indicates ...",The source material indicates that both chemic...,"A, C"
460,Which of the following materials has the highe...,Answer. (b),"""Explanation"": ""Based on the provided text and...",Based on the provided text and general knowled...,B


In [19]:
csv_file_path = '/home/zihan/Desktop/Manufacturing_QA/QuestionsData/Test_Result/HighQualityBookMCQ_CoT_TextOnly.csv'
result_df.to_csv(csv_file_path, index=False)