In [3]:
import os
from dotenv import load_dotenv
import nest_asyncio

# Load variables from a local .env file (if present) into the process
# environment before any key is read below.
load_dotenv()
# Patch asyncio so that nested event loops are permitted.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop while later cells trigger async work - confirm.
nest_asyncio.apply()

# API access to llama-cloud (passed explicitly to LlamaParse in a later cell).
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
# Using the OpenAI API for embeddings/LLMs.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast with a readable message instead of letting a missing key surface
# later as an opaque authentication error inside a third-party client.
_missing_keys = [
    name
    for name, value in (
        ("LLAMA_CLOUD_API_KEY", LLAMA_CLOUD_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Set them in the environment or in a .env file before running this notebook."
    )

In [4]:
# GPT-3.5-Turbo Setup
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings

# For the purpose of this example, we use the small embedding model and GPT-3.5.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.5,
    # Adjacent literals are concatenated by Python; the resulting prompt text
    # is a single string (including the trailing space).
    system_prompt=(
        "You are a manufacturing assistant. "
        "All numerical data that you encounter are specifications for components. "
        "You need to give recommendations of components that have the least numerical difference in specifications to the specifications provided by the user. "
        "The specifications of recommendations should also semantically be the most similar to the specifications provided by the user. "
    ),
)

# Register BOTH models as the global defaults. Previously only the LLM was
# registered, so `embed_model` was never used and any index built later fell
# back to the library's default embedding model.
Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
# Parsing with instructions 
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# LlamaParse client configured to return markdown. The API key is handed over
# explicitly here; the same value could instead be supplied through the
# LLAMA_CLOUD_API_KEY environment variable. `result_type` accepts "markdown"
# or "text".
parser = LlamaParse(
    result_type="markdown",
    verbose=True,
    api_key=LLAMA_CLOUD_API_KEY,
    parsing_instruction="""It contains tables and specifications for different components. 
                            Try to reconstruct the information in a concise way.""",
)

# Send every .pdf found in the data directory through the LlamaParse client.
file_extractor = {".pdf": parser}
pdf_reader = SimpleDirectoryReader("./data_pistons", file_extractor=file_extractor)
documents = pdf_reader.load_data()


Started parsing the file under job_id 7a76d433-2e64-45c9-bf8c-03954d484cd5
Started parsing the file under job_id 169a0b21-2439-4a2e-bf90-1fe1a61738de


In [6]:
# Nodes
from llama_index.core.node_parser import MarkdownElementNodeParser

# The node parser gets its own gpt-3.5-turbo-0125 client; this is a separate
# instance from the `llm` configured earlier in the notebook.
element_llm = OpenAI(model="gpt-3.5-turbo-0125")
node_parser = MarkdownElementNodeParser(llm=element_llm, num_workers=8)

# Step 1: convert the parsed documents into nodes.
nodes = node_parser.get_nodes_from_documents(documents)
# Step 2: separate those nodes into base nodes and objects.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

85it [00:00, 65128.94it/s]
100%|██████████| 85/85 [00:30<00:00,  2.75it/s]
9it [00:00, 62914.56it/s]
100%|██████████| 9/9 [00:03<00:00,  2.39it/s]


In [7]:
# Reranker: most relevant docs
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
# Reranking postprocessor: BAAI/bge-reranker-large model, keeping the top 5 nodes.
reranker = FlagEmbeddingReranker(
    model="BAAI/bge-reranker-large",
    top_n=5,
)

In [8]:
# Recursive Query Engine: for reading tables too 

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor

# Build a single vector index over both the base nodes and the objects
# produced by the node parser.
recursive_index = VectorStoreIndex(nodes=base_nodes + objects)
# Alias kept because a later cell builds the chat engine from `index`.
index = recursive_index

# Retrieve a wider candidate pool than the reranker keeps (top_n=5).
# With the previous similarity_top_k=4 the reranker received fewer candidates
# than its own cut-off, so it could only reorder them and never filter.
recursive_query_engine = recursive_index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
    verbose=True,
)

In [9]:
# For printing response 
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core import StorageContext

In [None]:
# Query 
#query = "Our client Audi wants to build a piston for their engine. Can you give me some recommendations? Here are the provided specifications: 1. ⁠Bore Diameter- 83.50 mm 2.⁠ ⁠Compression Height- 29.59 mm (1.165 inches) 3.⁠ ⁠Material- Forged 2618 T6 Aluminum 4.⁠ ⁠Compression Ratio- 8.5:1 5.⁠ ⁠Maximum G-Force- 8000 G's."
#response = recursive_query_engine.query(query)

In [10]:
from llama_index.core.schema import MetadataMode
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core import StorageContext

#pprint_response(response, show_source=True)

#print(response)

from llama_index.core.memory import ChatMemoryBuffer

# Chat history buffer created with a 3900-token limit.
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

# Prompt template handed to the chat engine as `context_prompt`; it contains a
# `{context_str}` placeholder followed by a closing instruction.
chat_context_prompt = (
    "You are a manufacturing assistant. All numerical data that you encounter are specifications for components. You need to give recommendations of components that have the least numerical difference in specifications to the specifications provided by the user. The specifications of recommendations should also semantically be the most similar to the specifications provided by the user. "
    "Here are the relevant documents for the context:\n"
    "{context_str}"
    "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
)

# Chat engine in "condense_plus_context" mode, built on the shared index and
# reusing the memory buffer and LLM defined above.
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    llm=llm,
    context_prompt=chat_context_prompt,
    verbose=False,
)

In [22]:

# Client request sent to the chat engine. Keep this text plain ASCII: invisible
# zero-width / word-joiner characters around the list numbering (typical
# copy-paste residue) would otherwise be sent to the retriever and the LLM.
piston_request = (
    "Our client Audi wants to build a piston for their engine. "
    "Can you give me some recommendations? "
    "Here are the provided specifications: "
    "1. Bore Diameter- 83.50 mm "
    "2. Compression Height- 29.59 mm (1.165 inches) "
    "3. Material- Forged 2618 T6 Aluminum "
    "4. Compression Ratio- 8.5:1 "
    "5. Maximum G-Force- 8000 G's"
)
response = chat_engine.chat(piston_request)

In [23]:
# Show the chat engine's answer as plain text.
print(str(response))

Based on the specifications provided for the Audi engine piston, here are some recommendations for components that closely match the criteria:

1. **Kit Part #:** KE201M825
2. **Bore mm / Inch:** 82.50 / 3.248
3. **Over Sizes:** 1.50
4. **Stroke:** 6.260
5. **Rod Lgth.:** 1.204
6. **Comp Ht.:** 32.65
7. **Head cc’s:** 46
8. **Dome Dish:** -19.4
9. **Gasket Thickness:** 0.065
10. **Deck Clearance:** 0.000
11. **Block Ht.:** 9.291
12. **Comp. Ratio:** 8.0:1
13. **Pin Part # (included):** 8300XX
14. **Ring # (included):** 308
15. **Gram Foot:** 1

This piston closely matches the specifications provided, with a bore size of 82.50 mm, a compression height of 29.59 mm, and a compression ratio of 8.0:1. The material used is forged 2618 T6 aluminum, which meets the client's requirements.


In [None]:
# Node, metadata for final response
# Print the metadata of the first source node attached to the response.
# Guarded so that a response without any source nodes prints a message
# instead of raising IndexError.
source_nodes = response.source_nodes
if source_nodes:
    print(source_nodes[0].metadata)
else:
    print("No source nodes were returned for this response.")