In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
import nest_asyncio
nest_asyncio.apply()
import tabula
import pandas as pd

# Credentials are read from the process environment (populated by load_dotenv above).
# LlamaCloud key: used by the LlamaParse client.
LLAMA_CLOUD_API_KEY = os.environ.get("LLAMA_CLOUD_API_KEY")
# OpenAI key: the LLM and embedding classes used in this notebook are OpenAI ones.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# GPT-3.5-Turbo Setup
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage

# For the purpose of this example we use OpenAI's small embedding model and GPT-3.5-Turbo.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.5,
    system_prompt="You are a manufacturing assistant. All numerical data that you encounter are specifications for components. Give recommendations that have the least numerical difference in specifications to the specifications provided by the user and are closest semantically.",
)

# Register BOTH models as the global defaults. Previously only the LLM was
# registered, so the embedding model constructed above was never used by the
# index built later in the notebook.
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
# Parsing with instructions 

# Ingest in parallel 
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# LlamaParse client configured to return markdown, guided by a short
# natural-language instruction describing the documents.
parser = LlamaParse(
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True,
    parsing_instruction="""It contains specifications for different components. 
                            Reconstruct the information in a concise way.""",
    api_key=LLAMA_CLOUD_API_KEY,  # can also be set in your env as LLAMA_CLOUD_API_KEY
)

def get_meta(file_path):
    """Return the metadata dict for one file.

    The result always carries the fixed entry ``"foo": "bar"`` followed by
    ``"file_path"`` set to the value passed in.
    """
    metadata = {"foo": "bar"}
    metadata["file_path"] = file_path
    return metadata

# Load everything under the data directory, handing .pdf files to the
# LlamaParse parser; loading is spread over 8 workers.
dir_path = "./data_pistons"
file_extractor = {".pdf": parser}
reader = SimpleDirectoryReader(input_dir=dir_path, file_extractor=file_extractor)
documents = reader.load_data(num_workers=8)


In [None]:
from llama_index.core.node_parser import MarkdownElementNodeParser

In [None]:
# Nodes
from llama_index.core.node_parser import MarkdownElementNodeParser

# Turn the parsed markdown documents into nodes, then separate them into
# base nodes and objects for indexing.
node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo-0125"), num_workers=8)
# get_nodes_from_documents is a synchronous call that returns a list, so it
# must not be awaited (awaiting a list raises TypeError). nest_asyncio is
# applied at the top of the notebook for any event-loop use inside the call.
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

In [None]:
# Rebinds node_parser to an instance backed by the shared `llm`.
# NOTE(review): in top-to-bottom order this runs after the nodes were already
# produced, and no later visible cell uses node_parser — confirm it is needed.
node_parser = MarkdownElementNodeParser(num_workers=8, llm=llm)

In [None]:
# Reranker: most relevant docs
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
# Keeps the 5 best-scoring nodes according to the BAAI/bge-reranker-large model.
reranker = FlagEmbeddingReranker(model="BAAI/bge-reranker-large", top_n=5)

In [None]:
# Recursive Query Engine: for reading tables too 

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor

# One vector index over the base nodes plus the objects; `index` is a second
# name for the same index object.
index = recursive_index = VectorStoreIndex(nodes=base_nodes + objects)

# Query engine: retrieve the 4 most similar nodes, then pass them through the reranker.
recursive_query_engine = recursive_index.as_query_engine(
    verbose=True,
    node_postprocessors=[reranker],
    similarity_top_k=4,
)

In [None]:
# For printing response 
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core import StorageContext

In [None]:
# Query 
#query = "Our client Audi wants to build a piston for their engine. Can you give me some recommendations? Here are the provided specifications: 1. ⁠Bore Diameter- 83.50 mm 2.⁠ ⁠Compression Height- 29.59 mm (1.165 inches) 3.⁠ ⁠Material- Forged 2618 T6 Aluminum 4.⁠ ⁠Compression Ratio- 8.5:1 5.⁠ ⁠Maximum G-Force- 8000 G's."
#response = recursive_query_engine.query(query)

In [None]:
from llama_index.core.schema import MetadataMode
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core import StorageContext

#pprint_response(response, show_source=True)

#print(response)

from llama_index.core.memory import ChatMemoryBuffer

# Chat history buffer, capped at 3900 tokens.
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

# Chat engine built on the index. The context prompt is a template with a
# {context_str} placeholder for the retrieved documents.
chat_engine = index.as_chat_engine(
    llm=llm,
    memory=memory,
    chat_mode="condense_plus_context",
    verbose=False,
    context_prompt=(
        "You are a manufacturing assistant. All numerical data that you encounter are specifications for components. You need to give recommendations of components that have the least numerical difference in specifications to the specifications provided by the user. The specifications of recommendations should also semantically be the most similar to the specifications provided by the user. "
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    ),
)

In [None]:

# Ask the chat engine for piston recommendations matching the client's specs.
# The prompt is plain text: invisible U+2060 (word joiner) characters that
# were pasted into the numbered list have been removed so they are not sent
# to the embedding model / LLM as part of the query.
response = chat_engine.chat(
    "Our client Audi wants to build a piston for their engine. "
    "Can you give me some recommendations? "
    "Here are the provided specifications: "
    "1. Bore Diameter- 83.50 mm "
    "2. Compression Height- 29.59 mm (1.165 inches) "
    "3. Material- Forged 2618 T6 Aluminum "
    "4. Compression Ratio- 8.5:1 "
    "5. Maximum G-Force- 8000 G's"
)

In [None]:
# Show the chat engine's answer as plain text.
print(str(response))

In [None]:
# Metadata of the first source node behind the final response.
# Guard against an empty list so the cell does not fail with IndexError
# when the response carries no source nodes.
if response.source_nodes:
    print(response.source_nodes[0].metadata)
else:
    print("No source nodes were returned for this response.")