In [None]:
import os
import time
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from httpx import ConnectError

# GROQ_KEY = "ADD key I have removed mine"

loader = PyPDFLoader("./elgimanual-1.pdf", password= None)
all_docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.split_documents(all_docs)
print(f"Total chunks: {len(chunks)}")
print()
print(chunks[0])

Total chunks: 51

page_content='Product Manual 
EN Series 
Electric Powered Screw Air Compressor 
EN 5 - 125 
208 - 230/460V, 3Ph, 60Hz' metadata={'producer': '3-Heights(TM) PDF Security Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'pdftk 2.02 - www.pdftk.com', 'creationdate': '2019-08-08T01:28:01+05:30', 'moddate': '2020-02-28T10:42:43-05:00', 'source': './elgimanual-1.pdf', 'total_pages': 56, 'page': 2, 'page_label': '3'}


**ususally metadata in chunks is just pg no., and src but here we can see many extra attibutes, If we look closely All these are the metadata attributes of the original PDF, Like we can seethe last modified date and creation date:** 
<br>
'creationdate': '2019-08-08T01:28:01+05:30', 'moddate': '2020-02-28T10:42:43-05:00'

And all other are just metadata attributes related to on which software this pdf was actually made, And the only other imp metadatas are  
<br>
- 'total_pages': 56
- 'page': 13
- 'page_label': '14' (lang chain starts idx frim 0)

In [None]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)

In [38]:
SYSTEM_INSTRUCTION = (
    "You are ELGi‑Helper, an AI assistant that strictly answers questions based on the provided ELGi compressor manual only.\n\n"
    "Please follow these rules carefully:\n"
    "1) Do NOT use your own knowledge. Only answer from the provided manual.\n"
    "2) If a question is outside the scope of the manual, respond with: \"LLM can only answer questions related to the given file.\"\n"
    "3) Do NOT guess or assume anything. If the manual does not mention it, say so.\n"
    "4) Keep your answers short, clear, and directly based on the manual.\n"
    "5) Include exact wording or refer to the manual page where relevant, if available.\n"
    "6) Never answer general questions, opinions, or anything unrelated to the ELGi manual."
)

In [39]:
llm = ChatOpenAI(
    model_name="llama3-70b-8192",
    temperature=0.0,
    openai_api_key=GROQ_KEY,
    openai_api_base="https://api.groq.com/openai/v1"
)

In [40]:
history_list = []
# this is just for extar context to improve moedal acc : [first_answer, second_last_answer, last_answer]


def ask_question(question: str, k: int = 3) -> dict:
    
    docs = vectorstore.similarity_search(question, k=k)
    
    # print("\nChunks we got from PDF:\n")
    index = 0
    snippets = [] 

    for doc in docs:
        metadata = doc.metadata
        snippet_text = doc.page_content[0:100] 

        # print("Chunk " + str(index))
        # print("  Metadata: " + str(metadata))
        # print("  Snippet:  " + snippet_text)
        # print()
        
        # jus for returing purposes only
        snippet = "[Chunk " + str(index) + "] " + snippet_text + "..........."
        snippets.append(snippet)
        index += 1



    context = ""
    for doc in docs:
        context += doc.page_content + "\n"

    
    # history part 
    extra_parts = []
    if len(history_list) >= 1: # edge case as maybe phaly chat he ho abbi
        extra_parts.append("First Answer: " + history_list[0])
    if len(history_list) >= 2:
        extra_parts.append("Second Last Answer: " + history_list[-2])
    if len(history_list) >= 1:
        extra_parts.append("Last Answer: " + history_list[-1])

    extra_context = ""
    for part in extra_parts:
        extra_context += part + "\n\n"




    system_msg = SystemMessage(content=SYSTEM_INSTRUCTION)
    human_parts = []
    
    if extra_context != "":
        human_parts.append("History:\n" + extra_context)

    human_parts.append("Context:\n" + context)
    human_parts.append("Question: " + question)

    full_human_message = ""
    for part in human_parts:
        full_human_message += part + "\n"

    human_msg = HumanMessage(content=full_human_message)

    response = llm.generate([[system_msg, human_msg]])
    answer = response.generations[0][0].message.content

    # Update the history_list based on current number of items
    if len(history_list) == 0:
        history_list.append(answer)
    elif len(history_list) == 1:
        history_list.append(answer)
    elif len(history_list) == 2:
        history_list.append(answer)
    else:
        history_list[1] = history_list[2]
        history_list[2] = answer
    
    
    
    
    # return dic 
    result = {
        "question": question,
        "snippets": snippets,
        "extra_history": extra_parts,
        "answer": answer
    }

    return result


In [41]:
test_questions = [
    "Who is the manufacturer of the EN Series Electric Powered Screw Air Compressor?",
    "What is the function of the minimum pressure valve?",
    "How to become a president of pakistan",
    "What maintenance should be done every 4000 hours?"
]


test_index = 1

for question in test_questions:
    result = ask_question(question)

    print("\nTest Case " + str(test_index))
    print("Question: " + result['question'])
    print("Answer:")
    print("Retrieved Snippets:")
    for snippet in result['snippets']:
        print("  " + snippet)

    if result['extra_history']:
        print("Extra History Used:")
        for entry in result['extra_history']:
            print("  " + entry.replace('\n', ' | '))

    print("  " + result['answer'])
    print()
    print()
    print()
    print()
    test_index += 1




Test Case 1
Question: Who is the manufacturer of the EN Series Electric Powered Screw Air Compressor?
Answer:
Retrieved Snippets:
  [Chunk 0] Product Manual 
EN Series 
Electric Powered Screw Air Compressor 
EN 5 - 125 
208 - 230/460V, 3Ph, 6...........
  [Chunk 1] 1.7 
About ELGi 1.0 
ELGi, established in 1960, designs and manufactures a wide range of air compres...........
  [Chunk 2] 2.3 General functional description 
Welcome to the ELGi customer family. ELGi compressors 
are the s...........
  According to the manual, the manufacturer of the EN Series Electric Powered Screw Air Compressor is ELGi. (Page 1.0, "About ELGi")





Test Case 2
Question: What is the function of the minimum pressure valve?
Answer:
Retrieved Snippets:
  [Chunk 0] 5.10 Installation and operation  
5.3.1 Safety system and interlocking  
device 
5.3.1.1 Discharge t...........
  [Chunk 1] 2.10 General functional description 
vibration mountings reside below the airend and the 
motor. Thi...........
  [Chunk 