In [None]:
!pip install --quiet -U langchain langchainhub langchain_community langchain-tools openai langgraph langchain_groq python-dotenv langchain_experimental duckduckgo-search groq pypdf chromadb

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.5/389.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.0/125.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.9/108.9 kB[0m [31m6.3 MB/s[0m eta [36m0:0

In [None]:
from langchain_groq import ChatGroq
from google.colab import userdata

# Groq API key, fetched through google.colab.userdata under the name "groq_api".
groq_api = userdata.get("groq_api")

In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.agents import initialize_agent, Tool
from langchain.document_loaders import PyPDFLoader
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field


class PDFinput(BaseModel):
    """Argument schema for the ``extract_pdf`` tool.

    Field names match the parameters of ``extract_pdf`` one-to-one.
    """

    pdf_path: str = Field(description="path to the PDF")
    query: str = Field(description="Query to be searched")
    # Optional, with the same default (3) that extract_pdf itself declares, so
    # a tool call that omits top_k validates instead of being rejected.
    top_k: int = Field(default=3, description="Documents to be retrieved")


def extract_pdf(pdf_path: str, query: str, top_k: int = 3) -> str:
    """Return the PDF passages most similar to ``query``.

    The PDF is loaded and split with ``PyPDFLoader.load_and_split``; the chunk
    texts are embedded with ``sentence-transformers/all-mpnet-base-v2`` through
    ``HuggingFaceHubEmbeddings`` and indexed in a temporary Chroma collection,
    which is searched for ``query`` and then deleted.

    Args:
        pdf_path: Path to the PDF file to index.
        query: Text to search for.
        top_k: Maximum number of chunks to return (default 3).

    Returns:
        The ``page_content`` of the matching chunks joined with newlines, or an
        empty string when the PDF yields no text chunks.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    import uuid  # local import: only needed to name the throwaway collection

    # Fail fast, before any PDF parsing or remote embedding work is done.
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    pages = PyPDFLoader(pdf_path).load_and_split()
    texts = [page.page_content for page in pages]
    if not texts:
        # Nothing to index (e.g. no extractable text); avoid building an
        # empty collection and report "no results" instead.
        return ""

    embed_model = HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/all-mpnet-base-v2",
        huggingfacehub_api_token=userdata.get('huggingface')
    )

    # Use a unique collection per call. Without an explicit collection_name
    # every call would index into Chroma's default collection, so chunks added
    # by earlier calls in the same kernel could be returned for another PDF.
    db = Chroma.from_texts(
        texts,
        embedding=embed_model,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
    try:
        search_results = db.similarity_search(query, k=top_k)
    finally:
        # The index is only needed for this one query; drop it so repeated
        # calls do not accumulate collections.
        db.delete_collection()

    return "\n".join(result.page_content for result in search_results)

def create_tools():
    """Build the list of tools exposed to an agent.

    ``extract_pdf`` takes several arguments (``pdf_path``, ``query``,
    ``top_k``), so it is wrapped in a ``StructuredTool`` with the ``PDFinput``
    schema; a plain single-input ``Tool`` hands the function one string and
    cannot supply ``query``.

    Returns:
        A one-element list containing the ``extract_pdf`` tool.
    """
    pdf_tool = StructuredTool.from_function(
        func=extract_pdf,
        name="extract_pdf",
        description="Extracts text from a PDF file and stores it in a vector database.",
        args_schema=PDFinput,
    )
    return [pdf_tool]
# Multi-argument tool around extract_pdf, validated against the PDFinput schema.
# return_direct=True: the tool's output is handed back as the agent's answer.
PDF_extractor = StructuredTool.from_function(
    name="extract_pdf",
    description="extract PDF and save it into vectorDB.",
    func=extract_pdf,
    args_schema=PDFinput,
    return_direct=True,
)

def create_retriever_agent(
    PDF_extractor,
    model: str = "llama-3.2-90b-text-preview",
    verbose: bool = True,
    handle_parsing_errors: bool = False,
):
    """Create a structured-chat agent that can call the given tool(s).

    Args:
        PDF_extractor: A single tool, or a list/tuple of tools, to expose to
            the agent.
        model: Groq model name passed to ``ChatGroq``.
        verbose: Whether the agent prints its intermediate steps.
        handle_parsing_errors: Forwarded to ``initialize_agent``; the default
            ``False`` keeps the previous behaviour. NOTE(review): per the
            LangChain docs, ``True`` feeds malformed LLM output back to the
            model instead of raising — confirm before enabling.

    Returns:
        The agent object returned by ``initialize_agent``.
    """
    # The API key comes from the notebook-level `groq_api` variable.
    llm = ChatGroq(api_key=groq_api, model=model)

    # Accept either one tool (the original call style) or a collection of them.
    if isinstance(PDF_extractor, (list, tuple)):
        tools = list(PDF_extractor)
    else:
        tools = [PDF_extractor]

    return initialize_agent(
        tools=tools,
        llm=llm,
        agent="structured-chat-zero-shot-react-description",
        verbose=verbose,
        handle_parsing_errors=handle_parsing_errors,
    )




if __name__ == "__main__":
    # Single source of truth for this run's inputs.
    pdf_path = "/content/ISME2024_Track1_066. R1.pdf"
    question = "What are the list of components used?"
    top_k = 3

    # Only the "input" text is given to the agent, so the document path and
    # top_k must be spelled out in it. Extra keys passed to invoke() do not
    # reach the prompt, which lets the model pick its own top_k.
    query = f"Query: {question}; Path to doc: {pdf_path}; top_k: {top_k}"

    # Structured form of the same request; not used by the agent call below.
    query_input = {
        "pdf_path": pdf_path,
        "query": question,
        "top_k": top_k,
    }

    print("Creating the agent...")
    agent = create_retriever_agent(PDF_extractor)

    print("Invoking the agent...")
    result = agent.invoke({"input": query})
    print("\nRetrieved Results:")
    print(result)


Creating the agent...
Invoking the agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "action": "extract_pdf",
  "action_input": {
    "pdf_path": "/content/ISME2024_Track1_066. R1.pdf",
    "query": "list of components used",
    "top_k": 5
  }
}
```
[0m
Observation: [36;1m[1;3m22nd ISME International Conference on Recent Advances in Mechanical Engineering (ISME-2024) 
Delhi Technological University, Delhi, 11– 13 July, 2024 
 
 
 
 
 
 
9 
 
 
 
 
8 – Channel 
Relay Board 
 
Connector Type- Screw 
Current rating – 10 Amps 
Maximum Switching Voltage 
– 12 V 
Item weight- 110 g 
Dimensions- 16.8 x 3.6 x 3 cm 
; 110 Grams 
 
 
 
 
 
 
10 
 
 
 
 
LCD Display 
 
 
Product dimensions – 13 x 13 
x 8cm ; 45 grams 
Item height – 8 cm 
Item width – 13 cm 
Included components- 5V 
LCD display module 
JHD762M5 
 
 
 
 
 
 
 
 
11 
 
 
 
 
 
Rain sensor 
 
 
 
 
Number of memory sticks – 1 
Item weight – 400 g 
Product dimensions- 7 x 5 x 5 
cm; 400 grams 
 

In [None]:
if __name__ == "__main__":
    # The question, document path and top_k are all carried in the prompt text.
    query = "Query: What are the characteristics of python?; Path to doc: /content/Python Full Tutorial.pdf; top_k: 5"

    print("Creating the agent...")
    agent = create_retriever_agent(PDF_extractor)

    print("Invoking the agent...")
    # The agent receives the request under the single "input" key.
    result = agent.invoke(dict(input=query))

    # Heading and result on separate lines, as two consecutive prints would give.
    print("\nRetrieved Results:", result, sep="\n")

Creating the agent...
Invoking the agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The query is asking for characteristics of Python, and a path to a PDF document is provided. To answer this question, we need to extract the relevant information from the PDF document.

Action:
```json
{
  "action": "extract_pdf",
  "action_input": {
    "pdf_path": "/content/Python Full Tutorial.pdf",
    "query": "characteristics of Python",
    "top_k": 5
  }
}
```
[0m
Observation: [36;1m[1;3mPython Tutorial 
        
    2 
 
Python is a high -level, multi -paradigm programming language. As Python is an 
interpreter-based language, it is easier to learn compared to some of the other mainstream 
languages. Python is a dynamically typed language with very intuitive data types.  
Python is an open -source and cross -platform programming language. It is available for 
use under Python Software Foundation License  (compatible to GNU General Public 
License) on all the maj

In [None]:
if __name__ == "__main__":
    # The question, document path and top_k are all carried in the prompt text.
    query = "Query: What are the applications of python?; Path to doc: /content/Python Full Tutorial.pdf; top_k: 5"

    print("Creating the agent...")
    agent = create_retriever_agent(PDF_extractor)

    print("Invoking the agent...")
    # The agent receives the request under the single "input" key.
    result = agent.invoke(dict(input=query))

    # Heading and result on separate lines, as two consecutive prints would give.
    print("\nRetrieved Results:", result, sep="\n")

Creating the agent...
Invoking the agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "action": "extract_pdf",
  "action_input": {
    "pdf_path": "/content/Python Full Tutorial.pdf",
    "query": "applications of python",
    "top_k": 5
  }
}
```
[0m
Observation: [36;1m[1;3mPython Tutorial 
        
    iii 
 
Careers in Python 
If you know Python nicely, then you have a great career ahead. Here are just a few of the 
career options where Python is a key skill: 
 Game developer 
 Web designer 
 Python developer 
 Full-stack developer 
 Machine learning engineer 
 Data scientist 
 Data analyst 
 Data engineer 
 DevOps engineer 
 Software engineer 
 Many more other roles 
Characteristics of Python 
Following are important characteristics of Python Programming − 
 It supports functional and structured programming methods as well as OOP. 
 
 It can be used as a scripting language or can be compiled to byte-code for building 
large applica