<a href="https://colab.research.google.com/github/Wallzers/Wallzers/blob/main/Vlsi_AgenticRAG_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install "git+https://github.com/huggingface/transformers.git#egg=transformers[agents]"
!pip install langchain
!pip install langchain-community
!pip install sentence-transformers
!pip install faiss-cpu
!pip install groq
!pip install -qU langchain-groq
!pip install unstructured
!pip install "unstructured[pdf]"
!pip install -U langchain-huggingface

In [31]:
import pandas as pd
import datasets
from transformers import AutoTokenizer
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from tqdm import tqdm
from transformers.agents import Tool,  ReactJsonAgent
from huggingface_hub import InferenceClient

In [35]:
import os

from langchain_community.document_loaders import DirectoryLoader

# Resolve the data folder once so that the directory that gets created is the very same
# one that is loaded and listed. With the default Colab working directory (/content) this
# resolves to /content/DATA.
DATA_DIR = os.path.abspath("DATA")
os.makedirs(DATA_DIR, exist_ok=True)

# Load every PDF found (recursively) under DATA_DIR into `docs`.
loader = DirectoryLoader(DATA_DIR, glob="**/*.pdf", show_progress=True)
docs = loader.load()

# Show which files are present in the data folder.
print(os.listdir(DATA_DIR))

100%|██████████| 1/1 [00:07<00:00,  7.32s/it]

['DigitalNotes.pdf']





In [36]:
# Initialize the text splitter
# The tokenizer is loaded for "thenlper/gte-small", the same model name that a later cell
# uses for the embedding model.
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
# chunk_size / chunk_overlap are handed to a splitter built from that tokenizer —
# presumably they are measured in tokens rather than characters; confirm against the
# LangChain documentation.
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=200,
    chunk_overlap=20,
    add_start_index=True,
    strip_whitespace=True,
    # Custom separators are currently disabled, so the splitter's defaults apply:
    #separators=["\n\n", "\n", ".", " ", ""],
)

In [37]:
# Chunk every loaded document and keep only the first occurrence of each distinct
# chunk text, preserving the order in which chunks are produced.

docs_processed = []
unique_texts = {}
for doc in tqdm(docs):
    for chunk in text_splitter.split_documents([doc]):
        text = chunk.page_content
        if text in unique_texts:
            # Exact duplicate of a chunk that was already kept.
            continue
        unique_texts[text] = True
        docs_processed.append(chunk)

100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


In [38]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used both to index the chunks and to embed incoming queries.
model_name = "thenlper/gte-small"
model_kwargs = {'device': 'cpu'}
# The vector store below is built with DistanceStrategy.COSINE, which is only meaningful
# on unit-length vectors, so ask the encoder to L2-normalize its output.
encode_kwargs = {'normalize_embeddings': True}
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs)

In [39]:
# Sanity check: number of unique chunks, then the concrete embedding class in use.
print(len(docs_processed), type(embedding_model), sep="\n")

171
<class 'langchain_huggingface.embeddings.huggingface.HuggingFaceEmbeddings'>


In [40]:
# Create the vector database

# Build a FAISS vector store from the de-duplicated chunks, embedding them with
# embedding_model.
# NOTE(review): COSINE is requested as the distance strategy; this presumably assumes
# unit-length embeddings — confirm the embedding model's normalize_embeddings setting.
vectordb = FAISS.from_documents(
    documents=docs_processed,
    embedding=embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)

In [43]:
class RetrieverTool(Tool):
    """Agent tool that returns the knowledge-base chunks most similar to a query.

    Args:
        vectordb: Vector store exposing ``similarity_search(query, k=...)``.
        k: Number of chunks to return for each query. Defaults to 7.
        **kwargs: Forwarded unchanged to ``Tool.__init__``.
    """

    name = "retriever"
    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, vectordb, k=7, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.k = k

    def forward(self, query: str) -> str:
        """Return the ``k`` closest chunks for ``query`` as one formatted string.

        Each chunk is preceded by a ``===== Document i =====`` header line.

        Raises:
            AssertionError: If ``query`` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.vectordb.similarity_search(query, k=self.k)

        # Join with a newline so that each document body ends before the next header
        # starts, instead of running straight into it on the same line.
        return "\nRetrieved documents:\n" + "\n".join(
            f"===== Document {i} =====\n{doc.page_content}" for i, doc in enumerate(docs)
        )

# Create an instance of the RetrieverTool
retriever_tool = RetrieverTool(vectordb)

In [59]:
from langchain_groq import ChatGroq
from google.colab import userdata
import os

# Never hardcode credentials in a notebook: anything committed here is public.
# The key is read from the environment if already set, otherwise from the Colab secret
# named GROQ_API_KEY (Colab sidebar -> Secrets).
# NOTE: any API key that was previously committed in this notebook must be treated as
# compromised and revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")

# The model id must not contain leading whitespace (a stray tab makes it an unknown model).
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.7,
)

In [60]:
import os
from groq import Groq

from typing import List, Dict
from transformers.agents.llm_engine import MessageRole, get_clean_message_list
from huggingface_hub import InferenceClient

openai_role_conversions = {
    MessageRole.TOOL_RESPONSE: MessageRole.USER,
}


class OpenAIEngine:
    """Callable LLM engine for the agent, backed by the Groq chat-completions client.

    The API key is read from the ``GROQ_API_KEY`` environment variable.

    Args:
        model_name: Model identifier sent with every request.
        temperature: Sampling temperature sent with every request.
        max_tokens: Upper bound on generated tokens per request.
    """

    def __init__(self, model_name="llama-3.3-70b-versatile", temperature=0.5, max_tokens=2048):
        self.model_name = model_name
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.client = Groq(
            api_key=os.getenv("GROQ_API_KEY"),
        )

    def __call__(self, messages, stop_sequences=None):
        """Send ``messages`` to the model and return the text of the first choice.

        Args:
            messages: Conversation messages; tool-response roles are converted to user
                roles via ``openai_role_conversions`` before sending.
            stop_sequences: Optional list of stop strings. ``None`` means no stop
                sequences.
        """
        # Use None as the default rather than a shared mutable list.
        if stop_sequences is None:
            stop_sequences = []

        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stop=stop_sequences,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        return response.choices[0].message.content

In [61]:
# Create the agent
# llm_engine is the Groq-backed callable defined in the previous cell.
llm_engine = OpenAIEngine()
# The retriever is the only tool passed explicitly. max_iterations=4 presumably caps the
# number of reasoning/tool-call steps and verbose=2 presumably enables detailed logging —
# confirm both against the transformers agents documentation.
agent = ReactJsonAgent(tools=[retriever_tool], llm_engine=llm_engine, max_iterations=4, verbose=2)

In [62]:
# Entry point for asking the agent a question
def run_agentic_rag(question: str) -> str:
    """Run ``question`` through the module-level ``agent`` and return its result.

    The question is appended to a fixed instruction block that tells the agent to use
    the 'retriever' tool, to retry with different queries, and to phrase queries in the
    affirmative form.

    Args:
        question: The user's question, inserted verbatim after ``Question:``.

    Returns:
        Whatever ``agent.run`` returns for the assembled prompt.
    """
    prompt = f"""Using the information contained in your knowledge base, which you can access with the 'retriever' tool,
give a comprehensive answer to the question below.
Respond only to the question asked, response should be concise and relevant to the question.
If you cannot find information, do not give up and try calling your retriever again with different arguments!
Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".

Question:
{question}"""
    return agent.run(prompt)

In [64]:
# Ask one question and show it together with the agent's final answer.
question = "Types of de morgan's theorem. "
answer = run_agentic_rag(question)
print(f"Question: {question}\nAnswer: {answer}")

[37;1mUsing the information contained in your knowledge base, which you can access with the 'retriever' tool,
give a comprehensive answer to the question below.
Respond only to the question asked, response should be concise and relevant to the question.
If you cannot find information, do not give up and try calling your retriever again with different arguments!
Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".

Question:
Types of de morgan's theorem. [0m
[38;20mSystem prompt is as follows:[0m
[38;20mYou are an expert assistant who can solve any task using JSON tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: 'retriever', 'final_answer'
The 

Question: Types of de morgan's theorem. 
Answer: There are two types of De Morgan's theorem: (1) A + B = (cid:22)A (cid:22)B, which states that the negation of the conjunction of two statements is equivalent to the disjunction of their negations, and (2) AB = A + B, which states that the negation of the disjunction of two statements is equivalent to the conjunction of their negations.


In [65]:
# Print the final answer again on its own; `answer` was produced by the previous cell.
print(f"Answer: {answer}")

Answer: There are two types of De Morgan's theorem: (1) A + B = (cid:22)A (cid:22)B, which states that the negation of the conjunction of two statements is equivalent to the disjunction of their negations, and (2) AB = A + B, which states that the negation of the disjunction of two statements is equivalent to the conjunction of their negations.
