In [1]:
!pip install pandas langchain langchain-community sentence-transformers faiss-cpu "transformers[agents]"

Collecting langchain
  Downloading langchain-0.2.10-py3-none-any.whl (990 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.0/990.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.2.9-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.22 (from langchain)
  Downloading langchain_core-0.2.22-py3

In [2]:
!pip install "git+https://github.com/huggingface/transformers.git#egg=transformers[agents]"

[33mDEPRECATION: git+https://github.com/huggingface/transformers.git#egg=transformers[agents] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617[0m[33m
[0mCollecting transformers[agents]
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-install-vknfcj69/transformers_febd6377a9c34552a976b7c6fa46754f
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-install-vknfcj69/transformers_febd6377a9c34552a976b7c6fa46754f
  Resolved https://github.com/huggingface/transformers.git to commit 0fdea8607d7e01eb0e38a1ebeb7feee30a22f0cf
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for colle

In [1]:
pip install groq

Collecting groq
  Using cached groq-0.9.0-py3-none-any.whl (103 kB)
Collecting httpx<1,>=0.23.0 (from groq)
  Using cached httpx-0.27.0-py3-none-any.whl (75 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Using cached httpcore-1.0.5-py3-none-any.whl (77 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Using cached h11-0.14.0-py3-none-any.whl (58 kB)
Installing collected packages: h11, httpcore, httpx, groq
Successfully installed groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0


In [2]:
import pandas as pd
import datasets
from transformers import AutoTokenizer
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from tqdm import tqdm
from transformers.agents import Tool, HfEngine, ReactJsonAgent
from huggingface_hub import InferenceClient
import logging

# Set up logging.
# force=True replaces any handlers already attached to the root logger. Without it,
# basicConfig() does nothing when the host environment has pre-configured logging,
# and the INFO messages emitted throughout this notebook are never shown.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

In [22]:
# Load the knowledge base: the "train" split of the m-ric/huggingface_doc dataset.
kb = datasets.load_dataset(
    path="m-ric/huggingface_doc",
    split="train",
)

In [23]:
# Display the dataset summary (its features and number of rows).
kb

Dataset({
    features: ['text', 'source'],
    num_rows: 2647
})

In [24]:
# Wrap every knowledge-base row in a Document. The text becomes the page content and
# the second "/"-separated component of the row's source path becomes its "source" metadata.
source_docs = [
    Document(
        page_content=row["text"],
        metadata={"source": row["source"].split("/")[1]},
    )
    for row in kb
]

logger.info(f"Loaded {len(source_docs)} documents from the knowledge base")

In [28]:
# Peek at two of the converted documents (indices 10 and 11).
source_docs[10:12]

 Document(metadata={'source': 'blog'}, page_content='--\ntitle: "Large Language Models: A New Moore\'s Law?"\nthumbnail: /blog/assets/33_large_language_models/01_model_size.jpg\nauthors:\n- user: juliensimon\n---\n\n# Large Language Models: A New Moore\'s Law?\n\n\n\nA few days ago, Microsoft and NVIDIA [introduced](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/) Megatron-Turing NLG 530B, a Transformer-based model hailed as "*the world’s largest and most powerful generative language model*."\n \nThis is an impressive show of Machine Learning engineering, no doubt about it. Yet, should we be excited about this mega-model trend?  I, for one, am not. Here\'s why.\n\n<kbd>\n  <img src="assets/33_large_language_models/01_model_size.jpg">\n</kbd>\n\n### This is your Brain on Deep Learning\n\nResearchers estimate that the human brain contains an average of [86 billion 

In [26]:
# Initialize the text splitter.
# Chunk lengths are measured with the embedding model's own tokenizer.
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")

# gte-small truncates its input at 512 tokens, so any part of a chunk beyond that limit
# would be silently left out of the chunk's embedding. Cap the chunk size at the limit.
MAX_CHUNK_TOKENS = 512

text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=MAX_CHUNK_TOKENS,
    chunk_overlap=100,
    add_start_index=True,
    strip_whitespace=True,
    # Tried in order: paragraph breaks, line breaks, sentence ends, spaces, then single characters.
    separators=["\n\n", "\n", ".", " ", ""],
)

In [34]:
# Work on a small subset: only the first 100 documents are chunked and indexed.
data = source_docs[:100]

In [35]:
# Confirm the size of the subset.
len(data)

100

In [36]:
# Split documents into chunks and drop chunks whose text has already been seen.
logger.info("Splitting documents...")
docs_processed = []
unique_texts = set()  # chunk texts seen so far; a set replaces the former dict-of-True
for doc in tqdm(data):
    new_docs = text_splitter.split_documents([doc])
    for new_doc in new_docs:
        if new_doc.page_content in unique_texts:
            continue  # exact duplicate of an earlier chunk
        unique_texts.add(new_doc.page_content)
        docs_processed.append(new_doc)

logger.info(f"Processed {len(docs_processed)} unique document chunks")

100%|██████████| 100/100 [00:05<00:00, 19.16it/s]


In [37]:
# Initialize the embedding model
logger.info("Initializing embedding model...")
embedding_model = HuggingFaceEmbeddings(
    model_name="thenlper/gte-small",
    # Request unit-length embeddings explicitly: the cosine distance strategy used below
    # is meant for normalized vectors, and this avoids relying on the model's own
    # pipeline to normalize its output.
    encode_kwargs={"normalize_embeddings": True},
)

# Create the vector database from the de-duplicated chunks
logger.info("Creating vector database...")
vectordb = FAISS.from_documents(
    documents=docs_processed,
    embedding=embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)

logger.info("Vector database created successfully")

  warn_deprecated(


modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/68.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/66.7M [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [38]:
# Save the index locally for future use; report success only after the save has returned.
vectordb.save_local("local")
logger.info("Saved Vector database successfully")

In [39]:
class RetrieverTool(Tool):
    """Agent tool that retrieves knowledge-base chunks by semantic similarity.

    The class attributes below (name, description, inputs, output_type) declare the
    tool's metadata; ``forward`` performs the actual lookup.
    """

    name = "retriever"
    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "text",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "text"

    def __init__(self, vectordb, k=7, **kwargs):
        """Create the tool.

        Args:
            vectordb: Vector store exposing ``similarity_search(query, k=...)``.
            k: Number of documents to retrieve per query. Defaults to 7, the value
                that used to be hard-coded in ``forward``.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.k = k

    def forward(self, query: str) -> str:
        """Return the ``k`` closest documents to ``query`` as one formatted string.

        Args:
            query: Search text; must be a ``str``.

        Returns:
            A "Retrieved documents:" header followed by one
            "===== Document <i> =====" section per hit, one section per line group.

        Raises:
            AssertionError: If ``query`` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.vectordb.similarity_search(
            query,
            k=self.k,
        )

        sections = [
            f"===== Document {i} =====\n{doc.page_content}" for i, doc in enumerate(docs)
        ]
        # Join with a newline so each header starts on its own line instead of being
        # glued to the end of the previous document's text.
        return "\nRetrieved documents:\n" + "\n".join(sections)



In [40]:
# Build the retriever tool on top of the FAISS vector store created above.
retriever_tool = RetrieverTool(vectordb)

In [41]:
# Display the tool instance to confirm it was created.
retriever_tool

<__main__.RetrieverTool at 0x7a55a0e7cbb0>

In [42]:
import os
from groq import Groq
from google.colab import userdata
# os.environ["GROQ_API_KEY"]=userdata.get('GROQ_API_KEY')


from typing import List, Dict
from transformers.agents.llm_engine import MessageRole, get_clean_message_list
from huggingface_hub import InferenceClient

# Role remapping handed to get_clean_message_list: tool-response messages are
# re-labelled as user messages before being sent to the chat model.
# NOTE(review): presumably needed because the chat API has no tool-response role — confirm.
role_conversions = {MessageRole.TOOL_RESPONSE: MessageRole.USER}


class GorqEngine:
    """LLM engine that sends the agent's chat messages to a Groq chat-completion model."""

    def __init__(self, model_name="llama3-8b-8192"):
        """Store the model name and build the Groq client.

        Args:
            model_name: Identifier of the Groq-hosted model to query.

        The API key is read through ``userdata.get('GROQ_API_KEY')``.
        """
        self.model_name = model_name
        self.client = Groq(
            api_key=userdata.get('GROQ_API_KEY')
        )

    def __call__(self, messages, stop_sequences=None):
        """Run one chat completion and return the text of the first choice.

        Args:
            messages: Agent message list; it is passed through
                ``get_clean_message_list`` with ``role_conversions`` before sending.
            stop_sequences: Optional list of stop strings. ``None`` (the default)
                sends an empty list, exactly as the former ``[]`` default did.

        Returns:
            The ``content`` of the first returned choice's message.
        """
        # Use None as the default instead of a shared mutable list.
        stop = [] if stop_sequences is None else stop_sequences

        messages = get_clean_message_list(messages, role_conversions=role_conversions)

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stop=stop,
            temperature=0.5,
        )
        return response.choices[0].message.content

In [43]:
# Instantiate the Groq-backed engine with its default model name.
llm_engine = GorqEngine()

In [44]:
# Build the ReAct JSON agent: its only tool is the retriever, it is driven by the
# Groq engine, and it is limited to four iterations.
agent = ReactJsonAgent(
    tools=[retriever_tool],
    llm_engine=llm_engine,
    max_iterations=4,
    verbose=2,
)

In [45]:
# Function to run the agent
def run_agentic_rag(question: str) -> str:
    """Wrap ``question`` in retrieval instructions and return the agent's answer.

    The instructions tell the agent to use the 'retriever' tool, to retry with
    semantically different affirmative-form queries, and to answer concisely.
    The resulting prompt is passed to the module-level ``agent``.
    """
    prompt_lines = (
        "Using the information contained in your knowledge base, which you can access with the 'retriever' tool,",
        "give a comprehensive answer to the question below.",
        "Respond only to the question asked, response should be concise and relevant to the question.",
        "If you cannot find information, do not give up and try calling your retriever again with different arguments!",
        "Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.",
        'Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".',
        "",
        "Question:",
        f"{question}",
    )
    prompt = "\n".join(prompt_lines)
    return agent.run(prompt)


In [46]:
# Example usage: ask a single question and print it next to the agent's answer.
question = "How can I push a model to the Hub?"
answer = run_agentic_rag(question)
print(f"Question: {question}", f"Answer: {answer}", sep="\n")

[37;1mUsing the information contained in your knowledge base, which you can access with the 'retriever' tool,
give a comprehensive answer to the question below.
Respond only to the question asked, response should be concise and relevant to the question.
If you cannot find information, do not give up and try calling your retriever again with different arguments!
Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".

Question:
How can I push a model to the Hub?[0m
[38;20mSystem prompt is as follows:[0m
[38;20mYou are an expert assistant who can solve any task using JSON tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: 'retriever', 'final_answer'


Question: How can I push a model to the Hub?
Answer: Based on the information provided, here is a comprehensive answer to the question:

To push a model to the Hub, you can use the `push_to_hub` function provided by 🤗 Transformers. This function allows you to upload your model to the Hub and make it available for others to use.

Here is an example of how to use the `push_to_hub` function:
```
import torch
from transformers import push_to_hub

# Load your model
model = torch.load("path/to/your/model.pth")

# Push the model to the Hub
push_to_hub(model, repo_id="your-username/your-model-name")
```
You can also use the `huggingface_hub` library to push a model to the Hub. Here is an example:
```
import huggingface_hub

# Load your model
model = torch.load("path/to/your/model.pth")

# Push the model to the Hub
huggingface_hub.push_to_hub(model, repo_id="your-username/your-model-name")
```
When pushing a model to the Hub, you will need to specify the repository ID, which is the unique ident