# Packages

In [None]:
! pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain langchain-core langchain_google_genai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m61.4/67.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m3.1 MB/s[0m 

# LangChain and Groq API keys

In [None]:
import os
from getpass import getpass

# LangSmith tracing settings (not secret).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# API keys must never be written into the notebook. Reuse a value that is
# already exported in the environment; otherwise prompt for it without echoing.
for _key_name in ('LANGCHAIN_API_KEY', 'GROQ_API_KEY'):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f'Enter {_key_name}: ')

# Part 1: Overview

[RAG quickstart](https://python.langchain.com/docs/use_cases/question_answering/quickstart)

The code is designed to load documents from a specified web page using a web scraping tool called `WebBaseLoader`.

1. **WebBaseLoader**: This is a class that facilitates loading and scraping web content. It is initialized with specific parameters to specify where to scrape data from.

2. **web_paths**: This parameter takes a tuple containing the URL(s) from which to load the documents. In this case, it is set to fetch data from the URL `https://lilianweng.github.io/posts/2023-06-23-agent/`.

3. **bs_kwargs**: This parameter is a dictionary that contains additional options for Beautiful Soup (bs4), which is a library used for parsing HTML and XML documents.

4. **parse_only**: Within `bs_kwargs`, this specifies that only certain parts of the HTML should be parsed. It uses `bs4.SoupStrainer`, which allows for more efficient parsing by focusing on specific elements.

5. **class_**: This argument within `SoupStrainer` indicates which HTML classes to include during parsing. Here, it specifies three classes: `"post-content"`, `"post-title"`, and `"post-header"`. This means that only the HTML elements with these classes will be considered when loading the document.

6. **loader.load()**: This method call executes the loading process. It retrieves the content from the specified web page, applying the parsing rules defined earlier, and stores the result in the variable `docs`.

In summary, the cell below sets up a web scraper that loads specific content from the blog post at the given URL. It targets only certain parts of the webpage (the post's content, title, and header) for parsing, which extracts the relevant information without unnecessary data. The resulting documents are stored in the `docs` variable for further processing or analysis.

In [None]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

#### INDEXING ####

# Load Documents
# Only elements carrying one of the listed CSS classes are parsed from the page.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()



In [None]:
# Sanity check: how many documents were loaded, and how the first one begins.
print(len(docs))
docs[0].page_content[:100]

1


'\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |'

## Splits

In the context of the loaded documents, the RecursiveCharacterTextSplitter is used to break down the content into smaller, manageable chunks. This is often necessary when working with large amounts of text, as it allows for more efficient processing and storage.

The chunk_size parameter determines the maximum length of each text chunk, while the chunk_overlap ensures that there is a certain amount of overlap between consecutive chunks. This overlap can be beneficial for maintaining context and continuity when processing the text chunks individually.

In [None]:
# Split the loaded documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # maximum length of each chunk
    chunk_overlap=200,  # overlap between consecutive chunks
)
splits = text_splitter.split_documents(docs)

In [None]:
# Show a few chunks from the middle of the list together with the total chunk count.
(splits[2:6], len(splits))

([Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates m

In [None]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.0.1


In [None]:
# Inspect the first chunk produced by the splitter.
splits[0]

Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refin

In [None]:
# Check which class the splitter returns for each chunk.
type(splits[0])

# vectorstore:

A vector store is a database that holds document embeddings. Embeddings are dense vector representations of text that capture the semantic meaning of the documents, so texts with similar meaning can be found by comparing their vectors.

In [None]:
import chromadb.utils.embedding_functions as embedding_functions
# `langchain.embeddings` is only a deprecated re-export of this class;
# import it from langchain_community, where it is actually defined.
from langchain_community.embeddings import HuggingFaceEmbeddings


# Initialize the Hugging Face Sentence Transformer model used to embed each chunk
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create the vector store: embed every chunk in `splits` and index it in Chroma
vector_store = Chroma.from_documents(documents=splits, embedding=embeddings)

In [None]:
query = "what is text decomposition?"
# Keep the hits in their own variable: reusing `docs` would overwrite the
# documents loaded from the web page, so re-running the split cell afterwards
# would chunk search results instead of the article.
similar_docs = vector_store.similarity_search(query)
similar_docs[0].page_content

'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'

In RAG, the process typically involves two main steps:

- Retrieval: Finding relevant documents or pieces of information from a large dataset or vector store that might be useful in answering a query.
- Generation: Using a language model (like GPT) to generate a response based on the retrieved documents.

The as_retriever() method is a utility provided by some libraries (like langchain) that converts the vector store into a retriever object.
A retriever is responsible for finding the most relevant documents or text snippets in the vector store based on a given query.

In [None]:
from transformers import pipeline

# Expose the vector store built earlier as a retriever that returns
# only the single most relevant document (k=1) for each query.
retriever = vector_store.as_retriever(search_kwargs=dict(k=1))

In [None]:
query = "what is task decomposition?"
# `get_relevant_documents` is deprecated; retrievers are Runnables, so use `invoke`.
# The result gets its own name so the web documents in `docs` are not overwritten.
retrieved_docs = retriever.invoke(query)
retrieved_docs[0].page_content

'Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'

### hub Object

  In this notebook, `hub` is the `langchain.hub` module imported earlier (`from langchain import hub`). It is a client for LangChain Hub, a hosted registry of shareable prompts, so here it would be used to fetch a ready-made prompt template rather than a model or a dataset.

### pull Method
  The pull method is used to retrieve a resource from the hub. This could mean downloading a model, a dataset, or in this case, a prompt template. The pull method indicates that the resource is being fetched from a remote repository or service.
### "rlm/rag-prompt"
  The string "rlm/rag-prompt" specifies the identifier for the resource being pulled. This identifier typically follows a naming convention with two parts separated by a slash:

  - **Namespace**: `rlm` could represent the organization or user that published the resource.
  - **Resource name**: `rag-prompt` indicates the specific resource being accessed, which in this case is likely a prompt template designed for retrieval-augmented generation tasks.

In [None]:
# NOTE: every line in this cell is commented out, so nothing here executes.
# It is a disabled experiment (local DialoGPT / flan-t5 models and the hub prompt).
# from transformers import AutoTokenizer, AutoModelForCausalLM

# # Load DialoGPT model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
# model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")


# # # Prompt
# # prompt_template = hub.pull("rlm/rag-prompt")

# # # LLM
# # llm = pipeline("text2text-generation", model="google/flan-t5-large")

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
!pip install langchain_groq

Collecting langchain_groq
  Downloading langchain_groq-0.1.9-py3-none-any.whl.metadata (2.9 kB)
Collecting groq<1,>=0.4.1 (from langchain_groq)
  Downloading groq-0.10.0-py3-none-any.whl.metadata (13 kB)
Downloading langchain_groq-0.1.9-py3-none-any.whl (14 kB)
Downloading groq-0.10.0-py3-none-any.whl (106 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.3/106.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain_groq
Successfully installed groq-0.10.0 langchain_groq-0.1.9


In [None]:
from langchain_groq import ChatGroq
from langchain.retrievers.multi_query import MultiQueryRetriever

# Chat model accessed through Groq; it is the generation step of the RAG chain built below.
# NOTE(review): MultiQueryRetriever is imported but not used in the visible cells.
# NOTE(review): hosted model ids get retired over time — confirm this one is still available.
llm = ChatGroq(model_name='llama-3.1-70b-versatile')

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate


def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# RAG

# Prompt with two placeholders: the retrieved context and the user's question.
template = """
Answer the following questions base only the provided context.
if you don't know the answer say you don't know.

{context}

question : {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# The leading dict feeds the same input to two branches:
#   - "context":  the retriever fetches documents, which format_docs joins into one string
#   - "question": RunnablePassthrough() forwards the input unchanged
# The filled prompt then goes to the LLM, and StrOutputParser returns the reply as a string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the full RAG chain on a sample question; the result is the model's answer as a plain string.
chain.invoke('what is task decomposition?')

'Task decomposition is the process of breaking down a complicated task into smaller and simpler steps, making it more manageable.'