In [1]:
import bs4, tiktoken, numpy as np, os

from langchain import hub
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from dotenv import load_dotenv
load_dotenv()

learn_api_key = os.environ['LANGCHAIN_API_KEY']
openai_api_key = os.environ['OPENAI_API_KEY']
groq_api_key = os.environ['GROQ_API_KEY']

In [2]:
emb = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
print(emb.embed_query("Hello World"))

[-0.03447727486491203, 0.03102317824959755, 0.006734970025718212, 0.026108985766768456, -0.03936202451586723, -0.16030244529247284, 0.06692401319742203, -0.006441489793360233, -0.0474504791200161, 0.014758856035768986, 0.07087527960538864, 0.05552763119339943, 0.019193334504961967, -0.026251312345266342, -0.01010954286903143, -0.02694045566022396, 0.022307461127638817, -0.022226648405194283, -0.14969263970851898, -0.017493007704615593, 0.00767625542357564, 0.05435224249958992, 0.0032543970737606287, 0.031725890934467316, -0.0846213847398758, -0.02940601296722889, 0.05159561336040497, 0.04812406003475189, -0.0033148222137242556, -0.058279167860746384, 0.04196927323937416, 0.022210685536265373, 0.1281888335943222, -0.022338971495628357, -0.011656315997242928, 0.06292839348316193, -0.032876335084438324, -0.09122604131698608, -0.031175347045063972, 0.0526994913816452, 0.04703482985496521, -0.08420311659574509, -0.030056199058890343, -0.02074483036994934, 0.009517835453152657, -0.0037217906

## Part 1: Overview

In [3]:
### INDEXING ###

# Load Documents
loader = WebBaseLoader(
    web_paths=['https://lilianweng.github.io/posts/2023-06-23-agent/'],
    bs_kwargs=dict(
        parse_only=bs4.filter.SoupStrainer(
            class_=('post-content', 'post-title', 'post-header')
        )
    ),
)
docs = loader.load()

In [4]:
# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

for i, doc in enumerate(splits):
    if not doc.page_content.strip():
        print(f"Empty chunk at index {i} : {doc.metadata}")

In [5]:
vectorstore = Chroma.from_documents(
    [doc for doc in splits if doc.page_content.strip()],
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
)
retriever = vectorstore.as_retriever()

In [6]:
### RETRIEVAL and GENERATION ###

# Prompt
prompt = hub.pull('rlm/rag-prompt')

# LLM
# llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
llm = ChatGroq(
    model='llama-3.1-8b-instant',
    api_key=groq_api_key,
    temperature=0.0,
    max_retries=2,
)

# Post-processing
def format_docs(docs):
    return '\n\n'.join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {'context': retriever | format_docs, 'question': RunnablePassthrough()} | prompt | llm | StrOutputParser()
)

# Question
rag_chain.invoke('What is Task Decomposition?')

'Task Decomposition is the process of breaking down a complex task into smaller, manageable subtasks. It can be achieved through various methods, including simple prompting, task-specific instructions, and human inputs. This process helps agents plan ahead and understand the steps involved in completing a task.'

## Part 2: Indexing

In [7]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a dog."

In [8]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, 'cl100k_base')

8

In [9]:
embed = HuggingFaceEmbeddings()
query_result = embed.embed_query(question)
document_result = embed.embed_query(document)
len(query_result)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

768

In [10]:
def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.5686721731463987


In [11]:
### INDEXING ###

# Load blog
loader = WebBaseLoader(
    web_paths=['https://lilianweng.github.io/posts/2023-06-23-agent/'],
    bs_kwargs=dict(
        parse_only=bs4.filter.SoupStrainer(
            class_=('post-content', 'post-title', 'post-header')
        )
    ),
)
blog_docs = loader.load()

In [12]:
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)

vectorstore = Chroma.from_documents(
    splits,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
)
retriever = vectorstore.as_retriever()

## Part 3: Retrieval

In [13]:
retriever = vectorstore.as_retriever(search_kwargs={'k':1})

In [14]:
docs = retriever.get_relevant_documents("What is Task Decomposition?")

  docs = retriever.get_relevant_documents("What is Task Decomposition?")


In [15]:
len(docs)

1

## Part 4: Generation

In [16]:
# Prompts
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [17]:
# LLM
llm = ChatGroq(model='llama-3.1-8b-instant', temperature=0)

# Chain
chain = prompt | llm

# Run
chain.invoke({'context':docs, "question":"What is Task Decomposition?"})

AIMessage(content='Task Decomposition is the process of breaking down a complicated task into smaller and simpler steps that an agent can plan ahead and manage. It involves transforming big tasks into multiple manageable tasks, allowing for a clearer understanding of the thinking process involved.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 48, 'prompt_tokens': 343, 'total_tokens': 391, 'completion_time': 0.09256295, 'prompt_time': 0.028858977, 'queue_time': 0.431566889, 'total_time': 0.121421927}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_46fc01befd', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--2f983bf4-a31a-43cc-a3c9-5e4a09b21846-0', usage_metadata={'input_tokens': 343, 'output_tokens': 48, 'total_tokens': 391})

In [18]:
prompt_hub_rag = hub.pull("rlm/rag-prompt")
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [19]:
rag_chain = (
    {"context":retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is the process of breaking down a complicated task into smaller and simpler steps that an agent can plan ahead and manage. It involves transforming big tasks into multiple manageable tasks, allowing for a clearer understanding of the thinking process involved.'