In [None]:
# Install required libraries
!pip install -q langchain langchain-community

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_community.document_loaders import WebBaseLoader, TextLoader
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter,
    TokenTextSplitter,
    SentenceTransformersTokenTextSplitter
)
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import os


# --- Source document -------------------------------------------------
# Download the blog post; every splitter below works on the same `docs`.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# --- Splitter configuration ------------------------------------------
# Recursive chunking (preserves structure better).
recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

# Character-based chunking that breaks on newlines.
char_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=500,
    chunk_overlap=50,
)

# Token-based chunking (with sentence-transformers tokenizer):
# 128 tokens per chunk, 20 tokens of overlap.
token_splitter = SentenceTransformersTokenTextSplitter(
    tokens_per_chunk=128,
    chunk_overlap=20,
)

# --- Apply each strategy to the same documents -----------------------
chunks_recursive = recursive_splitter.split_documents(docs)
chunks_char = char_splitter.split_documents(docs)
chunks_token = token_splitter.split_documents(docs)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import torch
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize

# -------------------------------
# 1. Load and Split Document into Sentences
# -------------------------------
# Fetch the article from the web.
url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
loader = WebBaseLoader(url)
docs = loader.load()

# Flatten all loaded documents into one space-joined string, then let
# NLTK break that string into individual sentences.
text = " ".join(doc.page_content for doc in docs)
sentences = sent_tokenize(text)

# -------------------------------
# 2. Load BERT Model for Sentence Embedding
# -------------------------------
# Prefer the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Tokenizer and encoder come from the same checkpoint; the encoder is
# moved to the selected device right after loading.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased').to(device)

def get_sentence_embedding(sentence):
    """Embed text with the module-level BERT model using masked mean pooling.

    Token vectors from the last hidden layer are averaged over the sequence
    dimension, weighting each position by the tokenizer's attention mask so
    that padding tokens (present when a batch of unequal-length texts is
    passed) do not dilute the result. For a single sentence the mask is all
    ones, so the result equals a plain mean over tokens.

    Args:
        sentence: A string, or a list of strings to embed as one padded batch.

    Returns:
        A CPU tensor with one row per input text (a single row for a single
        string); callers squeeze it to get a 1-D vector.
    """
    inputs = tokenizer(sentence, return_tensors='pt', truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension and match the dtype.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    summed = (hidden * mask).sum(dim=1)
    # clamp guards against division by zero for a fully-masked row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (summed / token_counts).cpu()

# -------------------------------
# 3. Generate Embeddings for Sentences
# -------------------------------
# One NumPy vector per sentence, in the same order as `sentences`.
embeddings = [get_sentence_embedding(sent).squeeze().numpy() for sent in sentences]

# -------------------------------
# 4. Contextual Chunking by Similarity Threshold
# -------------------------------
# A new chunk starts whenever the cosine similarity between a sentence and
# the one before it falls below `threshold`. Raising the threshold therefore
# produces more, smaller chunks; lowering it produces fewer, larger ones.
threshold = 0.7

chunks = []
# Slice instead of indexing so an empty `sentences` list yields an empty
# starting chunk rather than raising IndexError.
current_chunk = sentences[:1]

for i in range(1, len(sentences)):
    sim = cosine_similarity([embeddings[i]], [embeddings[i - 1]])[0][0]
    if sim < threshold:
        # Topic shift: close the running chunk and start a new one.
        chunks.append(" ".join(current_chunk))
        current_chunk = [sentences[i]]
    else:
        current_chunk.append(sentences[i])

# Add the last chunk (skipped when there were no sentences at all)
if current_chunk:
    chunks.append(" ".join(current_chunk))

# -------------------------------
# 5. Output the Chunks
# -------------------------------
print("Total Sentences: ", len(sentences))
print("Total Chunks Created: ", len(chunks))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Total Sentences:  426
Total Sentences:  426
Total Chunks Created:  215


In [None]:
import torch
from sentence_transformers import SentenceTransformer, util

# Fetch the source article.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# Cut the article into overlapping pieces and keep only their raw text.
# NOTE: `chunks` is rebound here; it previously held the similarity-based
# string chunks from the earlier cell and now holds split documents.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(docs)
chunk_texts = [chunk.page_content for chunk in chunks]

# Load the BGE-small embedding model.
# NOTE: this rebinds `model`, replacing the BERT model loaded in the earlier
# cell; `get_sentence_embedding` reads that global and should not be called
# after this cell has run.
model = SentenceTransformer("BAAI/bge-small-en-v1.5")

# Embed all chunk texts in one call; the result is returned as a tensor
# because of convert_to_tensor=True.
chunk_embeddings = model.encode(chunk_texts, convert_to_tensor=True)

In [None]:
# Sanity check: how many text chunks were produced and embedded.
len(chunk_texts)

134

In [None]:
def retrieve_relevant_chunks(query, top_k=3):
    """Print the chunks most similar to ``query``.

    Encodes the query with the module-level ``model``, scores it against
    ``chunk_embeddings`` with cosine similarity, and prints the best matching
    entries of ``chunk_texts``, highest score first.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to print. A request larger than the
            number of available chunks is capped instead of raising.

    Returns:
        None. Results are written to stdout.
    """
    # Encode the query
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Compute cosine similarities; row 0 holds the scores for the single query.
    similarities = util.cos_sim(query_embedding, chunk_embeddings)[0]

    # torch.topk raises when k exceeds the number of scores, so cap it.
    k = min(top_k, similarities.shape[0])
    top_results = torch.topk(similarities, k=k)

    print(f"\nTop {k} Chunks for Query: '{query}'")
    # Convert to plain Python floats/ints before formatting and indexing.
    ranked = zip(top_results.values.tolist(), top_results.indices.tolist())
    for rank, (score, idx) in enumerate(ranked, start=1):
        print(f"\n--- Chunk {rank} (Score: {score:.4f}) ---\n")
        print(chunk_texts[idx])


In [None]:
# Example query: prints the best-matching chunks (top_k defaults to 3).
retrieve_relevant_chunks("What is task decomposition?")


Top 3 Chunks for Query: 'What is task decomposition?'

--- Chunk 1 (Score: 0.8078) ---

Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

--- Chunk 2 (Score: 0.7579) ---

Planning

Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.


Memory

--- Chunk 3 (Score: 0.7457) ---

Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time com