In [1]:
# import
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter

# loader used below to fetch the blog post from the web
from langchain_community.document_loaders import WebBaseLoader

# Configuration: the values a reader is most likely to tune, kept in one place.
BLOG_POST_URL = "https://easonlai888.medium.com/working-with-table-data-in-documents-tips-and-tricks-for-llm-50f09d2c4e95"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Load blog post
loader = WebBaseLoader(BLOG_POST_URL)
documents = loader.load()

# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = text_splitter.split_documents(documents)

# Fail here with a readable message instead of further down when there is nothing to index.
if not docs:
    raise ValueError(f"No text chunks were produced from {BLOG_POST_URL}")

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# load it into Chroma
# NOTE(review): no collection name or persist directory is passed, so the store's
# lifetime is whatever Chroma defaults to -- confirm that re-running this cell in the
# same kernel does not accumulate chunks from earlier runs.
db = Chroma.from_documents(docs, embedding_function)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Inspect the split result: report how many chunks there are and preview only the
# first few, rather than dumping every chunk into the cell output.
print(f"{len(docs)} chunks")
docs[:3]

[Document(page_content='Working with Table Data in Documents: Tips and Tricks for LLM | by Eason | MediumOpen in appSign upSign inWriteSign upSign inMember-only storyWorking with Table Data in Documents: Tips and Tricks for LLMEason·Follow4 min read·Oct 28, 2023--2ShareIn the world of language model solutions, RAG (Retrieval-Augmented Generation) stands out as a revolutionary approach that combines retrieval and generation techniques. RAG leverages pre-existing information to enhance the accuracy and relevance of generated responses, making it a highly effective method for natural language processing.I have received inquiries regarding the handling of table data in document files, such as PDFs and Word documents, to make them more comprehensible to the Large Language Model (LLM). This is a common issue, as the document pre-processing treats all text in tables as unstructured text, which makes it challenging for LLM to comprehend the data’s meaning or structure. Consequently, LLM’s resu

In [4]:
# query it
# NOTE(review): this query quotes a "Tree of Thoughts" passage, while the URL loaded
# above is a post about table data in documents -- confirm that the query and the
# indexed page are meant to go together.
query = "Tree of Thoughts (Yao et al. 2023) extends"

# Keep the hits under their own name so the `docs` chunk list is not overwritten
# and the earlier cells stay re-runnable with the same meaning.
matched_docs = db.similarity_search(query)

# print results
if matched_docs:
    print(matched_docs[0].page_content)
else:
    print("No matching chunks found.")

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.
Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
T

In [5]:
# Expose the vector store through the retriever interface, asking for "mmr" search.
retriever = db.as_retriever(
    search_type="mmr",
)

In [9]:
# Run the retriever on the same query and keep the first hit.
# `invoke` is used because `get_relevant_documents` is deprecated in recent
# langchain-core releases.
res = retriever.invoke(query)[0]

# Last expression of the cell, so the retrieved document is actually displayed.
res

page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) 