In [29]:
from langchain.document_loaders import UnstructuredHTMLLoader as HTMLLoader
from glob import glob
from tqdm.auto import tqdm

In [30]:
def create_loaders():
    """Build one ``HTMLLoader`` for every HTML file under ``pl-docs/``.

    Returns:
        list: One loader per path matched by ``pl-docs/*.html``, in the
        order ``glob`` yields the paths.
    """
    loaders = []
    for html_path in glob("pl-docs/*.html"):
        loaders.append(HTMLLoader(html_path))
    return loaders

In [31]:
def create_documents(pattern="pl-docs/*.html"):
    """Load every HTML file matching ``pattern`` and build a parallel id list.

    Each matched file is loaded with ``HTMLLoader``; whatever documents the
    loader returns are collected into a single flat list. Exactly one id is
    produced per collected document so the two returned lists always have
    the same length.

    Args:
        pattern: Glob pattern selecting the HTML files to load. Defaults to
            ``"pl-docs/*.html"``.

    Returns:
        tuple: ``(documents, ids)`` where ``ids[i]`` identifies
        ``documents[i]``. A file that yields exactly one document uses its
        path as the id; a file that yields several documents gets ids of the
        form ``"<path>#<index>"``; a file that yields none contributes
        nothing to either list.
    """
    documents = []
    ids = []
    # glob makes no ordering guarantee; sort so repeated runs are reproducible.
    files = sorted(glob(pattern))
    for file in tqdm(files):
        loaded = HTMLLoader(file).load()
        documents.extend(loaded)
        if len(loaded) == 1:
            # Common case: keep the bare file path as the id.
            ids.append(file)
        else:
            # Zero or several documents: emit one unique id per document so
            # ids never drifts out of step with documents.
            ids.extend(f"{file}#{index}" for index in range(len(loaded)))
    return documents, ids

In [32]:
documents, ids = create_documents()

# Both lists are handed to the vector store together further down, so they
# must line up one-to-one; fail here rather than inside the indexing call.
assert len(documents) == len(ids), "expected exactly one id per loaded document"

  0%|          | 0/8 [00:00<?, ?it/s]

In [None]:
# Name of the model passed to HuggingFaceEmbeddings when building the index.
# Alternative left disabled by the author: "google/flan-t5-small"
model_name = "sentence-transformers/all-mpnet-base-v2"

In [33]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding wrapper configured with `model_name`, which is set in an earlier cell.
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# Index the loaded documents in a Chroma store, using the ids returned by
# create_documents(). The cell output reports the store runs without
# persistence, so the index has to be rebuilt in every new session.
db = Chroma.from_documents(documents, embeddings, ids=ids)

Using embedded DuckDB without persistence: data will be transient


In [34]:
# Query the vector store directly to sanity-check the index.
query = "mixed precision"
result = db.similarity_search(query)
# Bare last expression so the notebook displays the matched documents.
result

[Document(page_content='N-Bit Precision (Basic)\n\nAudience: Users looking to train models faster and consume less memory.\n\nIf you’re looking to run models faster or consume less memory, consider tweaking the precision settings of your models.\n\nLower precision, such as 16-bit floating-point, requires less memory and enables training and deploying larger models.\nHigher precision, such as the 64-bit floating-point, can be used for highly sensitive use-cases.\n\n16-bit Precision\n\nUse 16-bit mixed precision to lower your memory consumption by up to half so that you can train and deploy larger models. If your GPUs are [Tensor Core] GPUs, you can also get a ~3x speed improvement. Half precision can sometimes lead to unstable training.\n\nTrainer\n\nprecision\n\n\'16-mixed\'\n\n32-bit Precision\n\n32-bit precision is the default used across all models and research. This precision is known to be stable in contrast to lower precision settings.\n\nTrainer\n\nprecision\n\n"32-true"\n\n# or

In [35]:
# Expose the vector store through its retriever interface.
retriever = db.as_retriever()

In [36]:
# Reuse `query` from the similarity-search cell so both lookups run on the
# same input and their outputs can be compared directly.
retriever.get_relevant_documents(query)

[Document(page_content='N-Bit Precision (Basic)\n\nAudience: Users looking to train models faster and consume less memory.\n\nIf you’re looking to run models faster or consume less memory, consider tweaking the precision settings of your models.\n\nLower precision, such as 16-bit floating-point, requires less memory and enables training and deploying larger models.\nHigher precision, such as the 64-bit floating-point, can be used for highly sensitive use-cases.\n\n16-bit Precision\n\nUse 16-bit mixed precision to lower your memory consumption by up to half so that you can train and deploy larger models. If your GPUs are [Tensor Core] GPUs, you can also get a ~3x speed improvement. Half precision can sometimes lead to unstable training.\n\nTrainer\n\nprecision\n\n\'16-mixed\'\n\n32-bit Precision\n\n32-bit precision is the default used across all models and research. This precision is known to be stable in contrast to lower precision settings.\n\nTrainer\n\nprecision\n\n"32-true"\n\n# or