In [9]:
pip install langchain lxml chromadb sentence-transformers ctransformers

Looking in indexes: https://cbernecker%40munichre.com:****@mrartiemea.jfrog.io/artifactory/api/pypi/dragonfly-pypi-virtual/simple
Collecting ctransformers
  Using cached https://mrartiemea.jfrog.io/artifactory/api/pypi/dragonfly-pypi-virtual/packages/packages/14/50/0b608e2abee4fc695b4e7ff5f569f5d32faf84a49e322034716fa157d1cf/ctransformers-0.2.27-py3-none-any.whl (9.9 MB)
Collecting py-cpuinfo<10.0.0,>=9.0.0
  Using cached https://mrartiemea.jfrog.io/artifactory/api/pypi/dragonfly-pypi-virtual/packages/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, ctransformers
Successfully installed ctransformers-0.2.27 py-cpuinfo-9.0.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Development\medium\.venv\Scripts\python.exe -m pip install --upgrade pip' command.


### Load the dataset
Visit Wikipedia and save the [Wikipedia page for the Porsche 911](https://en.wikipedia.org/wiki/Porsche_911) as a local HTML file; the next cell reads that file from disk. In this simplified example we load only a single page, but in practice you can load multiple pages.

In [2]:
from langchain.text_splitter import HTMLHeaderTextSplitter

# Location of the saved Wikipedia page; adjust this path to wherever the HTML file is stored.
file_path = r'C:\Development\Playground\langchain\data\Wikipedia 911\Porsche 911 - Wikipedia.html'

# (tag, label) pairs handed to the splitter: one entry per heading level to split on.
headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
    ("h4", "Header 4"),
    ("h5", "Header 5"),
]

# return_each_element is a flag; pass an explicit False rather than None (both are falsy).
html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on, return_each_element=False)

# Read the page inside a context manager so the file handle is closed as soon as
# the text has been split, instead of staying open for the life of the kernel.
with open(file_path, encoding='utf-8') as file1:
    html_header_splits = html_splitter.split_text(file1.read())

# Report how many header-delimited sections the splitter produced.
print("Loader {} files".format(len(html_header_splits)))

Loader 44 files


### Split documents and save them into a vector database
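The HTML page has already been divided into sections according to its headers. In this step, we split those sections further into overlapping chunks of at most 384 characters, so that each piece is small enough to be embedded on its own. Feel free to employ alternative criteria for segmentation.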

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from chromadb.utils import embedding_functions
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Chunker for the header sections: pieces of at most 384 units with 50 units of
# overlap between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=384, chunk_overlap=50)

# Break every header-delimited section into chunks using the splitter above.
splits = text_splitter.split_documents(html_header_splits)

### Create and Store embeddings in a vector database.
Next, we convert every chunk into a sentence embedding and store the resulting vectors in ChromaDB. The embedding function uses a transformer model loaded through the Sentence-Transformers (SBERT) library. You have the flexibility to experiment with various models. For reference, check the available options in the Pretrained Models section of the Sentence-Transformers documentation at [sbert.net](https://www.sbert.net/).

In [6]:
# Embedding model for the chunks: the "all-MiniLM-L6-v2" sentence-transformer,
# wrapped so that LangChain can call it. (The previously created Chroma default
# embedding function was never used and has been dropped.)
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and load the vectors into a Chroma vector store.
# NOTE(review): no collection name or persist directory is given; re-running this
# cell in the same kernel may add the same chunks again — confirm before relying
# on the count printed below.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding_function,
)

# Show how many entries the underlying collection now holds.
print(f'{vectordb._collection.count()} Embeddings are loaded in the Vector Database')

  from .autonotebook import tqdm as notebook_tqdm


345


### Initialize the local LLM model

Load a local LLM to answer the question. This can take a few minutes because the model will be downloaded the first time. 

In [27]:
from langchain.llms import CTransformers
# Generation settings handed to the model. If a graphics card is available,
# increase "gpu_layers" above 0.
config = {
    "gpu_layers": 0,
    "temperature": 1.1,
    "max_new_tokens": 1024,
    "context_length": 4096,
}

# Load the local model through the CTransformers wrapper.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_type="llama",
    config=config,
)

Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]


### Let's bring everything together

In [28]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
# Prompt for the answering step. {context} receives the retrieved chunks and
# {question} the user's query. The two spaces before the first line break are
# part of the original prompt text and are kept on purpose.
template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. Use three sentences maximum. "
    "Keep the answer as concise as possible.  \n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Assemble the retrieval-QA chain: the retriever is asked for 3 chunks per query
# (k=3), and the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Run the chain on a sample question.
results = qa_chain.invoke({"query": "Which Porsche has the highest top speed?"})