First, install and import all requirements. This will also start the LLM server. The output contains some errors relating to ollama, which can safely be ignored.

In [None]:
# Install requirements
!rm /content/chroma.sqlite3*
!wget https://github.com/eur-nl/bongaerts-10k-rag/raw/refs/heads/main/chroma.sqlite3
!pip install langchain-community
!pip install langchain-chroma
!pip install langchain-huggingface
!pip install langchain-ollama

# Import requirements
import os
import subprocess
import time
import urllib.request

from chromadb.config import Settings
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama

# Get the database and embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Install and run ollama
!curl https://ollama.ai/install.sh | sh
!nohup ollama serve > ollama.log 2>&1 &
!ollama pull llama3.2:1b

Next, pick which segment of the vector database you would like to use for RAG. The possible values are 500, 1000, 2500 and optimized. Only the variable marked with the very explicit `EDIT THIS` comment needs to be edited in this cell. This cell will also give some errors related to chromadb, which can also be ignored.

In [None]:
# Initialize the vector database
#
# Pick a segment from the vector database for RAG. Possible values:
#   "500"       - static chunks with 500 characters
#   "1000"      - static chunks with 1000 characters
#   "2500"      - static chunks with 2500 characters
#   "optimized" - dynamic optimized chunks
db_segment = "optimized"  # EDIT THIS TO CHANGE SEGMENT

# Accept a bare number such as 500 as well as the string "500".
db_segment = str(db_segment)

# An unknown value still falls back to the default segment, but the fallback is
# reported so that a typo does not silently change which collection is queried.
valid_segments = ("500", "1000", "2500", "optimized")
if db_segment not in valid_segments:
    print(
        f"Unknown db_segment {db_segment!r}; using 'optimized' instead. "
        f"Valid values: {', '.join(valid_segments)}"
    )
    db_segment = "optimized"

# Open the collection for the chosen segment from the database in /content.
vector_db = Chroma(
    persist_directory="/content",
    embedding_function=embeddings,
    collection_name=f"10k_{db_segment}",
)

# Prepare the LLM: the llama3.2:1b chat model, accessed through ChatOllama
llm = ChatOllama(temperature=0.8, model="llama3.2:1b")

# Prompt that asks the LLM for five alternative phrasings of the user's
# question, one per line.
QUERY_PROMPT = PromptTemplate(
    template="""
    You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Multi-query retriever built from the vector database's retriever, the LLM and
# the rephrasing prompt above.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Prompt for the answering step: the question plus the retrieved context.
template = """You are an assistant for question-answering tasks for Retrieval Augmented Generation system for the 10K financial reports.
Use the following pieces of retrieved context to answer the question.
Keep the answer concise.
Question: {question}
Context: {context}
"""

prompt = ChatPromptTemplate.from_template(template)


def _format_docs(docs) -> str:
    """Return the text of the retrieved documents, separated by blank lines.

    Without this step the list of documents would be placed into the prompt
    via ``str()``, so the model would see ``Document(...)`` reprs (metadata,
    escaped newlines) instead of the plain chunk text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question is sent both to the retriever (to build the context) and,
# unchanged, to the prompt; the LLM's reply is reduced to a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Finally, ask your question here. This may take a while. The chromadb error in this cell can also be ignored.

In [None]:
# Ask a question: run it through the chain and print the answer
query = "Who is the CEO of Apple?"
answer = chain.invoke(query)
print(answer)