In [1]:
#@title Usepackages

# standard
import os
import sys

from typing import Any

# langchain and rag
from langchain import PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import GPT4AllEmbeddings
from langchain.llms import Ollama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [2]:
#@title Utils

# class: suppress stdout and stderr
class SuppressStdout:
    """Context manager that silences ``sys.stdout`` and ``sys.stderr``.

    On entry both streams are pointed at ``os.devnull``; on exit the stream
    objects that were active at entry are restored and the devnull handle is
    closed. Exceptions raised inside the ``with`` body propagate unchanged.

    Only writes that go through ``sys.stdout`` / ``sys.stderr`` are affected;
    output written straight to the process file descriptors is not redirected.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr
        # A single handle serves both streams, so there is exactly one
        # resource to release on exit.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        sys.stderr = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so the interpreter never holds a closed stream, then
        # close the handle we opened (not whatever sys.stdout happens to be).
        sys.stdout = self._original_stdout
        sys.stderr = self._original_stderr
        self._devnull.close()
        # Falsy return: do not swallow exceptions from the with-body.
        return False

# function: load pdf paper
def load_pdf(file_path: str) -> Any:
    """Load a PDF and return its split pages.

    Args:
        file_path: Location of the PDF handed to ``PyPDFLoader`` (the caller
            in this notebook passes a URL; a local path is also intended).

    Returns:
        Whatever ``PyPDFLoader(file_path).load_and_split()`` produces.
    """
    return PyPDFLoader(file_path).load_and_split()

# function: create vectorized database
def create_vector_store(documents, chunk_size=1000, chunk_overlap=200):
    """Split ``documents`` into chunks, embed them, and index them in Chroma.

    Args:
        documents: Documents to index, passed to
            ``RecursiveCharacterTextSplitter.split_documents`` (in this
            notebook, the value returned by ``load_pdf``).
        chunk_size: ``chunk_size`` forwarded to the text splitter.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: ``chunk_overlap`` forwarded to the text splitter.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    embeddings = GPT4AllEmbeddings()

    # Building the index is wrapped so Python-level stdout/stderr chatter
    # emitted during embedding does not flood the notebook output.
    with SuppressStdout():
        vector_store = Chroma.from_documents(texts, embeddings)
    return vector_store




In [3]:
#@title Config

# Model name passed as `model=` to `Ollama(...)` when the LLM is built in the
# Main cell. Presumably this model must already be available to the local
# Ollama server — TODO confirm.
MODEL = "mistral-nemo"

In [4]:
#@title Main

# you can use a local file path or a URL
pdf_path = "https://arxiv.org/pdf/2405.14438"

# load the pdf
documents = load_pdf(pdf_path)

# create the vector store
vectorstore = create_vector_store(documents)

# define the prompt (built once: it does not depend on the query)
# NOTE: the leading spaces on the continuation lines are part of the prompt
# text sent to the model and are kept exactly as before.
template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    {context}
    Question: {question}
    Helpful Answer:"""

# define the qa chain prompt
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# define LLM (built once; the stdout callback streams tokens as they arrive)
llm = Ollama(model=MODEL, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

# define the qa chain (built once and reused for every query)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# qa while loop: type "exit" (or send EOF / interrupt the prompt) to stop
while True:
    try:
        query = input("\nQuery: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closing or interrupting the input prompt ends the session cleanly
        # instead of leaving a traceback in the cell output.
        break
    if query == "exit":
        break
    if not query:
        continue

    # The answer is streamed to stdout by the callback handler; `result`
    # is kept so it can still be inspected after the loop.
    result = qa_chain({"query": query})



Found model file at  /Users/cmoyacal/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


objc[44830]: Class GGMLMetalClass is implemented in both /Users/cmoyacal/miniforge3/envs/agents/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x126e34208) and /Users/cmoyacal/miniforge3/envs/agents/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x127260208). One of the two will be used. Which one is undefined.
  warn_deprecated(


Based on the provided context, here are the key contributions of the paper:

1. **LoRA-Ensemble**: The authors introduce LoRA-Ensemble, a method aimed at creating more efficient ensemble models for large language models (LLMs). This is done by freezing most of the LLM weights and only training a small number of low-rank matrices.
2. **Efficient Epistemic Uncertainty Quantification**: The paper focuses on efficiently quantifying epistemic uncertainty in large machine learning models, which has been traditionally challenging due to intractable analytical computation.
3. **Green AI Initiative**: The authors contribute to the concept of "Green AI" by aiming to reduce the computational resources and environmental impact associated with large LLMs. This is achieved through their efficient ensemble method that reduces training costs.
4. **Comparison with Previous Methods**: The paper compares LoRA-Ensemble with previous methods, such as those proposed by Graves (2011), Blundell et al. (2015),