In [1]:
from typing import Dict

from langchain.chains import (create_history_aware_retriever,
                              create_retrieval_chain)
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_ollama import ChatOllama


class SciBot:
    def __init__(self, llm: str) -> None:
        self.store = {}

        self.llm = ChatOllama(model=llm)
        # ===============================================

        ### Contextualize question ###
        self.contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, "
            "formulate a standalone question which can be understood "
            "without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )

        ### Answer question ###
        self.system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know. Use three sentences maximum and keep the "
            "answer concise."
            "\n\n"
            "{context}"
        )

    def ingest(self, db_path: str) -> None:
        """
        Load the database and create the conversational chain.
        """
        model = "hkunlp/instructor-xl"
        kwargs = {"device": "cpu"}
        embeddings = HuggingFaceInstructEmbeddings(
            model_name=model,
            model_kwargs=kwargs,
        )

        db = FAISS.load_local(
            folder_path=db_path,
            index_name="faiss_index",
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )

        self.retriever = db.as_retriever(
            search_type="mmr",  # “similarity” (default), “mmr”, or “similarity_score_threshold”
            search_kwargs={"k": 6},
        )

        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        history_aware_retriever = create_history_aware_retriever(
            self.llm, self.retriever, contextualize_q_prompt
        )

        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        self.question_answer_chain = create_stuff_documents_chain(self.llm, qa_prompt)

        self.rag_chain = create_retrieval_chain(
            history_aware_retriever, self.question_answer_chain
        )

        self.conversational_rag_chain = RunnableWithMessageHistory(
            self.rag_chain,
            self.get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )

    def get_session_history(self, session_id: str) -> ChatMessageHistory:
        """
        Get the chat history for a given session ID.
        """
        if session_id not in self.store:
            self.store[session_id] = ChatMessageHistory()
        return self.store[session_id]

    def ask(self, query: str, session_id: str = "abc123") -> Dict[str, str]:
        """
        Ask a question and get a response.
        """
        response = self.conversational_rag_chain.invoke(
            {"input": query},
            config={
                "configurable": {"session_id": session_id},
            },
        )
        return response

In [2]:
with open("../../data/llm_eval/questions.txt", "r") as f:
    questions = f.readlines()

In [3]:
llms = ["qwen2.5:3b", "qwen2.5:7b-instruct-q4_0", "llama3.1:latest", "llama3.2:latest"]
db_path = "../../data/dbs/db_instructor"

In [4]:
import time
import pandas as pd
from tqdm import tqdm

In [5]:
results = []
for llm in llms:
    print(llm)
    chat = SciBot(llm=llm)
    chat.ingest(db_path)
    sample_ans = chat.ask("What is the capital of France?") # Sample question to eliminate cold start
    
    for i, question in enumerate(tqdm(questions)):
        start = time.time()
        response = chat.ask(question, session_id=str(i))
        ex_time = time.time() - start
        context = "".join(f"Document {i+1}: \n {doc.page_content} \n\n" for i, doc in enumerate(response["context"]))
        results.append([llm, question, context, response["answer"], ex_time])

qwen2.5:3b


  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


  model.load_state_dict(torch.load(os.path.join(input_path, 'pytorch_model.bin'), map_location=torch.device('cpu')))
100%|██████████| 60/60 [04:04<00:00,  4.07s/it]


qwen2.5:7b-instruct-q4_0
load INSTRUCTOR_Transformer
max_seq_length  512


  model.load_state_dict(torch.load(os.path.join(input_path, 'pytorch_model.bin'), map_location=torch.device('cpu')))
100%|██████████| 60/60 [06:31<00:00,  6.52s/it]


llama3.1:latest
load INSTRUCTOR_Transformer
max_seq_length  512


  model.load_state_dict(torch.load(os.path.join(input_path, 'pytorch_model.bin'), map_location=torch.device('cpu')))
100%|██████████| 60/60 [05:37<00:00,  5.63s/it]


llama3.2:latest
load INSTRUCTOR_Transformer
max_seq_length  512


  model.load_state_dict(torch.load(os.path.join(input_path, 'pytorch_model.bin'), map_location=torch.device('cpu')))
100%|██████████| 60/60 [03:36<00:00,  3.61s/it]


In [8]:
df = pd.DataFrame(results, columns=["llm", "question", "context", "answer", "ex_time"])
df

Unnamed: 0,llm,question,context,answer,ex_time
0,qwen2.5:3b,"What does the term ""learn to optimize"" mean?\n","Document 1: \n NatlSciRev ,2024,Vol.11,nwae132...","The term ""learn to optimize"" (L2O) refers to a...",2.757843
1,qwen2.5:3b,Please give some examples of metaheuristics.\n,Document 1: \n usually provide only sub-optima...,Metaheuristics are high-level methodologies or...,2.751359
2,qwen2.5:3b,"What is the ""no free lunch"" theorem about?\n",Document 1: \n IEEE TRANSACTIONS ON EVOLUTIONA...,"The ""No Free Lunch"" (NFL) theorem states that ...",2.572277
3,qwen2.5:3b,What is the concept behind Parallel Algorithm ...,Document 1: \n training set as well as for con...,The concept of a Parallel Algorithm Portfolio ...,2.925742
4,qwen2.5:3b,Please provide some approaches to how Parallel...,"Document 1: \n algorithms, and thereby combine...","To construct parallel algorithm portfolios, ef...",3.219646
...,...,...,...,...,...
235,llama3.2:latest,How can EBMs help detect observations poorly i...,Document 1: \n concatenation of the two curves...,EBMs (Explainable Boosting Machines) can help ...,4.825688
236,llama3.2:latest,How can one distinct terms intepretabilitry an...,Document 1: \n our approach shows better resul...,"The terms ""interpretability"" and ""explainabili...",4.310438
237,llama3.2:latest,What issues in machine learning can be categor...,Document 1: \n 2.1 Characterizing Model Bugs.\...,"According to the text, model bugs are categori...",2.143233
238,llama3.2:latest,What XAI techniques can be heloful in detectin...,Document 1: \n query is generally much smaller...,"Based on the provided context, two XAI (Explai...",4.372954


In [9]:
df.to_csv("../../data/llm_eval/real_outputs.csv", index=False)