In [1]:
import ast
import os

import sys
import tqdm
import pandas as pd

from datasets import Dataset
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from pathlib import Path

from ragas import EvaluationDataset
from ragas import evaluate
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness
from ragas.run_config import RunConfig

# Put the parent of the current working directory first on sys.path so that the
# project-local `app` package imported below can be resolved.
# NOTE(review): assumes the notebook is run from a directory one level below the
# repository root — confirm.
sys.path.insert(0, str(Path().resolve().parent))

from app.services.rag_pipeline.model import HybridRetriever, AnswerGenerator

In [3]:
# Load the evaluation test set (questions, reference contexts, reference answers).
testdataset = pd.read_csv(
    '~/Work/data/q_insigt/q-insigt-testset.csv',
    index_col=False,
)

In [4]:
# Preview the first five test-set rows.
testdataset.head(n=5)

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,–ß—Ç–æ –ø—Ä–æ–∏–∑–æ—à–ª–æ —Å –ì–∞–±–µ–Ω–æ–º –≤ –ø–æ—Å–ª–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è?,"['POV ‚Äì –∫–∞–∑—É–∞–ª—å–Ω—ã–π –ì–∞–±–µ–Ω, –∫–æ–≥–¥–∞ –≤—Å–µ –∂–¥—É—Ç –≥–µ–π–º–ø...","–ù–µ–¥–∞–≤–Ω–æ –ì–∞–±–µ–Ω —Å—Ç–∞–ª –∫–∞–∑—É–∞–ª—å–Ω—ã–º, –∫–æ–≥–¥–∞ –≤—Å–µ –∂–¥–∞–ª–∏...",single_hop_specifc_query_synthesizer
1,–ß—Ç–æ —Ç–∞–∫–æ–µ Kafka –∏ –ø–æ—á–µ–º—É —ç—Ç–æ –≤–∞–∂–Ω–æ –¥–ª—è —Ä–∞–±–æ—Ç—ã ...,['#–≤–∞–∫–∞–Ω—Å–∏—è #Middle #DataAnalyst #fulltime #—É–¥...,"Kafka ‚Äî —ç—Ç–æ —Å–∏—Å—Ç–µ–º–∞, –∫–æ—Ç–æ—Ä–∞—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è ...",single_hop_specifc_query_synthesizer
2,–ß—Ç–æ —Ç–∞–∫–æ–µ –í–∏—Å–ø –∏ –ø–æ—á–µ–º—É –µ–≥–æ —É–ø–æ–º–∏–Ω–∞—é—Ç –≤–º–µ—Å—Ç–µ —Å...,['–°–Ω–æ–≤–∞ —á—Ç–æ-—Ç–æ –Ω–∞ –∫–≤–∏–Ω–Ω–æ–≤—Å–∫–æ–º üÜó –õ–µ–≥–∫–æ –≥–æ–≤–æ—Ä–∏—Ç—å...,–í–∏—Å–ø —É–ø–æ–º–∏–Ω–∞–µ—Ç—Å—è –≤ –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –õ–µ—à–µ–≥–æ –∫–∞–∫ –æ–¥–Ω–∞ –∏...,single_hop_specifc_query_synthesizer
3,–ö–∞–∫–æ–π –∫–æ–Ω—Ç—Ä–∞–∫—Ç –≤—ã–∏–≥—Ä–∞–ª–∞ Lockheed Martin –∏ –Ω–∞ –∫...,['Lockheed Martin –≤—ã–∏–≥—Ä–∞–ª–∞ –∫–æ–Ω—Ç—Ä–∞–∫—Ç –Ω–∞ –ø—Ä–æ—Ç–∏–≤–æ...,Lockheed Martin –≤—ã–∏–≥—Ä–∞–ª–∞ –∫–æ–Ω—Ç—Ä–∞–∫—Ç –Ω–∞ –ø—Ä–æ—Ç–∏–≤–æ—Ä–∞...,single_hop_specifc_query_synthesizer
4,–ö—Ç–æ –ø–æ–ª—É—á–∞–µ—Ç –º–µ—Ä—á?,"['–ü—Ä–∏–≤–µ—Ç, –ß–µ–º–ø–∏–æ–Ω—ã!üèÜ\n\n–°–µ–≥–æ–¥–Ω—è –ø—Ä–æ–∏–∑–æ—à–ª–æ –∑–Ω–∞–º...","–ú–µ—Ä—á –ø–æ–ª—É—á–∞–µ—Ç –∫–∞–∂–¥—ã–π, –∫—Ç–æ –ø–æ–ª—É—á–∏–ª —Å–µ—Ä—Ç–∏—Ñ–∏–∫–∞—Ç –∏...",single_hop_specifc_query_synthesizer


In [5]:
# Each `reference_contexts` cell holds the string form of a Python list; parse it
# and keep the first context as `text` (used below as the join key against posts).
testdataset['text'] = testdataset['reference_contexts'].map(
    lambda serialized: ast.literal_eval(serialized)[0]
)

In [6]:
# Raw Telegram posts. Rows without `text` are discarded right away: `text` is both
# the join key for the test set and the payload that gets indexed later.
data_path = "~/Work/data/q_insigt/telegram_posts.csv"
df = pd.read_csv(data_path).dropna(subset=['text'])

In [7]:
# Preview the first five posts.
df.head(n=5)

Unnamed: 0,date,name,text,views,comments,forwards,emoji,reactions,subscribers
0,2024-06-27 14:59:34,üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...,"**üë®‚Äçüíª**** –ê–ø–¥–µ–π—Ç –ø—Ä–æ Kaggle Camp, –∫–æ—Ç–æ—Ä—ã–π –≤—ã –∂...",755.0,2,3,"üëç: 14, ‚ö°: 7, ‚ù§‚Äçüî•: 3",24,2672
1,2024-06-25 15:33:36,üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...,üë®‚Äçüíª **–ö–æ–Ω—Ç–µ–Ω—Ç –∏–∑ —Å–æ–æ–±—â–µ—Å—Ç–≤–∞**! \n**\n****üèÑ****...,877.0,3,19,"üëç: 16, Custom emoji: 7",23,2672
2,2024-06-24 12:06:49,üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...,üèÜ [**–ó–∞–ø–∏—Å—å —Å—Ç—Ä–∏–º–∞**](https://youtu.be/kz__54Y...,1075.0,0,11,"‚ù§‚Äçüî•: 10, Custom emoji: 2",12,2672
3,2024-06-21 14:10:38,üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...,–°—Ç—Ä–∏–º —Å–ª—É—á–∏–ª—Å—è! –í—Å–µ—Ö –ø—Ä–∏—á–∞—Å—Ç–Ω—ã—Ö —Ä–∞–¥—ã –±—ã–ª–∏ —É–≤–∏–¥...,1348.0,9,2,‚ù§‚Äçüî•: 4,4,2672
4,2024-06-21 12:45:52,üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...,"–ü—Ä–∏–≤–µ—Ç, —Ä–∞–±–æ—Ç—è–≥–∏! üë®‚Äçüíª\n–ö–∞–∫ –Ω–µ–¥–µ–ª—è –ø—Ä–æ—à–ª–∞? –ï—Å—Ç—å...",1348.0,4,2,‚ù§‚Äçüî•: 10,10,2672


In [8]:
# Attach the source channel (`name`) to every test question by matching the first
# reference context (`text`) against the raw post text. The join is an inner join,
# so questions whose context matches no post are dropped.
#
# - drop(columns=['name'], errors='ignore') keeps the cell re-runnable: without it
#   a second execution merges again and yields `name_x` / `name_y`, which breaks
#   the later `row['name']` lookups.
# - drop_duplicates() prevents posts that occur more than once with the same
#   channel name from multiplying test rows.
testdataset = (
    testdataset
    .drop(columns=['name'], errors='ignore')
    .merge(df[['text', 'name']].drop_duplicates(), on='text')
)

In [9]:
# Check that the `text` and `name` columns are now present.
testdataset.head(n=5)

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name,text,name
0,–ß—Ç–æ –ø—Ä–æ–∏–∑–æ—à–ª–æ —Å –ì–∞–±–µ–Ω–æ–º –≤ –ø–æ—Å–ª–µ–¥–Ω–µ–µ –≤—Ä–µ–º—è?,"['POV ‚Äì –∫–∞–∑—É–∞–ª—å–Ω—ã–π –ì–∞–±–µ–Ω, –∫–æ–≥–¥–∞ –≤—Å–µ –∂–¥—É—Ç –≥–µ–π–º–ø...","–ù–µ–¥–∞–≤–Ω–æ –ì–∞–±–µ–Ω —Å—Ç–∞–ª –∫–∞–∑—É–∞–ª—å–Ω—ã–º, –∫–æ–≥–¥–∞ –≤—Å–µ –∂–¥–∞–ª–∏...",single_hop_specifc_query_synthesizer,"POV ‚Äì –∫–∞–∑—É–∞–ª—å–Ω—ã–π –ì–∞–±–µ–Ω, –∫–æ–≥–¥–∞ –≤—Å–µ –∂–¥—É—Ç –≥–µ–π–º–ø–ª–µ...",BetBoom Esports Dota 2
1,–ß—Ç–æ —Ç–∞–∫–æ–µ Kafka –∏ –ø–æ—á–µ–º—É —ç—Ç–æ –≤–∞–∂–Ω–æ –¥–ª—è —Ä–∞–±–æ—Ç—ã ...,['#–≤–∞–∫–∞–Ω—Å–∏—è #Middle #DataAnalyst #fulltime #—É–¥...,"Kafka ‚Äî —ç—Ç–æ —Å–∏—Å—Ç–µ–º–∞, –∫–æ—Ç–æ—Ä–∞—è –∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è –¥–ª—è ...",single_hop_specifc_query_synthesizer,#–≤–∞–∫–∞–Ω—Å–∏—è #Middle #DataAnalyst #fulltime #—É–¥–∞–ª...,Data Science Jobs
2,–ß—Ç–æ —Ç–∞–∫–æ–µ –í–∏—Å–ø –∏ –ø–æ—á–µ–º—É –µ–≥–æ —É–ø–æ–º–∏–Ω–∞—é—Ç –≤–º–µ—Å—Ç–µ —Å...,['–°–Ω–æ–≤–∞ —á—Ç–æ-—Ç–æ –Ω–∞ –∫–≤–∏–Ω–Ω–æ–≤—Å–∫–æ–º üÜó –õ–µ–≥–∫–æ –≥–æ–≤–æ—Ä–∏—Ç—å...,–í–∏—Å–ø —É–ø–æ–º–∏–Ω–∞–µ—Ç—Å—è –≤ –∫–æ–Ω—Ç–µ–∫—Å—Ç–µ –õ–µ—à–µ–≥–æ –∫–∞–∫ –æ–¥–Ω–∞ –∏...,single_hop_specifc_query_synthesizer,"–°–Ω–æ–≤–∞ —á—Ç–æ-—Ç–æ –Ω–∞ –∫–≤–∏–Ω–Ω–æ–≤—Å–∫–æ–º üÜó –õ–µ–≥–∫–æ –≥–æ–≤–æ—Ä–∏—Ç—å, ...",BetBoom Esports Dota 2
3,–ö–∞–∫–æ–π –∫–æ–Ω—Ç—Ä–∞–∫—Ç –≤—ã–∏–≥—Ä–∞–ª–∞ Lockheed Martin –∏ –Ω–∞ –∫...,['Lockheed Martin –≤—ã–∏–≥—Ä–∞–ª–∞ –∫–æ–Ω—Ç—Ä–∞–∫—Ç –Ω–∞ –ø—Ä–æ—Ç–∏–≤–æ...,Lockheed Martin –≤—ã–∏–≥—Ä–∞–ª–∞ –∫–æ–Ω—Ç—Ä–∞–∫—Ç –Ω–∞ –ø—Ä–æ—Ç–∏–≤–æ—Ä–∞...,single_hop_specifc_query_synthesizer,Lockheed Martin –≤—ã–∏–≥—Ä–∞–ª–∞ –∫–æ–Ω—Ç—Ä–∞–∫—Ç –Ω–∞ –ø—Ä–æ—Ç–∏–≤–æ—Ä–∞...,"–ë–æ–ª—å—à–µ, —á–µ–º —ç–∫–æ–Ω–æ–º–∏–∫–∞"
4,–ö—Ç–æ –ø–æ–ª—É—á–∞–µ—Ç –º–µ—Ä—á?,"['–ü—Ä–∏–≤–µ—Ç, –ß–µ–º–ø–∏–æ–Ω—ã!üèÜ\n\n–°–µ–≥–æ–¥–Ω—è –ø—Ä–æ–∏–∑–æ—à–ª–æ –∑–Ω–∞–º...","–ú–µ—Ä—á –ø–æ–ª—É—á–∞–µ—Ç –∫–∞–∂–¥—ã–π, –∫—Ç–æ –ø–æ–ª—É—á–∏–ª —Å–µ—Ä—Ç–∏—Ñ–∏–∫–∞—Ç –∏...",single_hop_specifc_query_synthesizer,"–ü—Ä–∏–≤–µ—Ç, –ß–µ–º–ø–∏–æ–Ω—ã!üèÜ\n\n–°–µ–≥–æ–¥–Ω—è –ø—Ä–æ–∏–∑–æ—à–ª–æ –∑–Ω–∞–º–µ–Ω...",üèÑ –°–æ—Ä–µ–≤–Ω–æ–≤–∞—Ç–µ–ª—å–Ω—ã–π Data Science | Kaggle | –ß–µ–º...


In [13]:
# Dense embedding model handed to the retriever (HuggingFace `intfloat/e5-base-v2`).
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
# Project-local hybrid retriever bound to the "main_e5" collection on a local
# endpoint (presumably Milvus, judging by the log output of the rebuild cell below).
retriever = HybridRetriever(
    uri="http://localhost:54637",
    collection_name="main_e5",
    dense_embedding_function=embeddings
)
# recreation=False: in the recorded run this reported that the collection already
# exists and returned 'Collection already exists'. Kept as the last expression so
# the returned status is displayed.
retriever.build_collection(recreation=False)

Коллекция уже есть и не пересоздаётся


'Collection already exists'

In [12]:
# Rebuild the "main_e5" collection and index every post in `df`.
#
# This step is slow (the recorded run needed about 6.5 minutes for 4508 posts) and
# recreation=True presumably discards whatever the collection held before, so it
# is gated behind a flag. True reproduces the original behaviour; set it to False
# to reuse an already populated collection without re-indexing.
REBUILD_COLLECTION = True

embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
retriever = HybridRetriever(
    uri="http://localhost:54637",
    collection_name="main_e5",
    dense_embedding_function=embeddings
)
retriever.build_collection(recreation=REBUILD_COLLECTION)

if REBUILD_COLLECTION:
    # Insert only after a rebuild; inserting into a reused collection would
    # presumably store every post a second time.
    # The three needed columns are iterated directly, which avoids building a
    # Series per row as iterrows() does. The DataFrame index serves as message id.
    for post_id, text, chat_name in tqdm.tqdm(
        zip(df.index, df['text'], df['name']), total=len(df)
    ):
        retriever.insert_data(
            {"message": text, 'chat_name': chat_name, 'chat_message_id': post_id}
        )

Создали новую коллекцию  в Milvus


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4508/4508 [06:33<00:00, 11.45it/s]


In [14]:
generator = AnswerGenerator(retriever)
retriever_params = {
    'k': 20,
    'mode': 'hybrid',
    'k_rerank': 10,
}

dataset = []
for index, row in tqdm.tqdm(testdataset.iterrows(), total=testdataset.shape[0]):
    answer, context = await generator.generate_answer(
        row['user_input'],
        chat_names=[row['name']],
        retriever_params=retriever_params
    )

    dataset.append(
        {
            "user_input": row['user_input'],
            "retrieved_contexts": context,
            "response": answer,
            "reference": row['reference']
        }
    )

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [05:23<00:00,  3.24s/it]


In [15]:
# Turn the list of per-question record dicts into a Ragas EvaluationDataset,
# the input type passed to `evaluate` below.
eval_dataset = EvaluationDataset.from_list(dataset)

In [16]:
# Judge LLM for the Ragas metrics: a LangChain ChatOpenAI model wrapped for Ragas.
llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
# Embeddings passed to `evaluate`, wrapped for Ragas.
# NOTE(review): this rebinds `embeddings`, which the retriever cells above assign
# to the HuggingFace model; from here on the name refers to the OpenAI embeddings.
embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))

In [17]:
# Ragas metrics computed for every evaluated sample.
metrics = [LLMContextRecall(), Faithfulness(), FactualCorrectness()]

In [18]:
# Score the generated answers with Ragas.
#
# `llm` is already a LangchainLLMWrapper (created in the cell above), so it is
# passed straight through. The previous `evaluator_llm = LangchainLLMWrapper(llm)`
# wrapped it a second time and was never used, so it has been removed.
#
# The run configuration is deliberately conservative (long timeout, many retries,
# a single worker). Even so, the recorded run logged TimeoutError and
# APIConnectionError for a few jobs.
result = evaluate(
    dataset=eval_dataset,
    llm=llm,
    embeddings=embeddings,
    metrics=metrics,
    run_config=RunConfig(
        timeout=180,
        max_retries=10,
        max_wait=180,
        max_workers=1,
    ),
)

Evaluating:   0%|          | 0/300 [00:00<?, ?it/s]

Exception raised in Job[14]: TimeoutError()
Exception raised in Job[67]: TimeoutError()
Exception raised in Job[190]: TimeoutError()
Exception raised in Job[213]: TimeoutError()
Exception raised in Job[241]: TimeoutError()
Exception raised in Job[244]: TimeoutError()
Exception raised in Job[283]: APIConnectionError(Connection error.)
Exception raised in Job[289]: APIConnectionError(Connection error.)


In [19]:
# Display the metric scores returned by `evaluate`.
result

{'context_recall': 0.9512, 'faithfulness': 0.9178, 'factual_correctness': 0.6451}

### Результаты валидации

| Embedding model  | Ranker                              | Context Recall | Faithfulness | Factual Correctness |
|------------------|-------------------------------------|----------------|--------------|---------------------|
| all-MiniLM-L6-v2 | Weighted (k=20, weights=[0.5, 0.5]) | 0.9456         | 0.9011       | 0.6360              |
| all-MiniLM-L6-v2 | RRF (k=20, k_rerank=10)             | 0.9400         | 0.9173       | 0.6091              |
| e5-base-v2       | Weighted (k=20, weights=[0.5, 0.5]) | 0.9596         | 0.9200       | 0.6236              |
| e5-base-v2       | RRF (k=20, k_rerank=10)             | 0.9512         | 0.9178       | 0.6451              |