# Import Libraries

In [1]:
# Standard library
import json
import os
import sys
import time

# Third-party
import pandas as pd
from datasets import Dataset
from dotenv import load_dotenv

# RAG Evaluation (RAGAS)
from ragas.metrics import faithfulness, context_precision
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.run_config import RunConfig
from ragas.testset import TestsetGenerator

# LLM
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings

# Pipeline
# Add the project's `pipeline` directory to the import path. The relative path is
# resolved against the current working directory at the time this cell runs.
pipeline_path = os.path.abspath('../../pipeline')
# Guard against appending the same entry again when the cell is re-run.
if pipeline_path not in sys.path:
    sys.path.append(pipeline_path)
# Kept after the sys.path update: the module is presumably located in that directory.
from retrieval_generation_pipeline import hypothesis_pipeline, retrieval_pipeline, generation_pipeline

# Load Dataset

In [2]:
def load_evaluation_dataset(file_path):
    """
    Read the evaluation dataset from a CSV file.

    Args:
        file_path (str): Path to the evaluation dataset CSV file.

    Returns:
        pd.DataFrame: The parsed contents of the CSV file.
    """
    return pd.read_csv(file_path)

# Generate Answer Dataset

In [3]:
def generate_answer_dataset(url, api_key, collection_name, embedding_model_name, llm_name, eval_dataset, hyde=False):
    '''
    Run retrieval and answer generation for every row of the evaluation dataset.

    Args:
        url (str): URL of the Qdrant server
        api_key (str): API key for Qdrant
        collection_name (str): Name of the collection to search for chunks
        embedding_model_name (str): Name of the model to be used for embeddings
        llm_name (str): Name of the LLM handed to the generation pipeline
        eval_dataset (pd.DataFrame): Rows providing the "question", "ground_truth"
            and "topic" columns
        hyde (bool): When True, hypothesis_pipeline is called for each question and
            its result is passed to retrieval_pipeline; when False, None is passed.

    Returns:
        dict: Column-oriented results with the keys "question", "ground_truth",
            "retrieved_contexts" and "answer". Each key maps to a list holding one
            entry per dataset row, in row order.
    '''
    eval_dict = {
        "question": [],
        "ground_truth": [],
        "retrieved_contexts": [],
        "answer": []
    }

    len_dataset = len(eval_dataset)

    # Count by position, not by index label, so the progress display stays correct
    # for frames whose index is not 0..n-1 (filtered, sampled, or string-indexed).
    for position, (_, row) in enumerate(eval_dataset.iterrows(), start=1):
        # Print the progress
        print(f"Evaluating row {position}/{len_dataset}", flush=True, end="\r")

        query = row["question"]
        topic = row["topic"]
        eval_dict["question"].append(query)
        eval_dict["ground_truth"].append(row["ground_truth"])

        # The hypothesis has to be captured here; otherwise retrieval always
        # receives None and the hyde flag has no effect on the results.
        hyde_response = hypothesis_pipeline(query, topic) if hyde else None

        retrieved_documents = retrieval_pipeline(url, api_key, collection_name, embedding_model_name, query, topic, hyde_response)
        eval_dict["retrieved_contexts"].append(retrieved_documents)

        llm_answer = generation_pipeline(retrieved_documents, query, llm_name)
        eval_dict["answer"].append(llm_answer)

    print("Evaluation finished.")

    return eval_dict

# Running Test Case

In [5]:
def main():
    """
    Generate and save answers for every evaluation configuration.

    Reads the list of configurations from '../../../config/eval_configs_demo.json'
    and the evaluation dataset from '../../../data/evaluation_dataset_demo.csv'.
    For each configuration it calls generate_answer_dataset, writes the result to
    '../../../data/answer_<name>_demo.json' and records how long that took. The
    timings are finally written to '../../../data/evaluation_duration_demo.csv'.

    Each configuration entry must provide the keys "name", "collection",
    "embedding_model", "llm_model" and "hyde". All paths are relative to the
    current working directory.
    """
    load_dotenv()
    # Load the evaluation configurations
    with open('../../../config/eval_configs_demo.json', encoding='utf-8') as f:
        configs = json.load(f)

    # Load the dataset used for evaluation
    eval_dataset = load_evaluation_dataset('../../../data/evaluation_dataset_demo.csv')

    # Read the URL and API key needed to connect to Qdrant
    url = os.getenv("QDRANT_URL")
    api_key = os.getenv("QDRANT_API_KEY")

    evaluation_duration = {
        "config": [],
        "duration": []
    }

    for config in configs:
        config_name = config["name"]
        collection_name = config["collection"]
        embedding_model_name = config["embedding_model"]
        llm_name = config["llm_model"]
        use_hyde = config["hyde"]
        print(f"Evaluating {config_name}")
        print(f"Collection      : {collection_name}")
        print(f"Embedding Model : {embedding_model_name}")
        print(f"LLM Model       : {llm_name}")
        print(f"HyDE            : {use_hyde}\n------------------------------------------------------------")

        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments during a long run.
        start_time = time.perf_counter()

        # Forward the HyDE setting; without it every configuration would run
        # with HyDE disabled regardless of what the config file says.
        answer_json = generate_answer_dataset(
            url, api_key, collection_name, embedding_model_name, llm_name, eval_dataset, hyde=use_hyde
        )
        file_name = f"answer_{config_name}_demo.json"
        with open(f'../../../data/{file_name}', 'w', encoding='utf-8') as f:
            json.dump(answer_json, f, indent=4)

        # The duration deliberately includes writing the JSON file, as before.
        duration = time.perf_counter() - start_time

        print(f"Answer saved to '{file_name}'. Duration: {duration:.2f} seconds\n")

        # Record the duration for this configuration
        evaluation_duration["config"].append(config_name)
        evaluation_duration["duration"].append(duration)

    # Save the per-configuration durations to a CSV file
    duration_df = pd.DataFrame(evaluation_duration)
    duration_df.to_csv('../../../data/evaluation_duration_demo.csv', index=False)

# Run the full sweep: this calls the retrieval/generation pipelines for every
# configuration and writes JSON/CSV files under ../../../data.
main()

Evaluating combination_1
Collection      : cvd_collection_v1
Embedding Model : nomic-embed-text
LLM Model       : llama3.1
HyDE            : False
------------------------------------------------------------
Evaluation finished.
Answer saved to 'answer_combination_1_demo.json'. Duration: 42.42 seconds



In [29]:
# Settings for trying out a single query by hand.
# NOTE(review): the Qdrant values are only present if load_dotenv() has already
# run in this kernel (main() calls it) or the variables are set in the environment.
query = "Apa saja hal yang dapat menyebabkan jantung berdebar?"
topic = "Jantung Berdebar"

llm_name = "llama3.1"
embedding_model_name = "nomic-embed-text"  # alternative: "mxbai-embed-large"
collection_name = "cvd_collection_v1"  # alternative: "cvd_collection_v2"

api_key = os.environ.get("QDRANT_API_KEY")
url = os.environ.get("QDRANT_URL")

# RAG Evaluation

In [None]:

# Judge model for the RAGAS metrics below: an Ollama "llama3.1" model wrapped in
# the RAGAS LangChain adapter and passed to evaluate() as `llm`.
evaluator_llm = LangchainLLMWrapper(OllamaLLM(model="llama3.1"))

In [40]:


# Faithfulness score for the generated answers.
# The recorded run of this cell ended with TimeoutError and a NaN score, so the
# job timeout is raised and jobs are run one at a time instead of using the
# RunConfig defaults.
# NOTE(review): `eval_dataset` is not defined by any visible notebook-level cell
# (main() only holds a local variable of that name) — it has to be built before
# this cell can run on a fresh kernel.
score = evaluate(
    eval_dataset,
    metrics=[faithfulness],
    llm=evaluator_llm,
    run_config=RunConfig(timeout=600, max_workers=1),
)

score

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Exception raised in Job[0]: TimeoutError()


{'faithfulness': nan}

In [56]:
# Context precision score, judged by the same evaluator LLM.
# This reuses the name `score`, so the faithfulness result from the earlier cell
# is overwritten.
# NOTE(review): `eval_dataset` is not defined by any visible notebook-level cell.
score = evaluate(eval_dataset, metrics=[context_precision], llm=evaluator_llm)

score

Evaluating: 100%|██████████| 1/1 [00:28<00:00, 28.05s/it]


{'context_precision': 1.0000}

In [21]:
from ipynb.fs.full.indexing_pipeline import get_json_document, get_documents

In [22]:
# Load the prepared documents through the helpers imported from the
# indexing_pipeline notebook.
# NOTE(review): this path uses '../data', while main() reads from
# '../../../data' — confirm which working directory this cell assumes.
file_path='../data/cvd_prepared.json'

docs = get_documents(get_json_document(file_path))

In [23]:
# Displays the entire list of documents; the output is very long.
docs

[Document(metadata={'id': 'eae39133-7730-4d21-94ff-8609769372c4', 'topic': 'Hipertensi', 'subtopic': 'Pengertian Hipertensi', 'source': 'https://www.alodokter.com/hipertensi', 'vector': []}, page_content='Topik: Hipertensi, Subtopik: Pengertian Hipertensi \n Hipertensi atau darah tinggi adalah kondisi ketika tekanan darah berada pada angka 130/80 mmHg atau lebih. Jika tidak segera ditangani, hipertensi bisa menyebabkan komplikasi serius, seperti gagal jantung, penyakit ginjal, hingga stroke.\nTekanan darah dinyatakan dalam dua nilai angka yang dipisahkan dengan garis miring atau yang biasanya disebut  per  . Angka di awal, yaitu di sebelah kiri garis miring menandakan tekanan sistolik. Ini adalah tekanan di dalam pembuluh darah ketika jantung berkontraksi untuk memompa darah keluar dari jantung.\n\nAngka di akhir yang berada setelah garis miring menandakan tekanan diastolik, yaitu tekanan darah saat jantung berelaksasi dan menyedot atau menerima darah masuk kembali ke dalam jantung.\nP

In [52]:
# LLM used for test set generation. Note this is "llama3.2" with temperature 0.4,
# not the "llama3.1" model used as the evaluator.
generator_llm = LangchainLLMWrapper(OllamaLLM(model="llama3.2", temperature=0.4))

# Embedding model used for test set generation, wrapped in the RAGAS adapter.
generator_embeddings = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text"))

In [36]:
# Check the container type returned by get_documents (recorded output: list).
type(docs)

list

In [37]:
# Take the first document as a small sample for test set generation.
first_doc = docs[0]

# Recorded output: langchain_core.documents.base.Document
type(first_doc)

langchain_core.documents.base.Document

In [38]:
# Show the sample document's metadata and page content.
first_doc

Document(metadata={'id': 'eae39133-7730-4d21-94ff-8609769372c4', 'topic': 'Hipertensi', 'subtopic': 'Pengertian Hipertensi', 'source': 'https://www.alodokter.com/hipertensi', 'vector': []}, page_content='Topik: Hipertensi, Subtopik: Pengertian Hipertensi \n Hipertensi atau darah tinggi adalah kondisi ketika tekanan darah berada pada angka 130/80 mmHg atau lebih. Jika tidak segera ditangani, hipertensi bisa menyebabkan komplikasi serius, seperti gagal jantung, penyakit ginjal, hingga stroke.\nTekanan darah dinyatakan dalam dua nilai angka yang dipisahkan dengan garis miring atau yang biasanya disebut  per  . Angka di awal, yaitu di sebelah kiri garis miring menandakan tekanan sistolik. Ini adalah tekanan di dalam pembuluh darah ketika jantung berkontraksi untuk memompa darah keluar dari jantung.\n\nAngka di akhir yang berada setelah garis miring menandakan tekanan diastolik, yaitu tekanan darah saat jantung berelaksasi dan menyedot atau menerima darah masuk kembali ke dalam jantung.\nPa

In [55]:
# Build a RAGAS test set generator and request 3 samples from the single sample document.
# NOTE(review): the recorded run logged repeated output-parser failures and
# produced no samples, so this configuration does not currently yield a test set.
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs([first_doc], testset_size=3)

Applying CustomNodeFilter:   0%|          | 0/1 [00:00<?, ?it/s]        Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt question_potential_prompt failed to parse output: The output parser failed to parse the output including retries.
unable to apply transformation: The output parser failed to parse the output including retries.
Generating personas: 100%|██████████| 1/1 [00:01<00:00,  1.82s/it]                                         
Generating Scenarios: 100%|██████████| 1/1 [00:14<00:00, 14.11s/it]
Generating Samples: 0it [00:00, ?it/s]


In [56]:
# Inspect the generated test set (recorded output: Testset(samples=[]), i.e. empty).
dataset

Testset(samples=[])