In [78]:
# https://mer.vin/2024/05/ragas-evaluate-rag-from-test-set/

In [79]:
import os
# OpenAI key taken from the environment; api_key is None when the variable is unset.
api_key = os.getenv('OPENAI_API_KEY')

In [80]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader

from langchain.embeddings import OpenAIEmbeddings  #← OpenAIEmbeddings 가져오기
import tiktoken


In [81]:
# Embedding client; it is passed to TestsetGenerator.from_langchain in a later cell.
# NOTE(review): OpenAIEmbeddings is imported twice in the import cell (first from
# langchain_openai, then from langchain.embeddings); the later import is the one
# bound here — confirm that is the intended class.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [82]:
# Active input: the PDF below, together with the output paths used with it.
FILE_PATH = "./data/130292099630937500_KIFVIP2013-10.pdf"
CHROMA_DB_PATH = "./vector_db/chroma/130292099630937500_KIFVIP2013"
TESTSET_FILE = "pdf1_testset.csv"
EVAL_FILE = "pdf1_eval.csv"

loader = PyPDFLoader(FILE_PATH)  # loader for the PDF at FILE_PATH

# Alternative inputs, kept for reference (disabled):
#
#   sample PDF
#     FILE_PATH="./data/sample.pdf"
#     CHROMA_DB_PATH="./vector_db/chroma/sample"
#     TESTSET_FILE="pdf_testset.csv"
#     EVAL_FILE="pdf_eval.csv"
#
#   plain-text file
#     FILE_PATH="./data/llm.txt"
#     CHROMA_DB_PATH="./vector_db/chroma/llm"
#     TESTSET_FILE="txt_testset.csv"
#     EVAL_FILE="txt_eval.csv"
#     loader = TextLoader(FILE_PATH)

documents = loader.load()
print(f"문서 개수: {len(documents)}")  # the printed label is Korean for "number of documents"

문서 개수: 55


In [83]:
def tiktoken_len(text):
    """Return how many tokens ``text`` encodes to under tiktoken's ``cl100k_base``.

    Passed as ``length_function`` to the text splitter in this notebook, so the
    splitter's sizes are counted in tokens rather than characters.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token sequence produced by the encoder.
    """
    return len(tiktoken.get_encoding("cl100k_base").encode(text))
    
# Split the loaded documents into overlapping chunks. chunk_size and chunk_overlap
# are measured with tiktoken_len, i.e. in tokens rather than characters.
# NOTE(review): `splits` is not referenced by the later cells visible here — test-set
# generation is given `documents` instead; confirm whether that is intended.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=tiktoken_len,
    chunk_overlap=100,
    chunk_size=300,
)
splits = text_splitter.split_documents(documents)

# The generator and the critic are two separate clients configured with the same model.
generator_llm = ChatOpenAI(model="gpt-4o-mini", api_key=api_key)
critic_llm = ChatOpenAI(model="gpt-4o-mini", api_key=api_key)

# Reuses the `embeddings` client created in an earlier cell instead of building a new one.
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Request 10 test items from the loaded documents, with the evolution types
# weighted 50% simple, 25% reasoning and 25% multi_context.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={
        simple: 0.5,
        reasoning: 0.25,
        multi_context: 0.25,
    },
)

# Persist the generated test set; the evaluation cell reads this CSV back in.
testset.to_pandas().to_csv(TESTSET_FILE, index=False)

embedding nodes:   0%|          | 0/214 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

In [84]:
import ast
import json

import pandas as pd
from datasets import Dataset, load_dataset
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)

# Reload the generated test set from CSV and prepare the columns for evaluation.
#   contexts: stored in the CSV as the string form of a list, so it is parsed
#             back into a Python list.
#   answer:   copied from ground_truth.
# NOTE(review): because `answer` is a copy of `ground_truth`, the metrics below score
# the reference answers themselves rather than answers produced by a RAG pipeline —
# confirm this placeholder is intended.
data = pd.read_csv(TESTSET_FILE).assign(
    contexts=lambda frame: frame['contexts'].apply(ast.literal_eval),
    answer=lambda frame: frame['ground_truth'],
)

# NOTE(review): json_data is not used by any cell visible here.
json_data = data.to_json()
dict_data = data.to_dict(orient='list')

dataset = Dataset.from_dict(dict_data)

# The name mirrors the public dataset that could be loaded instead:
#   amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")
amnesty_qa = dataset
print(amnesty_qa)

# Score the dataset with the four ragas metrics, in this order.
result = evaluate(
    amnesty_qa,
    metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
)
print(result)

# Save the evaluation results as a CSV.
df = result.to_pandas()
df.to_csv(EVAL_FILE, index=False)

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 10
})


Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

No statements were generated from the answer.
Failed to parse output. Returning None.


{'context_precision': 0.8000, 'faithfulness': 0.8241, 'answer_relevancy': 0.8448, 'context_recall': 0.7407}
