In [58]:
# https://medium.com/towards-data-science/evaluating-rag-applications-with-ragas-81d67b0ee31a

In [59]:
import os
api_key = os.environ.get('OPENAI_API_KEY')

In [60]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain.vectorstores import Chroma

from langchain.embeddings import OpenAIEmbeddings  #← OpenAIEmbeddings 가져오기
import tiktoken


In [61]:
embeddings = OpenAIEmbeddings( #← OpenAIEmbeddings를 초기화
    model="text-embedding-ada-002" #← 모델명을 지정
)

# Generate test set

In [62]:
FILE_PATH="./data/sample.pdf"
CHROMA_DB_PATH="./vector_db/chroma/sample"
TESTSET_FILE="pdf_testset.csv"
EVAL_FILE="pdf_eval.csv"

# FILE_PATH="./data/130292099630937500_KIFVIP2013-10.pdf"
# TESTSET_FILE="pdf1_testset.csv"
# EVAL_FILE="pdf1_eval.csv"
# CHROMA_DB_PATH="./vector_db/chroma/130292099630937500_KIFVIP2013"

loader = PyPDFLoader(FILE_PATH) #← sample.pdf 로드

# FILE_PATH="./data/llm.txt"
# TESTSET_FILE="txt_testset.csv"
# EVAL_FILE="txt_eval.csv"
# CHROMA_DB_PATH="./vector_db/chroma/llm"
# loader = TextLoader(FILE_PATH) #← llm.txt 로드


### Load document and Save To vectorDB

In [63]:

documents = loader.load()
print(f"문서 개수: {len(documents)}") #← 문서 개수 확인

def tiktoken_len(text):
    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = tokenizer.encode(text)
    return len(tokens)
    
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=tiktoken_len
)    
splitted_documents = text_splitter.split_documents(documents)
print(f"분할 전 문서 개수: {len(documents)}")
print(f"분할 후 문서 개수: {len(splits)}")


chroma_db = Chroma(  #← Chroma를 초기화
    persist_directory=CHROMA_DB_PATH,  #← 영속화 데이터 저장 위치 지정
    embedding_function=embeddings  #← 벡터화할 모델을 지정
)

chroma_db.add_documents(  #← 문서를 데이터베이스에 추가
    splitted_documents,  #← 추가할 문서 지정
)


문서 개수: 12
분할 전 문서 개수: 12
분할 후 문서 개수: 41


['aea9f679-78ca-4204-a682-85d879bb3c27',
 '9c354eef-6d52-4ad2-9689-e9c35f123008',
 '77aec394-4f03-4c73-8388-69ab5e784612',
 'd843c6ad-d196-40ac-861a-34db98537560',
 '27b0ac22-5a0f-4d64-a4c7-cabcb61e018f',
 '5c5bee27-480f-43a6-8bba-af1b76181087',
 'dc2fb61b-86cd-4fa1-95fc-843b7220d042',
 'f4484073-30bc-4aee-9c46-ca513427e782',
 'ee299306-8677-4d46-a38f-bd7777043aac',
 '316a0871-0ae0-4162-9da2-7a2ced3603d5',
 '246457bf-ed34-4dfa-901c-36f520889136',
 '8347d106-4019-494f-83f2-b7ebd7abea5e',
 'f6def1a3-ecb0-4aa0-bf40-91b91e13e6ea',
 '115c1eec-b2d9-4ade-8cd3-7f8ec308590f',
 'db6cd784-f8c7-4caa-9355-c0feb1ff77ad',
 '37fbc0a1-39af-41a0-955a-33ad6cb043bd',
 '7653a8f7-2d0a-41f0-9026-4088717b0743',
 'ee979c99-10f7-4794-a599-0a06bf36ff2e',
 'c46bd367-43c1-43d7-a7a5-89b5bd083e14',
 '3028a754-fe0c-4571-b1b2-4bbc5d2deb6b',
 '91881f4f-6015-4870-86a1-b4eb48580197',
 '40ec738b-3ee2-48d2-a48b-db250e6cb2a0',
 '40cb7032-dae7-4456-8b12-3f4f4d4045d8',
 '241a927a-cef8-48d2-ba56-7e0826b0a70b',
 '0b34298b-405b-

### Generate dataframe

In [64]:

generator_llm = ChatOpenAI(api_key=api_key,model="gpt-4o-mini")
critic_llm = ChatOpenAI(api_key=api_key,model="gpt-4o-mini")
# embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
# testset.to_pandas().to_csv(TESTSET_FILE, index=False)
testset_df=testset.to_pandas()
testset_df=testset_df.drop(columns=["evolution_type","metadata","episode_done"])
testset_df.to_csv(TESTSET_FILE, index=False)
testset_df.head()

embedding nodes:   0%|          | 0/44 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,question,contexts,ground_truth
0,비행 고도 제한법의 목적은 무엇인가?,[비행 자동차 고도 제한법\n제1조(목적 )\n이 법은 비행자동차의 비행안전 및 주...,비행 고도 제한법의 목적은 비행자동차의 비행안전 및 주민의 안전을 확보하기 위하여 ...
1,비행체 통신 시스템법의 목적은 무엇인가?,[비행체 통신 시스템법\n제1조(목적 )\n이 법은 비행자동차의 적절한 운영과 안전...,비행체 통신 시스템법의 목적은 비행자동차의 적절한 운영과 안전한 비행을 보장하기 위...
2,고도제한을 위반할 경우 어떤 처벌이 부과되나요?,[��하고 기록하는 장치를 의무적으로 탑재해야 한다.\n2.제1항의 장비의 사양 및...,"고도제한을 위반할 경우 150만엔 이하의 벌금에 처해지며, 중대한 사고를 유발하거나..."
3,What is the role of 항공교통관제 in the communicatio...,[비행체 통신 시스템법\n제1조(목적 )\n이 법은 비행자동차의 적절한 운영과 안전...,The answer to given question is not present in...
4,What are the requirements for noise regulation...,[비행차 소음 규제법\n제1조(목적 )\n이 법은 비행자동차로 인한 소음의 영향을 ...,The requirements for noise regulation standard...


### create test set and save to file

In [65]:
from datasets import load_dataset
from datasets import Dataset 
import pandas as pd
import json 
import ast

data_df = pd.read_csv(TESTSET_FILE)
data_df['contexts'] = data_df['contexts'].apply(ast.literal_eval)  # convert string to list
data_df['answer'] = data_df['ground_truth']
data_df.to_csv(TESTSET_FILE, index=False)

dict_data = data_df.to_dict(orient='list')

dataset = Dataset.from_dict(dict_data)

amnesty_qa = dataset
print(amnesty_qa)


Dataset({
    features: ['question', 'contexts', 'ground_truth', 'answer'],
    num_rows: 10
})


# Evaluate

### Load data from vectorDB

In [66]:
chroma_db = Chroma(  #← Chroma를 초기화
    persist_directory=CHROMA_DB_PATH,  #← 영속화 데이터 저장 위치 지정
    embedding_function=embeddings  #← 벡터화할 모델을 지정
)

retriever = chroma_db.as_retriever()


### Load test set from file

In [67]:
# data_df = pd.read_csv(TESTSET_FILE)
# dict_data = data_df.to_dict(orient='list')
# dataset = Dataset.from_dict(dict_data)
# amnesty_qa = dataset


### Create Rag Chain

In [68]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Define LLM
llm = ChatOpenAI(api_key=api_key,model_name="gpt-4o-mini", temperature=0)

# Define prompt template
template = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use two sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
"""

prompt = ChatPromptTemplate.from_template(template)

# Setup RAG pipeline
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()} 
    | prompt 
    | llm
    | StrOutputParser() 
)

In [69]:
from ragas import evaluate

from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)


# amnesty_qa = dataset
result = evaluate(
    amnesty_qa,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
)

print(result)
df = result.to_pandas()
df.to_csv(EVAL_FILE, index=False)

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

No statements were generated from the answer.
Failed to parse output. Returning None.


{'context_precision': 0.8000, 'faithfulness': 0.7593, 'answer_relevancy': 0.7672, 'context_recall': 0.8889}
