In [15]:
# https://mer.vin/2024/05/ragas-evaluate-rag-from-test-set/

In [16]:
import os
# OpenAI credential taken from the environment; this is None when the variable is unset.
api_key = os.getenv("OPENAI_API_KEY")

In [17]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader

from langchain.embeddings import OpenAIEmbeddings  #← OpenAIEmbeddings 가져오기
import tiktoken


In [18]:
# Embedding client used by the test-set generator; the model name is pinned explicitly.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [19]:
# --- Alternative inputs (PDF), kept for reference -------------------------
# FILE_PATH = "./data/sample.pdf"
# CHROMA_DB_PATH = "./vector_db/chroma/sample"
# TESTSET_FILE = "pdf_testset.csv"
# EVAL_FILE = "pdf_eval.csv"
#
# FILE_PATH = "./data/130292099630937500_KIFVIP2013-10.pdf"
# TESTSET_FILE = "pdf1_testset.csv"
# EVAL_FILE = "pdf1_eval.csv"
# CHROMA_DB_PATH = "./vector_db/chroma/130292099630937500_KIFVIP2013"
#
# loader = PyPDFLoader(FILE_PATH)  # load the PDF instead of the text file

# --- Active input: plain-text source and the artefact paths derived from it ---
FILE_PATH = "./data/llm.txt"
TESTSET_FILE = "txt_testset.csv"
EVAL_FILE = "txt_eval.csv"
CHROMA_DB_PATH = "./vector_db/chroma/llm"

# Load llm.txt through LangChain's text loader.
loader = TextLoader(FILE_PATH)
documents = loader.load()

# Report how many documents the loader returned.
print(f"문서 개수: {len(documents)}")

문서 개수: 1


In [20]:
def tiktoken_len(text):
    """Return how many tokens ``text`` encodes to under tiktoken's ``cl100k_base`` encoding."""
    return len(tiktoken.get_encoding("cl100k_base").encode(text))
    
# Splitter whose chunk size and overlap are measured by tiktoken_len rather than
# by the splitter's default length function.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=100, length_function=tiktoken_len
)
split_documents = text_splitter.split_documents(documents)

# Generator and critic are separate client objects configured with the same model.
generator_llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini")
critic_llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini")

# `embeddings` is the client created in an earlier cell.
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# NOTE(review): generation receives the unsplit `documents`; `split_documents`
# is not consumed in this cell - confirm that is intended.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

# Drop the columns that are not kept in the saved CSV, then persist the result.
testset_df = testset.to_pandas().drop(
    columns=["evolution_type", "metadata", "episode_done"]
)
testset_df.to_csv(TESTSET_FILE, index=False)
testset_df

embedding nodes:   0%|          | 0/4 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,question,contexts,ground_truth
0,What distinguishes pre-trained language models...,[Large Language Models: A Survey\nShervin Mina...,Pre-trained language models (PLMs) differ from...
1,What is in-context learning and how does it fu...,[ostic. This generality also extends to the le...,In-context learning is an emergent ability of ...
2,What is in-context learning and how does it fu...,[ostic. This generality also extends to the le...,In-context learning is an emergent ability of ...
3,What advancements have transformer-based model...,[Large Language Models: A Survey\nShervin Mina...,Transformer-based large language models (LLMs)...
4,What role do large language models play in the...,[Large Language Models: A Survey\nShervin Mina...,Large language models (LLMs) play a crucial ro...
5,What skills let LLMs tackle complex tasks?,[ostic. This generality also extends to the le...,LLMs tackle complex tasks through emergent abi...
6,What allows transformer LLMs to do multi-step ...,[Large Language Models: A Survey\nShervin Mina...,Transformer LLMs can perform multi-step reason...
7,What makes large language models excel at comp...,[Large Language Models: A Survey\nShervin Mina...,Large language models (LLMs) excel at complex ...
8,What emergent skills help LLMs learn from few ...,[ostic. This generality also extends to the le...,LLMs exhibit emergent abilities such as in-con...
9,What allows LLMs to learn new tasks from few e...,[ostic. This generality also extends to the le...,LLMs can learn new tasks from few examples thr...


In [21]:
from datasets import load_dataset
from datasets import Dataset 
import pandas as pd
import json 
import ast

# Reload the saved test set from disk.
data_df = pd.read_csv(TESTSET_FILE)
# data_df=testset_df

# Parse each `contexts` cell from its string form into a Python object.
parsed_contexts = [ast.literal_eval(raw) for raw in data_df["contexts"]]
data_df["contexts"] = parsed_contexts

# Copy the ground truth into `answer`, then overwrite the same CSV.
data_df["answer"] = data_df["ground_truth"]
data_df.to_csv(TESTSET_FILE, index=False)


# dict_data = data_df.to_dict(orient='list')

# dataset = Dataset.from_dict(dict_data)

# amnesty_qa = dataset
# print(amnesty_qa)

# from ragas.metrics import (
#     answer_relevancy,
#     faithfulness,
#     context_recall,
#     context_precision,
# )

# from ragas import evaluate

# result = evaluate(
#     amnesty_qa,
#     metrics=[
#         context_precision,
#         faithfulness,
#         answer_relevancy,
#         context_recall,
#     ],
# )

# print(result)
# df = result.to_pandas()
# df.to_csv(EVAL_FILE, index=False)