## Disease data to Document

In [2]:
# Print the version of the `python3` executable found on PATH.
!python3 --version

Python 3.11.9


In [1]:
# Show the visible GPU(s) together with the driver and CUDA versions.
!nvidia-smi

Fri Sep 20 10:58:23 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.06             Driver Version: 535.183.06   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-PCIE-32GB           Off | 00000000:D8:00.0 Off |                    0 |
| N/A   26C    P0              23W / 250W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [1]:
!pip3 install -r requirements.txt



In [None]:
# Initial model download: load the checkpoint once on CPU and store it locally
# so that later cells can load it from disk.
import transformers
import torch

model_id = "maywell/EXAONE-3.0-7.8B-Instruct-Llamafied"
# Local copy lives under ./<model_id>, the same relative path later cells load from.
local_model_dir = f"./{model_id}"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    device_map="cpu",
    model_kwargs={"torch_dtype": torch.float16},
)

# Persist weights and tokenizer side by side in the same directory.
pipeline.model.save_pretrained(local_model_dir)
pipeline.tokenizer.save_pretrained(local_model_dir)

# Drop the reference to the in-memory pipeline now that it has been saved.
del pipeline

In [2]:
def text_format(row):
    """Render one disease record as the text body of a retrievable document.

    Args:
        row: Mapping-like record (e.g. a pandas row) providing the keys
            'define', 'disease_name', 'animal', 'cause', 'disease_now',
            'condition', 'jindan', 'opinion', 'treatment' and 'advice'.

    Returns:
        A multi-line string with one labelled section per field, the sections
        separated by "===" lines. The leading newline and the four-space
        indentation of every line are part of the returned text.
    """
    # The first label used to be misspelled "Difine"; this text is what gets
    # indexed and shown to the LLM, so the label is spelled correctly here.
    text = f"""
    Define: {row['define']}
    ===
    Disease name: {row['disease_name']}
    ===
    Target Animals: {row['animal']}
    ===
    Cause: {row['cause']}
    ===
    Disease Now: {row['disease_now']}
    ===
    Condition: {row['condition']}
    ===
    Jindan: {row['jindan']}
    ===
    Opinion: {row['opinion']}
    ===
    Treatment: {row['treatment']}
    ===
    Advice: {row['advice']}
    """
    return text

In [40]:
# Sanity check: inspect one generated Document.
# NOTE: `documents` is created by a later cell in this notebook, so that cell must run first.
documents[2]

Document(metadata={'disease_name': '코로나 바이러스 감염증, 개(Canine Coronavirus Infection)', 'animal': '개', 'cause': 'canine coronavirus ', 'define': '개 코로나 바이러스(CCV)에 의해 발생하며 구토와 설사를 수반하는 장염을 유발하는 질병', 'disease_now': '# 성견에서는 불현성 감염이나 경미한 임상증상을 보이나 자견에서는 녹황색에서 담적색의 설사와 함께 급사하기도 함. 모든 연령과 품종에서 발생# 일차적인 전파는 분변에 의해 일어나며 약2주간 분변으로 바이러스 배출# 잠복기는 1-3일# 바이러스혈증이나 전신적 질환은 나타나지 않음# 야생 개과에서도 발생, 전염성이 매우 강하며 전 세계적으로 만연된 질병으로 그 증상이 CPV-2와 유사함.# 고양이에서는 불현성 감염을 일으킴# 집에서 키우는 동물 및 야생동물 모두 감염될 수 있음', 'condition': ' | 구토 | 무기력 , 침울 , 졸림 , 생기없음 | 발열 , 고열 | 분변 냄새의 이상 | 설사 , 하리 | 식욕부진 , 식욕감소 , 식욕절폐 , 젖을 안먹임 , 먹이 공급 중단 | 점액변 , 점액양분변 | 탈수 | 혈변 ', 'jindan': '혈청학적 진단, 바이러스 분리, 면역형광검사, 전자현미경, PCR(급성기)', 'opinion': '[임상병리소견]  PCV증가, CPV와 달리 백혈구 감소가 관찰되지 않음 [병리해부소견]  소장점막의 심한 충·출혈 및 장간막 림프절의 종대 및 충혈[병리조직소견]  소장 융모의 위축과 융합, 고유판 세포수의 증가, 상피세포의 편평화와 술잔세포 증가가 특징적', 'treatment': "# 치료하지 않아도 회복되는 경우도 있으나 심할 경우 대증요법 외에 특별한 치료법 없음  -수액: lactated ringer's 용액, 80-90ml/kg/h을 투여후 배뇨하면 dextrose ringer 용액(10ml/kg/h) 투여  -항생제: ampicilli

In [3]:
import pandas as pd
from langchain.vectorstores import Chroma
from langchain_community.retrievers import BM25Retriever
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document


# Read the raw disease table from the Excel export.
file_path = 'data/disease.xlsx'
df = pd.read_excel(file_path)

# Remove the "_x000d_\n" artefacts from every object-typed (text) column;
# columns of any other dtype pass through unchanged.
df = df.apply(
    lambda column: column if column.dtype != "object" else column.str.replace('_x000d_\n', '')
)

# Columns copied verbatim into each Document's metadata, in this order.
metadata_fields = [
    'disease_name',
    'animal',
    'cause',
    'define',
    'disease_now',
    'condition',
    'jindan',
    'opinion',
    'treatment',
    'advice',
]

# One Document per row: formatted text as the content, raw fields as metadata.
documents = [
    Document(
        page_content=text_format(record),
        metadata={field: record[field] for field in metadata_fields},
    )
    for _index, record in df.iterrows()
]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


/mnt/workspace/MyFiles


In [5]:
# To documents.pdf
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics

# Register the NanumGothic font
def register_fonts():
    """Register the TrueType file 'NanumGothic.ttf' with reportlab as 'NanumGothic'."""
    nanum_gothic = TTFont('NanumGothic', 'NanumGothic.ttf')
    pdfmetrics.registerFont(nanum_gothic)

# Wrap text into lines that fit within the available page width
def split_text_to_lines(text, font, font_size, max_width, string_width=None):
    """Greedily wrap `text` into lines no wider than `max_width`.

    The text is split on whitespace (so existing newlines are treated like
    spaces) and words are packed onto a line for as long as the measured
    width fits. A single word that is wider than `max_width` on its own is
    broken character by character instead of overflowing the line.

    Args:
        text: Text to wrap.
        font: Font name passed to the width function.
        font_size: Font size passed to the width function.
        max_width: Maximum allowed line width, in the width function's units.
        string_width: Optional callable ``(text, font, font_size) -> width``.
            Defaults to ``pdfmetrics.stringWidth``.

    Returns:
        List of wrapped lines; never contains an empty string. Empty or
        whitespace-only input yields an empty list.
    """
    if string_width is None:
        string_width = pdfmetrics.stringWidth

    def fits(candidate):
        return string_width(candidate, font, font_size) <= max_width

    lines = []
    current_line = ""

    for word in text.split():
        test_line = current_line + " " + word if current_line else word
        if fits(test_line):
            current_line = test_line
            continue

        # Flush the line built so far. When it is empty (the very first word is
        # already too wide) nothing is emitted, avoiding a spurious blank line.
        if current_line:
            lines.append(current_line)

        if fits(word):
            current_line = word
            continue

        # The word alone is wider than the line: hard-break it by character.
        # At least one character is always consumed per line, so this
        # terminates even when max_width is smaller than a single glyph.
        current_line = ""
        for char in word:
            if current_line and not fits(current_line + char):
                lines.append(current_line)
                current_line = char
            else:
                current_line += char

    if current_line:
        lines.append(current_line)

    return lines

# Draw text onto the PDF, starting a new page whenever the current one is full
def add_text_with_auto_pagination(pdf, text, margin, y_position, page_height, page_width, font="NanumGothic", font_size=6):
    """Draw wrapped `text` on `pdf` from `y_position` downwards, paginating as needed.

    Args:
        pdf: Canvas-like object providing setFont, drawString and showPage.
        text: Text to draw; it is wrapped with split_text_to_lines.
        margin: Margin used on the left/right and as the top/bottom limit.
        y_position: Vertical position of the first line.
        page_height: Page height, used to reset the position on a new page.
        page_width: Page width, used to derive the wrapping width.
        font: Font name.
        font_size: Font size; the line spacing is font_size + 2.

    Returns:
        The vertical position just below the last drawn line.
    """
    usable_width = page_width - 2 * margin  # page width minus both side margins
    leading = font_size + 2                 # distance between consecutive baselines
    top_of_page = page_height - margin

    pdf.setFont(font, font_size)
    cursor_y = y_position

    for wrapped_line in split_text_to_lines(text, font, font_size, usable_width):
        page_is_full = cursor_y < margin
        if page_is_full:
            # Start a new page; the font is set again on the fresh page.
            pdf.showPage()
            pdf.setFont(font, font_size)
            cursor_y = top_of_page

        pdf.drawString(margin, cursor_y, wrapped_line)
        cursor_y = cursor_y - leading

    return cursor_y

# Save documents to a PDF file
def save_documents_to_pdf(documents, file_name):
    """Write every document's page content and metadata to the PDF `file_name`.

    Each document starts at the top of a fresh page. Its page content is
    printed first, then (after a 20-unit gap) one "key: value" entry per
    metadata item. Text that does not fit continues on following pages.

    Args:
        documents: Iterable of objects exposing `page_content` and `metadata`.
        file_name: Path of the PDF file to create.
    """
    pdf = canvas.Canvas(file_name, pagesize=letter)
    register_fonts()  # font registration
    page_width, page_height = letter
    margin = 40

    def emit(text, y):
        # Every piece of text uses the same page geometry and font settings.
        return add_text_with_auto_pagination(
            pdf, text, margin, y, page_height, page_width, font="NanumGothic", font_size=6
        )

    for document in documents:
        # Page content, starting at the top of the page
        cursor_y = emit("Page Content:", page_height - margin)
        cursor_y = emit(document.page_content, cursor_y)

        # Metadata, separated from the page content by a 20-unit gap
        cursor_y = emit("Metadata:", cursor_y - 20)
        for field, field_value in document.metadata.items():
            cursor_y = emit(f"{field}: {field_value}", cursor_y)

        pdf.showPage()  # close the page so the next document starts on a new one

    pdf.save()

# Write every Document in `documents` into a single PDF file.
file_name = "./data/documents.pdf"
save_documents_to_pdf(documents, file_name)


In [4]:
from langchain_community.llms import VLLM

# Load the locally saved EXAONE checkpoint through the LangChain vLLM wrapper.
llm = VLLM(
    model="./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied",
    trust_remote_code=True,  # mandatory for hf models
    max_new_tokens=2048,
    # top_k=10,
    # top_p=0.95,
    temperature=0.1,
    dtype='float16',
    # Passed through to vLLM: bitsandbytes quantization and load format, plus gpu_memory_utilization=0.4.
    vllm_kwargs={'quantization': "bitsandbytes", 'load_format': "bitsandbytes", 'gpu_memory_utilization': 0.4},
    # NOTE(review): confirm that the VLLM wrapper actually honours `streaming`.
    streaming=True
)


INFO 09-20 10:58:55 llm_engine.py:213] Initializing an LLM engine (v0.6.0) with config: model='./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied', speculative_config=None, tokenizer='./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.BITSANDBYTES, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=bitsandbytes, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied, use_v2_block_manager=Fals

  @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")


INFO 09-20 10:58:57 model_runner.py:915] Starting to load model ./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied...
INFO 09-20 10:58:58 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 09-20 10:58:58 selector.py:116] Using XFormers backend.
INFO 09-20 10:58:58 loader.py:916] Loading weights with BitsAndBytes quantization.  May take a while ...


Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 09-20 10:59:14 model_runner.py:926] Loading model weights took 4.9173 GB
INFO 09-20 10:59:21 gpu_executor.py:122] # GPU blocks: 2879, # CPU blocks: 2048


In [5]:
# BM25 retriever built directly from the disease documents.
from langchain_community.retrievers import BM25Retriever
bm25_retriever = BM25Retriever.from_documents(documents)

In [6]:
# Embedding model; it is used below to build the Chroma vector store.
from langchain_huggingface import HuggingFaceEmbeddings
# https://huggingface.co/nomic-ai/nomic-embed-text-v1
embed = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1", model_kwargs={'trust_remote_code': True})

  state_dict = loader(resolved_archive_file)


In [31]:
# Compressor(Combine)
from langchain.vectorstores import Chroma
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain.retrievers import ContextualCompressionRetriever

# Value passed to the reranker as `top_n`; it is also interpolated into the
# system prompt as the upper bound of the reference page numbers.
top_n = 5

# Flashrank reranker used as the compressor of the contextual-compression retriever.
compressor = FlashrankRerank(top_n=top_n, model='ms-marco-MultiBERT-L-12')


In [8]:
from typing import List
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents.base import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda


class CombineRetriever(BaseRetriever):
    """Retriever that queries several retrievers and merges their results.

    Every retriever in `retrievers` is invoked with the same query, in order,
    and the results are concatenated. A document whose `page_content` was
    already returned by an earlier retriever is dropped, so the same passage
    is not handed to the downstream reranker more than once.

    The class implements the `_get_relevant_documents` /
    `_aget_relevant_documents` hooks instead of overriding the public
    `get_relevant_documents` / `aget_relevant_documents` methods, which
    LangChain deprecates and warns about at class-definition time. The
    public methods and `invoke` / `ainvoke` are still provided by
    BaseRetriever.
    """

    # Retrievers to query, in order. Annotated so pydantic treats it as a
    # declared model field instead of relying on inference from the default.
    retrievers: List[BaseRetriever] = []

    @staticmethod
    def _merge_unique(batches) -> List[Document]:
        """Flatten `batches` in order, keeping the first document per page_content."""
        merged = []
        seen_contents = set()
        for batch in batches:
            for doc in batch:
                if doc.page_content in seen_contents:
                    continue
                seen_contents.add(doc.page_content)
                merged.append(doc)
        return merged

    def _get_relevant_documents(self, query: str, *, run_manager=None) -> List[Document]:
        """Return the merged results of all retrievers for `query`.

        Args:
            query: Search query forwarded unchanged to every retriever.
            run_manager: Callback manager supplied by BaseRetriever; unused.

        Returns:
            De-duplicated documents in retriever order.
        """
        return self._merge_unique(retriever.invoke(query) for retriever in self.retrievers)

    async def _aget_relevant_documents(self, query: str, *, run_manager=None) -> List[Document]:
        """Async counterpart of `_get_relevant_documents`.

        The retrievers are awaited one after another, in order.
        """
        batches = []
        for retriever in self.retrievers:
            batches.append(await retriever.ainvoke(query))
        return self._merge_unique(batches)


  class CombineRetriever(BaseRetriever):
  class CombineRetriever(BaseRetriever):


In [9]:
# Embed the disease documents into a Chroma store and expose it as a retriever.
vectorstore = Chroma.from_documents(documents, embed)
vector_retriever = VectorStoreRetriever(vectorstore=vectorstore)

In [10]:
# Hybrid retrieval: BM25 results first, followed by the vector-store results.
combine_retriever = CombineRetriever(retrievers=[bm25_retriever, vector_retriever])

In [11]:
# Wrap the combined retriever so that its results go through the Flashrank compressor.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=combine_retriever
)

In [12]:
def format_docs(docs):
    """Join retrieved documents into one context string with numbered page markers.

    Each document's `page_content` is wrapped between a
    "=== 참고 페이지: N begin" line and a "=== 참고 페이지: N end" line,
    where N is the document's 1-based position in `docs`.

    Args:
        docs: Sequence of objects exposing a `page_content` string.

    Returns:
        The wrapped sections joined by a blank-line separator, or an empty
        string when `docs` is empty.
    """
    sections = [
        "\n=== 참고 페이지: {} begin\n".format(str(page_no))
        + doc.page_content
        + "\n=== 참고 페이지: {} end\n".format(str(page_no))
        for page_no, doc in enumerate(docs, start=1)
    ]
    return "\n\n".join(sections)

In [13]:
from transformers import AutoTokenizer

# Tokenizer of the locally saved model; used below to apply its chat template.
tokenizer = AutoTokenizer.from_pretrained("./maywell/EXAONE-3.0-7.8B-Instruct-Llamafied")

In [14]:
# System prompt template for the RAG chain. It has two `str.format` placeholders:
#   {top_n}   - highest reference page number the model may cite
#   {context} - the retrieved documents, wrapped in numbered page markers
rag_prompt = """You are an assistant for question-answering tasks.
a veterinarian who majored in veterinary medicine.
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know.
Please keep the english terminology and drug names and medical words.
Please provide any other explanations in korean.
Please present the same context only once.

<attention>
제공된 컨텍스트에 자료가 없다면 답변을 못한다고 하십시오.
When responding, please explain which page of the referenced context you are referring to.
The number should only include one specific page referenced. 
However, if multiple pages are referenced, refer to the next reference page using the word 참고 페이지: separated by commas.
When you finish explaining a specific topic, please leave the page number you are referencing.
Please provide references and evidence for each answer.
Please follow the example in the output example.
Please fill out the contents in abundance based on facts.
The given context numbers are a total of 1 to {top_n}.
</attention>

<write_reference>
참고 페이지: number
</write_reference>

<writing_style>
Kind, Warm, Academic, Little shortly
</writing_style>

<output_example>
크론병(Crohn's Disease, CD)을 치료하기 위한 방법은 여러 가지가 있습니다. 다음은 주요 치료 방법들입니다:

1. 글루코코르티코이드(Glucocorticoids):
중등도에서 중증 크론병에 효과적이며, 60-70%의 관해를 유도합니다. 부데소나이드(Budesonide)는 부작용이 적어 선호됩니다. 그러나 유지 요법으로는 사용되지 않습니다. (참고 페이지: 1)

Blah blah ...
</output_example>

<context>
{context}
</context>
"""

In [15]:
def apply_chat_for_rag(inputs):
    """Build the chat prompt from retrieved context and a question, then query the LLM.

    Args:
        inputs: Mapping with 'context' (the retrieved documents) and
            'question' (the user's question).

    Returns:
        Dict with 'documents' (the retrieved documents, unchanged) and
        'answer' (the value returned by `llm.invoke`).
    """
    system_message = {
        'role': 'system',
        'content': rag_prompt.format(context=format_docs(inputs['context']), top_n=str(top_n)),
    }
    user_message = {'role': 'user', 'content': f"{inputs['question']}"}

    # Render the conversation to plain prompt text with the model's chat template.
    prompt_text = tokenizer.apply_chat_template(
        [system_message, user_message],
        tokenize=False,
        add_generation_prompt=True,
    )
    return {"documents": inputs['context'], "answer": llm.invoke(prompt_text)}

In [17]:
from IPython.display import display, HTML
import markdown

def create_interactive_text(main_text, clickable_dict):
    """Display `main_text` as HTML with expandable sections for each placeholder.

    `main_text` is converted from Markdown to HTML. Every "{{key}}"
    placeholder in the result is then replaced by a <details> element whose
    summary is the key and whose body is the Markdown-rendered value.

    Args:
        main_text: Markdown text containing "{{key}}" placeholders.
        clickable_dict: Mapping of placeholder key to Markdown body text.

    Returns:
        None. The HTML is shown with IPython's display.
    """
    # Convert the main text from Markdown to HTML
    rendered = markdown.markdown(main_text)

    # Swap each placeholder for its collapsible HTML block
    for label, body in clickable_dict.items():
        placeholder = f"{{{{{label}}}}}"
        collapsible = f'<details><summary>{label}</summary>{markdown.markdown(body)}</details>'
        rendered = rendered.replace(placeholder, collapsible)

    # Show the final HTML
    display(HTML(rendered))

In [18]:
from operator import itemgetter

In [19]:
# RAG chain: the question is sent to the compression retriever to obtain the
# context, then question and context are passed on to apply_chat_for_rag.
rag_chain = ({"context": itemgetter('question') | compression_retriever,
             "question": itemgetter('question')} | RunnableLambda(apply_chat_for_rag))

In [20]:
def run(text):
    """Answer `text` with the RAG chain and display the answer with expandable references.

    Every "참고 페이지: N" citation in the answer that corresponds to a
    retrieved document is wrapped as "{{참고 페이지: N}}", the placeholder
    format create_interactive_text expands into a collapsible section
    holding that document's text.

    Args:
        text: The user's question.

    Returns:
        Whatever create_interactive_text returns (it displays the HTML).
    """
    import re  # local import: only needed for the citation markup below

    result = rag_chain.invoke({"question": text})
    target_docs = {f"참고 페이지: {i + 1}": doc.page_content for i, doc in enumerate(result['documents'])}

    def _wrap_citation(match):
        # Wrap only citations that refer to a retrieved document. Matching the
        # whole number prevents "참고 페이지: 1" from being replaced inside
        # "참고 페이지: 12", which the former prefix-based str.replace did.
        citation = match.group(0)
        return f"{{{{{citation}}}}}" if citation in target_docs else citation

    result['answer'] = re.sub(r"참고 페이지: [0-9]+", _wrap_citation, result['answer'])
    return create_interactive_text(result['answer'], target_docs)

In [37]:
# Korean sample questions used to try out the RAG chain.
kor_questions = [
"갑상선 기능 저하증의 증상은 무엇인가요?",
"강아지에서 파보바이러스 감염증을 어떻게 치료할 수 있나요?",
"개 디스템퍼의 원인은 무엇인가요?",
"고양이 백혈병을 예방하기 위해 어떤 예방 조치를 취할 수 있나요?",
"고양이에서 갑상선 기능 항진증의 흔한 증상은 무엇인가요?",
"개 코로나바이러스 감염증이 개에게 어떤 영향을 미치나요?",
"개 디스템퍼의 치료 옵션은 무엇인가요?",
"개 아데노바이러스 감염증의 정의는 무엇인가요?",
"개가 일반적인 질병으로부터 보호받기 위해 사용할 수 있는 예방 백신은 무엇이 있나요?",
"고양이가 고양이 면역 결핍 바이러스(FIV) 증상을 보일 때 권장되는 조치는 무엇인가요?"
]

In [24]:
run(kor_questions[1])

INFO:backoff:Backing off send_request(...) for 0.8s (requests.exceptions.ConnectionError: HTTPSConnectionPool(host='us.i.posthog.com', port=443): Max retries exceeded with url: /batch/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f0b64410090>: Failed to resolve 'us.i.posthog.com' ([Errno -3] Temporary failure in name resolution)")))
INFO:backoff:Backing off send_request(...) for 1.5s (requests.exceptions.ConnectionError: HTTPSConnectionPool(host='us.i.posthog.com', port=443): Max retries exceeded with url: /batch/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f10a00dfbd0>: Failed to resolve 'us.i.posthog.com' ([Errno -3] Temporary failure in name resolution)")))
Processed prompts: 100%|██████████| 1/1 [00:21<00:00, 21.71s/it, est. speed input: 122.34 toks/s, output: 19.76 toks/s]


In [39]:
run(kor_questions[2])

Processed prompts: 100%|██████████| 1/1 [00:17<00:00, 17.20s/it, est. speed input: 69.53 toks/s, output: 21.97 toks/s]


In [33]:
q1 = '분리불안 고치는 법 좀 알려주세요ㅠ'

In [34]:
run(q1)

Processed prompts: 100%|██████████| 1/1 [00:28<00:00, 28.60s/it, est. speed input: 96.90 toks/s, output: 20.81 toks/s]


In [30]:
documents[2]

Document(metadata={'disease_name': '코로나 바이러스 감염증, 개(Canine Coronavirus Infection)', 'animal': '개', 'cause': 'canine coronavirus ', 'define': '개 코로나 바이러스(CCV)에 의해 발생하며 구토와 설사를 수반하는 장염을 유발하는 질병', 'disease_now': '# 성견에서는 불현성 감염이나 경미한 임상증상을 보이나 자견에서는 녹황색에서 담적색의 설사와 함께 급사하기도 함. 모든 연령과 품종에서 발생# 일차적인 전파는 분변에 의해 일어나며 약2주간 분변으로 바이러스 배출# 잠복기는 1-3일# 바이러스혈증이나 전신적 질환은 나타나지 않음# 야생 개과에서도 발생, 전염성이 매우 강하며 전 세계적으로 만연된 질병으로 그 증상이 CPV-2와 유사함.# 고양이에서는 불현성 감염을 일으킴# 집에서 키우는 동물 및 야생동물 모두 감염될 수 있음', 'condition': ' | 구토 | 무기력 , 침울 , 졸림 , 생기없음 | 발열 , 고열 | 분변 냄새의 이상 | 설사 , 하리 | 식욕부진 , 식욕감소 , 식욕절폐 , 젖을 안먹임 , 먹이 공급 중단 | 점액변 , 점액양분변 | 탈수 | 혈변 ', 'jindan': '혈청학적 진단, 바이러스 분리, 면역형광검사, 전자현미경, PCR(급성기)', 'opinion': '[임상병리소견]  PCV증가, CPV와 달리 백혈구 감소가 관찰되지 않음 [병리해부소견]  소장점막의 심한 충·출혈 및 장간막 림프절의 종대 및 충혈[병리조직소견]  소장 융모의 위축과 융합, 고유판 세포수의 증가, 상피세포의 편평화와 술잔세포 증가가 특징적', 'treatment': "# 치료하지 않아도 회복되는 경우도 있으나 심할 경우 대증요법 외에 특별한 치료법 없음  -수액: lactated ringer's 용액, 80-90ml/kg/h을 투여후 배뇨하면 dextrose ringer 용액(10ml/kg/h) 투여  -항생제: ampicilli

In [4]:
# Example English questions for testing the RAG system. Each one ends with an
# instruction asking for a Korean answer ("launguage" typo fixed to "language").
eng_questions = [
    "What are the symptoms of Hypothyroidism in dogs? language is korean",
    "How can I treat Parvoviral Infection in puppies? language is korean",
    "What is the cause of Canine Distemper? language is korean",
    "Which preventive measures can be taken to avoid Feline Leukemia? language is korean",
    "What are the common signs of Hyperthyroidism in cats? language is korean",
    "How does Canine Coronavirus Infection affect dogs? language is korean",
    "What are the treatment options for Canine Distemper? language is korean",
    "What is the definition of Canine Adenovirus Infection? language is korean",
    "What preventive vaccines are available for dogs to protect against common diseases? language is korean",
    "What is the recommended course of action if my cat shows symptoms of Feline Immunodeficiency Virus (FIV)? language is korean"
]

## Evaluation
- RAGAS

### Build the corpus and QA set

In [25]:
# import logging

# # Set logging level to WARNING to suppress INFO and DEBUG logs
# logging.getLogger("openai").setLevel(logging.WARNING)
# logging.getLogger("httpx").setLevel(logging.WARNING)


In [35]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import os

In [None]:
# Test-set generator backed by OpenAI models: the same chat model is used as
# generator and critic, together with OpenAI embeddings.
generator_llm = ChatOpenAI(model="gpt-4o-mini")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Generate a test set of size 1 from the first 9 documents, with the evolution
# mix 50% simple, 25% reasoning, 25% multi-context.
testset = generator.generate_with_langchain_docs(documents[:9], test_size=1, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})

In [None]:
# The generator returns a ragas test-set object rather than a DataFrame, so it
# has to be converted before `.head()` can be used for a preview.
testset.to_pandas().head()

In [None]:
# Imports for QA generation and RAGAS evaluation (duplicates consolidated).
import nest_asyncio
import asyncio

import pandas as pd
from datasets import Dataset
from autorag.data.qacreation.ragas import generate_qa_ragas
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)
from ragas.testset.evolutions import simple, reasoning, multi_context, conditional

# Patch asyncio so event loops can be nested inside the notebook kernel.
nest_asyncio.apply()

# NOTE(review): `vector_pq` is not defined anywhere in the visible notebook; it
# must hold the path of the corpus parquet file before this cell is run.
corpus_df = pd.read_parquet(vector_pq)

distributions = {  # uniform distribution
    simple: 0.25,
    reasoning: 0.25,
    multi_context: 0.25,
    conditional: 0.25
}

generator_llm = ChatOpenAI(model="gpt-4o", temperature=0.5)
critic_llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
embedding_model = OpenAIEmbeddings()

qa_df = generate_qa_ragas(corpus_df, test_size=300, distributions=distributions,
                          generator_llm=generator_llm, critic_llm=critic_llm, embedding_model=embedding_model)

df2 = qa_df
# Fixed: this used to read `corpus`, a name that is never defined. The corpus
# loaded above is the frame whose 'doc_id' / 'contents' columns are looked up below.
df1 = corpus_df

# Create the context column based on the doc_id matching retrieval_gt values.
# SECURITY NOTE(review): `eval` runs arbitrary code found in `retrieval_gt`.
# If the column holds stringified lists, prefer `ast.literal_eval`; if it
# already holds lists, drop the call altogether.
df2['context'] = df2['retrieval_gt'].apply(
    lambda x: [df1.loc[df1['doc_id'] == doc_id, 'contents'].values[0] for doc_id in eval(x)[0]]
)

# Dataset
qa = df2[['query', 'context', 'generation_gt']]

questions = qa['query']
# NOTE(review): the "answer" column is filled with the ground truth itself, so
# the metrics below do not score answers produced by the RAG chain.
answers = qa['generation_gt']
contexts = qa['context']
ground_truths = qa['generation_gt']

# To dict
data = {
    "question": questions,
    "answer": answers,
    "contexts": contexts.to_list(),
    "ground_truth": ground_truths
}

# Convert dict to dataset
dataset = Dataset.from_dict(data)

# NOTE(review): each row's `contexts` is already a list; this wraps it in a
# second list. Confirm that the nested shape is what `evaluate` expects.
dataset = dataset.map(lambda example: {'contexts': [example['contexts']]})

# dataset and metrics
result = evaluate(
    dataset=dataset,
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
    is_async=False  # run synchronously (async mode disabled)
)

# results
# NOTE(review): this rebinds `df`, which earlier held the disease table.
df = result.to_pandas()

print(df)