In [2]:
# rag_evaluation.py

import os
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
from tqdm import tqdm

# LangChain 관련
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain.schema import Document
import fitz  # PyMuPDF
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

# ollama_model_load.py에서 정의한 Ollama 모델 llama 불러오기
from ollama_model_load import DUChatbot5ep

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# 1) 평가 지표 계산 함수
def calculate_bleu(reference: str, generated: str):
    reference_tokens = reference.split()
    generated_tokens = generated.split()
    smoothing_function = SmoothingFunction().method1
    return sentence_bleu([reference_tokens], generated_tokens, smoothing_function=smoothing_function)

def calculate_meteor(reference: str, generated: str):
    reference_tokens = reference.split()
    generated_tokens = generated.split()
    return meteor_score([reference_tokens], generated_tokens)

def calculate_rouge(reference: str, generated: str):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, generated)
    return {
        "rouge_1": scores["rouge1"].fmeasure,
        "rouge_2": scores["rouge2"].fmeasure,
        "rouge_L": scores["rougeL"].fmeasure
    }

# 2) RAG 검색 수행 함수
def perform_rag(
    question: str,
    pdf_file: str = "QADataset_new.pdf",
    chunk_size: int = 100,
    chunk_overlap: int = 50,
    device: str = "cuda"
) -> str:
    doc = fitz.open(pdf_file)
    full_text = ""
    for page in doc:
        full_text += page.get_text()

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = [Document(page_content=t) for t in splitter.split_text(full_text)]

    model_kwargs = {"device": device}
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceEmbeddings(
        model_name="intfloat/multilingual-e5-large",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )

    db = FAISS.from_documents(chunks, embedding=embeddings)
    retriever = db.as_retriever(search_kwargs={"k": 2})

    context_docs = retriever.get_relevant_documents(question)
    context_text = "\n\n".join(doc.page_content for doc in context_docs)
    return context_text

# 3) LLM 질의 함수 (RAG Prompt 포함)
def query_llm(context: str, question: str) -> str:
    RAG_PROMPT_TEMPLATE = """
    아래 정보(context)를 참고하여 사용자 질문에 답해주세요:
    {context}

    질문:
    {question}

    답변 시, 질문의 핵심만 파악하여 간결하게 1~2문장으로 답변하고, 
    불필요한 설명은 피합니다. (동서울대학교 관련 정보라면 그 내용만 요약)

    답변:
    """
    prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
    formatted_prompt = prompt.format(context=context, question=question)
    message = HumanMessage(content=formatted_prompt)

    # llama(ollama_model_load에서 import)를 사용
    response = DUChatbot5ep([message])  
    return response.content.strip()

# 4) 전체 평가 함수
def evaluate_model_responses(
    csv_file: str = "QADataset_old.csv",
    pdf_file: str = "QADataset_new.pdf",
    output_file: str = "Evaluation_RAG_results.csv",
    batch_size: int = 5,
    chunk_size: int = 100,
    chunk_overlap: int = 50,
    device: str = "cuda"
):
    processed_count = 0
    if os.path.exists(output_file):
        existing_df = pd.read_csv(output_file, encoding='utf-8-sig')
        processed_count = len(existing_df)

    try:
        df = pd.read_csv(csv_file, encoding='utf-8-sig')
    except UnicodeDecodeError:
        df = pd.read_csv(csv_file, encoding='euc-kr')
    except Exception as e:
        print(f"CSV 파일을 열 때 오류 발생: {e}")
        return pd.DataFrame()

    total_rows = len(df)
    if processed_count >= total_rows:
        print("이미 모든 행이 처리되었습니다.")
        return pd.read_csv(output_file, encoding='utf-8-sig')

    evaluation_results = []
    for idx in tqdm(range(processed_count, total_rows), desc="Evaluating with RAG"):
        question = df.iloc[idx, 0]
        reference = df.iloc[idx, 1]

        context = perform_rag(
            question=question,
            pdf_file=pdf_file,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            device=device
        )

        generated_response = query_llm(context, question)

        bleu_score = calculate_bleu(reference, generated_response)
        meteor_score_value = calculate_meteor(reference, generated_response)
        rouge_scores = calculate_rouge(reference, generated_response)

        evaluation_results.append({
            "question": question,
            "reference": reference,
            "generated": generated_response,
            "bleu": bleu_score,
            "meteor": meteor_score_value,
            "rouge_1": rouge_scores["rouge_1"],
            "rouge_2": rouge_scores["rouge_2"],
            "rouge_L": rouge_scores["rouge_L"]
        })

        if (len(evaluation_results) % batch_size == 0) or (idx == total_rows - 1):
            partial_df = pd.DataFrame(evaluation_results)

            if os.path.exists(output_file) and processed_count > 0:
                partial_df.to_csv(
                    output_file,
                    mode='a',
                    index=False,
                    header=False,
                    encoding='utf-8-sig'
                )
            else:
                partial_df.to_csv(
                    output_file,
                    mode='w',
                    index=False,
                    header=True,
                    encoding='utf-8-sig'
                )

            evaluation_results = []
            processed_count = idx + 1

    final_df = pd.read_csv(output_file, encoding='utf-8-sig')
    print(f"평가 완료! 결과는 '{output_file}'에 저장되었습니다.")
    return final_df

# 5) 메인 실행 예시
if __name__ == "__main__":
    # RAG 평가 실행
    final_df = evaluate_model_responses(
        csv_file="QADataset_old.csv",     
        pdf_file="QADataset_new.pdf",     
        output_file="E5_PEFT_RAG_old2.csv", 
        batch_size=5,
        chunk_size=100,
        chunk_overlap=50,
        device="cuda"
    )
    
    print(final_df.head())

Evaluating with RAG:   0%|          | 0/770 [00:00<?, ?it/s]

  response = DUChatbot5ep([message])
Evaluating with RAG: 100%|██████████| 770/770 [4:00:16<00:00, 18.72s/it]  

평가 완료! 결과는 'E5_PEFT_RAG_old2.csv'에 저장되었습니다.
                question                          reference  \
0   동서울대 컴퓨터소프트웨어과 전화번호?  컴퓨터소프트웨어과 전화번호는 031-720-2090 입니다.   
1      동서울대 컴퓨터전자과 전화번호?     컴퓨터전자과 전화번호는 031-720-2070 입니다.   
2       동서울대 항공기계과 전화번호?      항공기계과 전화번호는 031-720-2055 입니다.   
3      동서울대 미래자동차과 전화번호?     미래자동차과 전화번호는 031-720-2040 입니다.   
4       동서울대 전기공학과 전화번호?      전기공학과 전화번호는 031-720-2060 입니다.   

                                           generated      bleu    meteor  \
0             동서울대 컴퓨터소프트웨어과 전화번호는 031-720-2090 입니다.  0.668740  0.967988   
1                동서울대 컴퓨터전자과 전화번호는 031-720-2070 입니다.  0.668740  0.967988   
2                 동서울대 항공기계과 전화번호는 031-720-2055 입니다.  0.668740  0.967988   
3  human: 동서울대 미래자동차과 전화번호는 031-720-2040 입니다.\n h...  0.212006  0.781250   
4                 동서울대 전기공학과 전화번호는 031-720-2060 입니다.  0.668740  0.967988   

    rouge_1   rouge_2   rouge_L  
0  1.000000  1.000000  1.000000  
1  1.000000  1.000000  1.000000  
2  1.000000  1.000


