In [1]:
from fastapi import FastAPI, HTTPException
from fastapi.responses import RedirectResponse
from pydantic import BaseModel
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer

# FastAPI application instance; routes are registered on it via decorators.
app = FastAPI()



In [2]:
# Evaluation metric functions
def calculate_bleu(reference: str, generated: str):
    """Return the sentence-level BLEU score of ``generated`` against ``reference``.

    Both strings are tokenized by splitting on whitespace, and ``reference``
    is supplied to ``sentence_bleu`` as the only reference sentence.
    """
    references = [reference.split()]  # a list holding the single tokenized reference
    hypothesis = generated.split()
    return sentence_bleu(references, hypothesis)

def calculate_meteor(reference: str, generated: str):
    """Return the METEOR score of ``generated`` against a single ``reference``.

    Both strings are split on whitespace before scoring, the same
    tokenization ``calculate_bleu`` uses.

    Args:
        reference: The expected (label) text.
        generated: The model-produced text to score.

    Returns:
        The value returned by ``nltk``'s ``meteor_score``.
    """
    # Current NLTK releases raise TypeError when the hypothesis or a reference
    # is a raw ``str``; they require pre-tokenized iterables of words.
    reference_tokens = reference.split()
    generated_tokens = generated.split()
    return meteor_score([reference_tokens], generated_tokens)

def calculate_rouge(reference: str, generated: str):
    """Return ROUGE-1, ROUGE-2 and ROUGE-L F-measures for ``generated``.

    Args:
        reference: The expected (label) text, used as the scoring target.
        generated: The model-produced text, used as the prediction.

    Returns:
        A dict with keys ``"rouge_1"``, ``"rouge_2"`` and ``"rouge_L"``, each
        holding the ``fmeasure`` of the corresponding ROUGE score.
    """
    # The constructor's first parameter is ``rouge_types``; it has no
    # ``metrics`` keyword, so the types are passed positionally.
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    scores = scorer.score(reference, generated)
    return {
        "rouge_1": scores["rouge1"].fmeasure,
        "rouge_2": scores["rouge2"].fmeasure,
        "rouge_L": scores["rougeL"].fmeasure,
    }



In [4]:
# Evaluation driver
def evaluate_model_responses(csv_file: str, output_file: str = "evaluation_results.csv"):
    """Score the model's answers for every question in ``csv_file``.

    The first CSV column is read as the question and the second as the
    reference answer. Each question is sent to ``rag_query`` and the returned
    ``'answer'`` is scored against the reference with BLEU, METEOR and ROUGE.

    Args:
        csv_file: Path to the input CSV.
        output_file: Path the per-question results are written to as CSV.

    Returns:
        A ``pandas.DataFrame`` with one row per question and the columns
        ``question``, ``reference``, ``generated``, ``bleu``, ``meteor``,
        ``rouge_1``, ``rouge_2`` and ``rouge_L``.
    """
    df = pd.read_csv(csv_file)

    # Fixed column order so an empty input still produces a header row.
    result_columns = [
        "question",
        "reference",
        "generated",
        "bleu",
        "meteor",
        "rouge_1",
        "rouge_2",
        "rouge_L",
    ]
    evaluation_results = []

    # Plain tuples give true positional access. ``row[0]`` on a Series from
    # ``iterrows`` is label-based, and its integer-position fallback is
    # deprecated in pandas.
    for row in df.itertuples(index=False, name=None):
        question = row[0]   # first column: question
        reference = row[1]  # second column: reference answer (label)

        # ``rag_query`` and ``QueryRequest`` are defined outside this function.
        # NOTE(review): if ``rag_query`` is declared ``async def`` this call
        # returns a coroutine and would need to be awaited -- confirm.
        response = rag_query(QueryRequest(question=question))
        generated_response = response['answer']

        rouge_scores = calculate_rouge(reference, generated_response)
        evaluation_results.append({
            "question": question,
            "reference": reference,
            "generated": generated_response,
            "bleu": calculate_bleu(reference, generated_response),
            "meteor": calculate_meteor(reference, generated_response),
            "rouge_1": rouge_scores["rouge_1"],
            "rouge_2": rouge_scores["rouge_2"],
            "rouge_L": rouge_scores["rouge_L"],
        })

    results_df = pd.DataFrame(evaluation_results, columns=result_columns)
    results_df.to_csv(output_file, index=False)

    return results_df

In [None]:
@app.post("/evaluate-model")
async def evaluate_model():
    """Run the evaluation over ``evaluation.csv`` and return the results.

    Returns:
        A dict with the key ``"evaluation_results"`` holding one record
        (dict) per evaluated row.

    Raises:
        HTTPException: With status 500 and the original error message as
            detail when any exception occurs during evaluation.
    """
    try:
        # Run the evaluation.
        # NOTE(review): evaluate_model_responses is a synchronous call made
        # from an async handler -- confirm that blocking here is acceptable.
        results = evaluate_model_responses("evaluation.csv")
        return {"evaluation_results": results.to_dict(orient="records")}
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e