#1. 필요한 라이브러리 설치

In [None]:
# KoNLPy와 Kkma 설치
!pip install konlpy

# Java 설치 (KoNLPy에서 Kkma를 사용하기 위해 필요)
!apt-get install -y openjdk-11-jdk

# JPype1 설치 (KoNLPy가 Java와 상호작용하기 위해 필요)
!pip install jpype1

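# Install curl, then run KoNLPy's MeCab install script (needed only for the Mecab tagger; Kkma, used below, does not require it)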
!apt-get install -y curl
!bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openjdk-11-jdk is already the newest version (11.0.24+8-1ubuntu3~22.04).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


#2. 점수 계산 함수 구현

In [2]:
from konlpy.tag import Kkma
from collections import Counter
import math

# Create the Kkma morphological analyzer once at module level;
# the scoring demo below reuses this instance to tokenize sentences.
kkma = Kkma()

def get_ngrams(tokens, n):
    """Return every contiguous n-gram of ``tokens`` as a list of tuples.

    Args:
        tokens: Sliceable sequence of tokens (e.g. a list of morphemes).
        n: Number of tokens per n-gram.

    Returns:
        The n-grams ordered by starting position. For positive ``n`` the
        list is empty when ``tokens`` holds fewer than ``n`` items.
    """
    ngrams = []
    window_count = len(tokens) - n + 1
    for start in range(window_count):
        ngrams.append(tuple(tokens[start:start + n]))
    return ngrams

def calculate_rouge(reference, candidate, n=1):
    """Compute ROUGE-N precision, recall and F1 between two token sequences.

    Args:
        reference: Token sequence of the reference text.
        candidate: Token sequence of the text being evaluated.
        n: N-gram order (1 for ROUGE-1, 2 for ROUGE-2, ...).

    Returns:
        A ``(precision, recall, f1_score)`` tuple. Precision is the matched
        n-gram count divided by the number of candidate n-grams, recall
        divides by the number of reference n-grams, and each is 0.0 when its
        denominator is empty.
    """
    reference_grams = get_ngrams(reference, n)
    candidate_grams = get_ngrams(candidate, n)

    # Counter intersection keeps the minimum count per n-gram, so repeated
    # n-grams are matched at most as often as they occur on both sides.
    matched = Counter(reference_grams) & Counter(candidate_grams)
    overlap = sum(matched.values())

    precision = overlap / len(candidate_grams) if candidate_grams else 0.0
    recall = overlap / len(reference_grams) if reference_grams else 0.0

    # Harmonic mean of precision and recall; defined as 0.0 when both are 0.
    total = precision + recall
    if total == 0:
        return precision, recall, 0.0
    return precision, recall, 2 * precision * recall / total

def calculate_bleu(reference, candidate, max_n=4):
    """Compute an unsmoothed sentence-level BLEU score for one reference.

    The score is the geometric mean of the clipped n-gram precisions for
    orders 1..max_n, multiplied by the brevity penalty.

    Args:
        reference: Token sequence of the reference text.
        candidate: Token sequence of the text being evaluated.
        max_n: Highest n-gram order included in the geometric mean.

    Returns:
        The BLEU score as a float in [0.0, 1.0]. The score is 0.0 when the
        candidate is empty, when ``max_n`` is below 1, or when any n-gram
        order has no match (no smoothing is applied).
    """
    ref_len = len(reference)
    cand_len = len(candidate)

    # An empty candidate cannot match anything. Returning here also keeps the
    # brevity penalty below from dividing by a zero candidate length.
    if cand_len == 0 or max_n < 1:
        return 0.0

    log_precision_sum = 0.0
    for n in range(1, max_n + 1):
        cand_ngrams = get_ngrams(candidate, n)
        if not cand_ngrams:
            # Candidate is shorter than n tokens: precision for this order is 0.
            return 0.0

        ref_counter = Counter(get_ngrams(reference, n))
        cand_counter = Counter(cand_ngrams)

        # Clipped matches: each candidate n-gram counts at most as many times
        # as it appears in the reference.
        overlap = sum((ref_counter & cand_counter).values())
        if overlap == 0:
            # log(0) is undefined; a zero precision makes the geometric mean
            # exactly 0, so stop instead of using a large negative stand-in.
            return 0.0

        log_precision_sum += math.log(overlap / len(cand_ngrams))

    # Geometric mean of the precisions, computed in log space.
    bleu_score = math.exp(log_precision_sum / max_n)

    # Brevity penalty: penalize candidates shorter than the reference.
    if cand_len < ref_len:
        bleu_score *= math.exp(1 - ref_len / cand_len)

    return bleu_score
def lcs(X, Y):
    """Return the length of the longest common subsequence of X and Y.

    Used as the basis for ROUGE-L. Only two rows of the dynamic-programming
    table are kept at a time: the row for the previous item of ``X`` and the
    row currently being filled.

    Args:
        X: First sequence of comparable items.
        Y: Second sequence of comparable items.

    Returns:
        The LCS length as an int (0 when either sequence is empty).
    """
    prev_row = [0] * (len(Y) + 1)
    for x_item in X:
        # Column 0 represents an empty prefix of Y, so its LCS length is 0.
        curr_row = [0]
        for col, y_item in enumerate(Y, start=1):
            if x_item == y_item:
                # Extend the LCS of both prefixes without their last items.
                curr_row.append(prev_row[col - 1] + 1)
            else:
                # Drop the last item of either X's or Y's prefix.
                curr_row.append(max(prev_row[col], curr_row[col - 1]))
        prev_row = curr_row
    return prev_row[-1]

ROUGE-1 -> Precision: 1.0, Recall: 1.0, F1: 1.0
ROUGE-2 -> Precision: 0.75, Recall: 0.75, F1: 0.75
ROUGE-L -> Precision: 0.7777777777777778, Recall: 0.7777777777777778, F1: 0.7777777777777778
BLEU -> Score: 0.48109772909788073


#3. 점수 계산해보기

In [3]:
# Tokenize both sentences into morphemes with Kkma
reference = "고양이가 나무 위로 올라갔다."
candidate = "나무 위로 고양이가 올라갔다."

reference_tokens = kkma.morphs(reference)
candidate_tokens = kkma.morphs(candidate)
print("문장 1 형태소 분석 결과: ", reference_tokens)
print("문장 2 형태소 분석 결과: ", candidate_tokens)

# ROUGE-1 (unigram overlap)
(precision_rouge1, recall_rouge1, f1_rouge1) = calculate_rouge(
    reference_tokens, candidate_tokens, n=1
)
print(f"ROUGE-1 -> Precision: {precision_rouge1}, Recall: {recall_rouge1}, F1: {f1_rouge1}")

# ROUGE-2 (bigram overlap)
(precision_rouge2, recall_rouge2, f1_rouge2) = calculate_rouge(
    reference_tokens, candidate_tokens, n=2
)
print(f"ROUGE-2 -> Precision: {precision_rouge2}, Recall: {recall_rouge2}, F1: {f1_rouge2}")

# ROUGE-L (longest common subsequence length over each sequence length)
lcs_length = lcs(reference_tokens, candidate_tokens)

if len(candidate_tokens) > 0:
    precision_rougeL = lcs_length / len(candidate_tokens)
else:
    precision_rougeL = 0

if len(reference_tokens) > 0:
    recall_rougeL = lcs_length / len(reference_tokens)
else:
    recall_rougeL = 0

# Harmonic mean of precision and recall; 0 when both are 0
if precision_rougeL + recall_rougeL > 0:
    f1_rougeL = 2 * precision_rougeL * recall_rougeL / (precision_rougeL + recall_rougeL)
else:
    f1_rougeL = 0

print(f"ROUGE-L -> Precision: {precision_rougeL}, Recall: {recall_rougeL}, F1: {f1_rougeL}")

# BLEU with n-gram orders 1 through 4
bleu_score = calculate_bleu(reference_tokens, candidate_tokens, max_n=4)
print(f"BLEU -> Score: {bleu_score}")

문장 1 형태소 분석 결과:  ['고양이', '가', '나무', '위', '로', '올라가', '었', '다', '.']
문장 2 형태소 분석 결과:  ['나무', '위', '로', '고양이', '가', '올라가', '었', '다', '.']
ROUGE-1 -> Precision: 1.0, Recall: 1.0, F1: 1.0
ROUGE-2 -> Precision: 0.75, Recall: 0.75, F1: 0.75
ROUGE-L -> Precision: 0.7777777777777778, Recall: 0.7777777777777778, F1: 0.7777777777777778
BLEU -> Score: 0.48109772909788073
