<a href="https://colab.research.google.com/github/rickiepark/MLQandAI/blob/main/supplementary/q19-evaluation-llms/rouge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ROUGE-1 (유니그램 ROUGE)

In [1]:
# Example sentence pair scored by every ROUGE-1 variant in this notebook:
# `reference` is the full sentence, `candidate` is a shortened version of it.
reference = "The quick brown fox jumps over the lazy dog"
candidate = "The fox jumps over the dog"

### `rouge` 라이브러리

In [2]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [3]:
from rouge import Rouge

# Create the scorer object from the `rouge` package.
rouge = Rouge()

# The candidate is passed first and the reference second. The result is
# indexed below as scores['rouge-1'][...] with the keys 'r', 'p' and 'f'.
scores = rouge.get_scores(candidate, reference, avg=True)

# 'r', 'p' and 'f' are printed as recall, precision and F1 respectively.
print(f"ROUGE-1 재현율: {scores['rouge-1']['r']:.2f}")
print(f"ROUGE-1 정밀도: {scores['rouge-1']['p']:.2f}")
print(f"ROUGE-1 F1: {scores['rouge-1']['f']:.2f}")

ROUGE-1 재현율: 0.67
ROUGE-1 정밀도: 1.00
ROUGE-1 F1: 0.80


### TorchMetrics

In [5]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m927.3/927.3 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.9 torchmetrics-1.6.1


In [7]:
from torchmetrics.text import ROUGEScore

# Restrict the metric to ROUGE-1 via rouge_keys.
# NOTE: this rebinds `rouge`, replacing the Rouge() instance created in the
# earlier `rouge`-library cell.
rouge = ROUGEScore(rouge_keys='rouge1')

# Compute the ROUGE score.
# Both inputs are passed by keyword, each wrapped in a one-element list.
rouge_score = rouge(target=[reference], preds=[candidate])

# The result is read by key: rouge1_recall, rouge1_precision, rouge1_fmeasure.
print(f"ROUGE-1 재현율: {rouge_score['rouge1_recall']:.2f}")
print(f"ROUGE-1 정밀도: {rouge_score['rouge1_precision']:.2f}")
print(f"ROUGE-1 F1: {rouge_score['rouge1_fmeasure']:.2f}")

ROUGE-1 재현율: 0.67
ROUGE-1 정밀도: 1.00
ROUGE-1 F1: 0.80


### 직접 구현하기

In [8]:
from collections import Counter

def tokenize(sentence):
    """Lowercase ``sentence`` and split it on whitespace into a list of tokens."""
    lowered = sentence.lower()
    words = lowered.split()
    return words

def ngrams(tokens, n):
    """Return every contiguous run of ``n`` tokens, in order, as a list of tuples.

    The result is empty when ``tokens`` holds fewer than ``n`` items.
    """
    grams = []
    window_count = len(tokens) - n + 1
    for start in range(window_count):
        stop = start + n
        grams.append(tuple(tokens[start:stop]))
    return grams

def rouge_1(candidate, reference):
    """Compute ROUGE-1 (unigram overlap) between a candidate and a reference.

    Both sentences are tokenized with ``tokenize`` and turned into unigram
    multisets. The overlap is the multiset intersection, so a word repeated
    in the candidate is credited at most as many times as it occurs in the
    reference.

    Args:
        candidate: Generated sentence to evaluate (str).
        reference: Ground-truth sentence to compare against (str).

    Returns:
        A ``(precision, recall, f1_score)`` tuple of floats. All three are
        0.0 when either sentence contains no tokens.
    """
    candidate_tokens = tokenize(candidate)
    reference_tokens = tokenize(reference)

    candidate_1grams = Counter(ngrams(candidate_tokens, 1))
    reference_1grams = Counter(ngrams(reference_tokens, 1))

    # Counter & Counter keeps the minimum count per unigram (clipped overlap).
    overlapping_1grams = candidate_1grams & reference_1grams
    overlap_count = sum(overlapping_1grams.values())

    candidate_count = sum(candidate_1grams.values())
    reference_count = sum(reference_1grams.values())

    if candidate_count == 0 or reference_count == 0:
        # Return a 3-tuple here as well: callers unpack the result into
        # (precision, recall, f1), which would fail on a bare scalar.
        return 0.0, 0.0, 0.0

    precision = overlap_count / candidate_count
    recall = overlap_count / reference_count
    # No overlap at all makes precision + recall zero; define F1 as 0.0 then.
    f1_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0

    return precision, recall, f1_score


# rouge_1 returns its values in the order (precision, recall, f1).
# Recall is printed first, matching the print order of the library-based cells.
precision, recall, f1_score = rouge_1(candidate, reference)

print(f"ROUGE-1 재현율: {recall:.2f}")
print(f"ROUGE-1 정밀도: {precision:.2f}")
print(f"ROUGE-1 F1: {f1_score:.2f}")

ROUGE-1 재현율: 0.67
ROUGE-1 정밀도: 1.00
ROUGE-1 F1: 0.80
