In [1]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def bleu_eval(ref, hyp):
    """
    Compute the smoothed sentence-level BLEU-4 score for a text summary.

    Every reference sentence and the summary are lower-cased and tokenized
    with ``nltk.word_tokenize`` before being passed to ``sentence_bleu``.

    Args:
        ref: A list of reference sentences (strings).
        hyp: The summary sentence (string) to be evaluated.

    Returns:
        The BLEU score returned by ``sentence_bleu`` for the summary.
    """
    # Imported locally: this function calls ``nltk.word_tokenize``, but the
    # imports preceding it only bind names from ``nltk.translate.bleu_score``.
    # Without this line the call raises NameError unless some other code has
    # already executed ``import nltk``.
    import nltk

    # Tokenize the reference and summary sentences (case-insensitively).
    ref_tokens = [nltk.word_tokenize(sent.lower()) for sent in ref]
    hyp_tokens = nltk.word_tokenize(hyp.lower())

    # Equal weights over 1- to 4-grams, i.e. BLEU-4.
    weights = (0.25, 0.25, 0.25, 0.25)
    smoothing_fn = SmoothingFunction().method1

    return sentence_bleu(
        ref_tokens, hyp_tokens, weights, smoothing_function=smoothing_fn
    )


In [2]:
import nltk

def meteor_eval(ref, hyp):
    """
    Score a summary against reference sentences with METEOR.

    Each input sentence is lower-cased and split into tokens with
    ``nltk.word_tokenize``; the token lists are then handed to
    ``nltk.translate.meteor_score.meteor_score``.

    Args:
        ref: A list of reference sentences.
        hyp: The summary sentence to be evaluated.

    Returns:
        The METEOR score for the summary.
    """
    # Build one token list per reference sentence.
    references = []
    for sentence in ref:
        references.append(nltk.word_tokenize(sentence.lower()))

    # Tokenize the candidate summary the same way.
    hypothesis = nltk.word_tokenize(hyp.lower())

    return nltk.translate.meteor_score.meteor_score(references, hypothesis)


In [3]:
# Small example: two reference sentences and one candidate summary.
ref = ["The cat sat on the mat.", "The dog ate my homework."]
hyp = "The cat ate the dog."

# Score the candidate with BLEU and report the result.
bleu_score = bleu_eval(ref, hyp)
print("BLEU score:", bleu_score)

# Score the same candidate with METEOR and report the result.
meteor_score = meteor_eval(ref, hyp)
print("METEOR score:", meteor_score)

BLEU score: 0.13512001548070346
MEREOR score: 0.5260416666666666
