In [1]:
def lcs_length(a, b):
    """Return the length of the Longest Common Subsequence (LCS) of two sequences.

    A subsequence keeps the relative order of elements but need not be
    contiguous. Elements are compared with ``==``.

    Args:
        a: First sequence of tokens (must support ``len`` and iteration).
        b: Second sequence of tokens (must support ``len`` and iteration).

    Returns:
        The number of elements in the longest subsequence common to ``a``
        and ``b``; 0 when either sequence is empty.
    """
    # Each DP row depends only on the row directly above it, so two rolling
    # rows of len(b) + 1 cells replace the full (m + 1) x (n + 1) table.
    prev_row = [0] * (len(b) + 1)

    for a_item in a:
        # Column 0 represents the empty prefix of ``b``: LCS length is 0.
        curr_row = [0]
        for j, b_item in enumerate(b, start=1):
            if a_item == b_item:
                # Extend the LCS of both prefixes shortened by one element.
                curr_row.append(prev_row[j - 1] + 1)
            else:
                # Drop the last element of ``a`` (cell above) or of ``b``
                # (cell to the left) and keep the better result.
                curr_row.append(max(prev_row[j], curr_row[j - 1]))
        prev_row = curr_row

    return prev_row[-1]


In [2]:
def rouge_l(candidate, reference):
    """Score a candidate string against a reference string with ROUGE-L.

    Both strings are split on whitespace into tokens, and the length of
    their longest common subsequence (via ``lcs_length``) is used as the
    overlap count.

    Args:
        candidate: The string being evaluated.
        reference: The string to compare against.

    Returns:
        A dict with keys ``"precision"`` (overlap / candidate token count),
        ``"recall"`` (overlap / reference token count) and ``"f1"`` (their
        harmonic mean). A ratio whose token list is empty is 0.0, and
        ``"f1"`` is 0.0 when precision and recall sum to zero.
    """
    hyp_words = candidate.strip().split()
    gold_words = reference.strip().split()
    overlap = lcs_length(hyp_words, gold_words)

    # Default to 0.0 so an empty token list never triggers a division by zero.
    precision = 0.0
    if hyp_words:
        precision = overlap / len(hyp_words)

    recall = 0.0
    if gold_words:
        recall = overlap / len(gold_words)

    denominator = precision + recall
    f_score = (2 * precision * recall) / denominator if denominator != 0 else 0.0

    return {"precision": precision, "recall": recall, "f1": f_score}


In [3]:
# Demo: score a candidate sentence that differs from the reference by one word.
cand, ref = "the cat jump on the mat", "the cat sat on the mat"
print(rouge_l(candidate=cand, reference=ref))


{'precision': 0.8333333333333334, 'recall': 0.8333333333333334, 'f1': 0.8333333333333334}
