In [None]:
!pip install rouge-score
!pip install python-Levenshtein
!pip install bert_score

In [31]:
import os
import pandas as pd
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
import Levenshtein
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from bert_score import score

In [33]:
excel_file_path = '/content/100-lt-nematyti-test.xlsx'
df = pd.read_excel(excel_file_path)
df.dropna(subset=['original', 'mT5'], inplace=True)

# BLEU

In [34]:
original_sentences = df['original'].tolist()
simplified_sentences = df['mT5'].tolist()
reference_sentences = [[sentence.split()] for sentence in original_sentences]
hypothesis_sentences = [sentence.split() for sentence in simplified_sentences]
bleu_score = corpus_bleu(reference_sentences, hypothesis_sentences)
print(f'BLEU Score: {bleu_score:.4f}')


BLEU Score: 0.3933


# ROUGE

In [35]:
def calculate_corpus_rouge(df):
    references = df['original'].tolist()
    hypotheses = df['mT5'].tolist()
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []
    for reference, hypothesis in zip(references, hypotheses):
        scores = scorer.score(reference, hypothesis)
        rouge1_scores.append(scores['rouge1'].fmeasure)
        rouge2_scores.append(scores['rouge2'].fmeasure)
        rougeL_scores.append(scores['rougeL'].fmeasure)
    avg_rouge1 = sum(rouge1_scores) / len(rouge1_scores)
    avg_rouge2 = sum(rouge2_scores) / len(rouge2_scores)
    avg_rougeL = sum(rougeL_scores) / len(rougeL_scores)
    print(f'Average ROUGE-1 F-score : {avg_rouge1:.4f}')
    print(f'Average ROUGE-2 F-score : {avg_rouge2:.4f}')
    print(f'Average ROUGE-L F-score : {avg_rougeL:.4f}')

calculate_corpus_rouge(df)


Average ROUGE-1 F-score : 0.8333
Average ROUGE-2 F-score : 0.7548
Average ROUGE-L F-score : 0.8333


# Žodžiai

In [36]:
def calculate_sentences_and_words(df):
    num_original_sentences = df['original'].shape[0]
    num_generated_sentences = df['mT5'].shape[0]
    original_words_count = df['original'].apply(lambda x: len(str(x).split())).sum()
    generated_words_count = df['mT5'].apply(lambda x: len(str(x).split())).sum()
    return num_original_sentences, num_generated_sentences, original_words_count, generated_words_count

num_original_sentences, num_generated_sentences, original_words_count, generated_words_count = calculate_sentences_and_words(df)
print("Originalių sakinių skaičius:", num_original_sentences)
print("Supaprastintų sakinių skaičius:", num_generated_sentences)
print("Originalių žodžių skaičius:", original_words_count)
print("Supaprastintų žodžių skaičius:", generated_words_count)

Originalių sakinių skaičius: 3
Supaprastintų sakinių skaičius: 3
Originalių žodžių skaičius: 64
Supaprastintų žodžių skaičius: 48


# BERTscore

In [38]:
references = df['original'].tolist()
hypotheses = df['mT5'].tolist()
P, R, F1 = score(hypotheses, references, lang='lt', model_type='bert-large-uncased')
average_P = P.mean().item()
average_R = R.mean().item()
average_F1 = F1.mean().item()
print(f'BERTScore Precision: {average_P}')
print(f'BERTScore Recall: {average_R}')
print(f'BERTScore F1: {average_F1}')

BERTScore Precision: 0.9083678722381592
BERTScore Recall: 0.8198363184928894
BERTScore F1: 0.8612228035926819


# MeaningBERT

In [39]:
tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
model = AutoModelForSequenceClassification.from_pretrained("davebulaval/MeaningBERT")
original_sentences = df['original'].tolist()
translated_sentences = df['mT5'].tolist()
scores = []
for orig_sent, trans_sent in zip(original_sentences, translated_sentences):
    inputs = tokenizer(orig_sent, trans_sent, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    scores.append(logits.item())
average_score = sum(scores) / len(scores)
print(f"Average MeaningBERT Score: {average_score}")


Average MeaningBERT Score: 83.6210428873698


# ROLD+proportions

In [40]:
def replace_only_levenshtein(s1, s2):
    len_s1 = len(str(s1))
    len_s2 = len(str(s2))
    dp = [[0] * (len_s2 + 1) for _ in range(len_s1 + 1)]
    for i in range(len_s1 + 1):
        dp[i][0] = i
    for j in range(len_s2 + 1):
        dp[0][j] = j
    for i in range(1, len_s1 + 1):
        for j in range(1, len_s2 + 1):
            if str(s1)[i - 1] == str(s2)[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = dp[i - 1][j - 1] + 1
    return dp[len_s1][len_s2]

df['ROLD'] = df.apply(lambda row: replace_only_levenshtein(row['original'], row['mT5']), axis=1)

def calculate_word_operations(df):
    df['Operations'] = df.apply(lambda row: Levenshtein.editops(str(row['original']), str(row['mT5'])), axis=1)
    df['Original_Words'] = df['original'].apply(lambda x: len(x.split()))
    df['Generated_Words'] = df['mT5'].apply(lambda x: len(x.split()))
    df['Deleted'] = df['Operations'].apply(lambda ops: sum(1 for op in ops if op[0] == 'delete'))
    df['Added'] = df['Operations'].apply(lambda ops: sum(1 for op in ops if op[0] == 'insert'))
    df['Reordered'] = df['Operations'].apply(lambda ops: sum(1 for op in ops if op[0] == 'replace'))
    df['Proportion_Deleted'] = df['Deleted'] / df['Original_Words']
    df['Proportion_Added'] = df['Added'] / df['Generated_Words']
    df['Proportion_Reordered'] = df['Reordered'] / df['Original_Words']
    return df

df = calculate_word_operations(df)

output_file_path = '/content/processed_data.xlsx'

df.to_excel(output_file_path, index=False)

print(f"Išsaugota {output_file_path}")


Išsaugota /content/processed_data.xlsx
