In [3]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq
from torch.utils.data import DataLoader
from torch.optim import AdamW
from evaluate import load
from seq2seq import create_transformers_train_data, train_transformer, decode_with_transformer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Tab-separated file with the parallel sentence pairs; the path is relative to
# the notebook's working directory.
YELP_PARALLEL_PATH = '../yelp_parallel/yelp_parallel/test_en_parallel.txt'
data = pd.read_csv(YELP_PARALLEL_PATH, sep='\t')

In [6]:
# Pull the two parallel columns out as plain Python lists:
# "Style 1" becomes `negative`, "Style 2" becomes `positive`.
negative, positive = (
    data[column].values.tolist() for column in ("Style 1", "Style 2")
)

In [7]:
# Metric objects from `evaluate`; run_experiment reads both as module-level names.
bleu, bertscore = (load(metric_name) for metric_name in ("bleu", "bertscore"))

In [33]:
def run_experiment(model_name, negative, positive, lr=0.001, epochs=5, num_examples=30, batch_size=256, device=None):
    """Fine-tune a seq2seq checkpoint on sentence pairs and score its rewrites.

    The model named by ``model_name`` is trained on every (negative, positive)
    pair, then used to rewrite the first ``num_examples`` negative sentences.
    The rewrites are compared with the matching positive sentences using the
    module-level ``bleu`` and ``bertscore`` metric objects.

    NOTE(review): the scored sentences are a prefix of the same pairs the
    model was trained on, so the reported numbers measure fit to the training
    data rather than generalisation to unseen text.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained``.
        negative: Source sentences (model inputs).
        positive: Target sentences, aligned index-by-index with ``negative``.
        lr: Learning rate for AdamW.
        epochs: Number of epochs passed to ``train_transformer``.
        num_examples: Upper bound on how many leading pairs are scored; it is
            clamped to the amount of data actually available.
        batch_size: Batch size of the training DataLoader.
        device: Device the model is moved to and that is forwarded to
            ``train_transformer``.

    Returns:
        dict with two keys: ``"bleu"`` (the dict returned by ``bleu.compute``)
        and ``"BERTScore average f1"`` (mean of the per-example BERTScore F1).

    Raises:
        ValueError: If there is nothing to evaluate (``num_examples`` is not
            positive, or ``negative``/``positive`` is empty).
    """
    # Clamp the evaluation size so indexing can never run past either list, and
    # fail before the expensive fine-tuning rather than after it.
    n_eval = min(num_examples, len(negative), len(positive))
    if n_eval <= 0:
        raise ValueError(
            "Nothing to evaluate: num_examples must be positive and "
            "negative/positive must be non-empty."
        )

    print(f"Model: {model_name}, Learning rate: {lr}, Epochs: {epochs}")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

    train_dataset = create_transformers_train_data(negative, positive, tokenizer)
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=data_collator)

    optimizer = AdamW(model.parameters(), lr=lr)
    train_transformer(model, train_loader, optimizer, epochs, device=device)

    # Training presumably leaves the model in train mode (TODO confirm in
    # seq2seq.train_transformer); switch to eval mode so dropout cannot perturb
    # the generated text that gets scored.
    model.eval()

    all_references = list(positive[:n_eval])
    all_predictions = [
        decode_with_transformer(input_text, tokenizer, model)
        for input_text in negative[:n_eval]
    ]

    bleu_score = bleu.compute(
        predictions=all_predictions,
        references=all_references
    )

    # lang="en" is passed instead of model_type='microsoft/deberta-xlarge-mnli':
    # that model took too long to run, and bertscore requires at least one of
    # the two arguments to be given.
    bert_score = bertscore.compute(
        predictions=all_predictions,
        references=all_references,
        lang="en"
    )

    avg_bert_f1 = sum(bert_score["f1"]) / len(bert_score["f1"])

    print("BLEU:", bleu_score)
    print("BERTScore f1: ", avg_bert_f1)

    # The full per-example BERTScore output is very large, so only the mean F1
    # is printed and returned to keep the results readable.
    return {
        "bleu": bleu_score,
        "BERTScore average f1": avg_bert_f1,
    }

In [34]:
# Quick smoke run: short training and only 30 scored examples.
test = run_experiment(
    "t5-small",
    negative,
    positive,
    lr=0.001,
    epochs=3,
    num_examples=30,
    device=device,
)

Model: t5-small, Learning rate: 0.001, Epochs: 3




Epoch 1/3, Loss: 2.9925
Epoch 2/3, Loss: 2.3542
Epoch 3/3, Loss: 2.1506
BLEU: {'bleu': 0.2867368699753908, 'precisions': [0.5934579439252337, 0.41304347826086957, 0.3051948051948052, 0.22580645161290322], 'brevity_penalty': 0.7953508327485446, 'length_ratio': 0.8136882129277566, 'translation_length': 214, 'reference_length': 263}
BERTScore f1:  0.9140923142433166


In [36]:
# Collects one metrics dict per run_experiment call made in the cells below.
results = list()

In [37]:
# Sweep t5-small over (learning rate, epochs) settings; every run scores 300 examples.
for learning_rate, n_epochs in ((0.001, 3), (0.001, 5), (0.0001, 10)):
    outcome = run_experiment(
        "t5-small",
        negative,
        positive,
        lr=learning_rate,
        epochs=n_epochs,
        num_examples=300,
        device=device,
    )
    results.append(outcome)

Model: t5-small, Learning rate: 0.001, Epochs: 3




Epoch 1/3, Loss: 2.9939
Epoch 2/3, Loss: 2.3969
Epoch 3/3, Loss: 2.1875
BLEU: {'bleu': 0.25250003970319285, 'precisions': [0.5709555345316935, 0.4013230429988975, 0.29260237780713344, 0.21499176276771004], 'brevity_penalty': 0.7287229327644853, 'length_ratio': 0.7596119295724039, 'translation_length': 2114, 'reference_length': 2783}
BERTScore f1:  0.9128046178817749
Model: t5-small, Learning rate: 0.001, Epochs: 5
Epoch 1/5, Loss: 3.0066
Epoch 2/5, Loss: 2.3683
Epoch 3/5, Loss: 2.1621
Epoch 4/5, Loss: 2.0051
Epoch 5/5, Loss: 1.8727
BLEU: {'bleu': 0.2594070106196202, 'precisions': [0.5810684161199625, 0.4116684841875682, 0.2953063885267275, 0.21636952998379255], 'brevity_penalty': 0.7377695903399782, 'length_ratio': 0.766798418972332, 'translation_length': 2134, 'reference_length': 2783}
BERTScore f1:  0.9149351690212886
Model: t5-small, Learning rate: 0.0001, Epochs: 10
Epoch 1/10, Loss: 3.5105
Epoch 2/10, Loss: 3.1570
Epoch 3/10, Loss: 2.9386
Epoch 4/10, Loss: 2.8073
Epoch 5/10, Loss:

In [38]:
# Same (learning rate, epochs) sweep as above, this time for google/flan-t5-small.
for learning_rate, n_epochs in ((0.001, 3), (0.001, 5), (0.0001, 10)):
    outcome = run_experiment(
        "google/flan-t5-small",
        negative,
        positive,
        lr=learning_rate,
        epochs=n_epochs,
        num_examples=300,
        device=device,
    )
    results.append(outcome)

Model: google/flan-t5-small, Learning rate: 0.001, Epochs: 3




Epoch 1/3, Loss: 2.5855
Epoch 2/3, Loss: 2.0798
Epoch 3/3, Loss: 1.8712
BLEU: {'bleu': 0.2599353402769382, 'precisions': [0.5741000467508182, 0.4067427949972811, 0.2969460688758934, 0.2195318805488297], 'brevity_penalty': 0.7400220700644776, 'length_ratio': 0.768595041322314, 'translation_length': 2139, 'reference_length': 2783}
BERTScore f1:  0.9138826098044713
Model: google/flan-t5-small, Learning rate: 0.001, Epochs: 5
Epoch 1/5, Loss: 2.5633
Epoch 2/5, Loss: 2.0781
Epoch 3/5, Loss: 1.8683
Epoch 4/5, Loss: 1.7021
Epoch 5/5, Loss: 1.5397
BLEU: {'bleu': 0.279262527507512, 'precisions': [0.5868971792538672, 0.42571127502634354, 0.311639549436796, 0.22650231124807396], 'brevity_penalty': 0.766323368368248, 'length_ratio': 0.789795185052102, 'translation_length': 2198, 'reference_length': 2783}
BERTScore f1:  0.9174800852934519
Model: google/flan-t5-small, Learning rate: 0.0001, Epochs: 10
Epoch 1/10, Loss: 2.9174
Epoch 2/10, Loss: 2.6556
Epoch 3/10, Loss: 2.4727
Epoch 4/10, Loss: 2.3624

In [39]:
# Print each stored metrics dict next to its position in `results`.
for position, outcome in enumerate(results):
    print(f"{position}: {outcome}")

0: {'bleu': {'bleu': 0.25250003970319285, 'precisions': [0.5709555345316935, 0.4013230429988975, 0.29260237780713344, 0.21499176276771004], 'brevity_penalty': 0.7287229327644853, 'length_ratio': 0.7596119295724039, 'translation_length': 2114, 'reference_length': 2783}, 'BERTScore average f1': 0.9128046178817749}
1: {'bleu': {'bleu': 0.2594070106196202, 'precisions': [0.5810684161199625, 0.4116684841875682, 0.2953063885267275, 0.21636952998379255], 'brevity_penalty': 0.7377695903399782, 'length_ratio': 0.766798418972332, 'translation_length': 2134, 'reference_length': 2783}, 'BERTScore average f1': 0.9149351690212886}
2: {'bleu': {'bleu': 0.2281950452628886, 'precisions': [0.5460557392536608, 0.3670886075949367, 0.25774555042847724, 0.18472906403940886], 'brevity_penalty': 0.7300836761145013, 'length_ratio': 0.7606899029823931, 'translation_length': 2117, 'reference_length': 2783}, 'BERTScore average f1': 0.9090086128314336}
3: {'bleu': {'bleu': 0.2599353402769382, 'precisions': [0.5741

In [None]:
# Natural-language instruction that gets prepended to each negative sentence.
instruction_prompt = "Translate this negative review into a positive one: "

In [None]:
# Build the instruction-prefixed version of every negative sentence.
instructional_negative = []
for review in negative:
    instructional_negative.append(instruction_prompt + review)