In [1]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq
from torch.utils.data import DataLoader
from torch.optim import AdamW
from evaluate import load
from seq2seq import create_transformers_train_data, train_transformer, decode_with_transformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [3]:
data = pd.read_csv('../yelp_parallel/yelp_parallel/test_en_parallel.txt', sep='\t')

In [4]:
data.head()

Unnamed: 0,Style 1,Style 2
0,ever since joes has changed hands it's just go...,Ever since joes has changed hands it's gotten ...
1,there is definitely not enough room in that pa...,There is so much room in that part of the venue
2,so basically tasted watered down.,It didn't taste watered down at all.
3,she said she'd be back and disappeared for a f...,"She said she'd be back, and didn't disappear a..."
4,i can't believe how inconsiderate this pharmac...,This pharmacy is really considerate.


In [5]:
negative = data["Style 1"].values.tolist()
positive = data["Style 2"].values.tolist()

T5

In [6]:
bleu = load("bleu")
bertscore = load("bertscore")

In [7]:
model_name = "t5-small"

In [8]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [10]:
train_dataset = create_transformers_train_data(negative, positive, tokenizer)



In [11]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [12]:
optimizer = AdamW(model.parameters(), lr=0.001)

In [13]:
train_transformer(model, train_loader, optimizer, 5, device=device)

Epoch 1/5, Loss: 2.9907
Epoch 2/5, Loss: 2.3558
Epoch 3/5, Loss: 2.1473
Epoch 4/5, Loss: 2.0050
Epoch 5/5, Loss: 1.8778


In [14]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence



'ever since joes has changed hands'

In [15]:
reference_sentence = positive[0]
reference_sentence

"Ever since joes has changed hands it's gotten better and better."

In [16]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

{'bleu': 0.27952792741962756,
 'precisions': [0.8333333333333334, 0.8, 0.75, 0.6666666666666666],
 'brevity_penalty': 0.36787944117144233,
 'length_ratio': 0.5,
 'translation_length': 6,
 'reference_length': 12}

In [17]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence], lang="en") # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
bert_result

{'precision': [0.9461432695388794],
 'recall': [0.90846848487854],
 'f1': [0.9269232153892517],
 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.57.3)'}

Probuvanje so razlichni hiperparametri(learning rate i epochs)

lr=0.0001

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [None]:
optimizer = AdamW(model.parameters(), lr=0.0001)

In [None]:
train_transformer(model, train_loader, optimizer, 5, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result

lr=0.0001 epochs = 10

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)
optimizer = AdamW(model.parameters(), lr=0.0001)

In [None]:
train_transformer(model, train_loader, optimizer, 10, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result

lr=0.001 epochs=3

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [None]:
optimizer = AdamW(model.parameters(), lr=0.001)

In [None]:
train_transformer(model, train_loader, optimizer, 3, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result

Flan-t5

In [19]:
model_name = "google/flan-t5-small"

In [20]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [21]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [22]:
train_dataset = create_transformers_train_data(negative, positive, tokenizer)



In [23]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [24]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [25]:
optimizer = AdamW(model.parameters(), lr=0.001)

In [26]:
train_transformer(model, train_loader, optimizer, 5, device=device)

Epoch 1/5, Loss: 2.6183
Epoch 2/5, Loss: 2.0917
Epoch 3/5, Loss: 1.8857
Epoch 4/5, Loss: 1.7017
Epoch 5/5, Loss: 1.5556


In [27]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence



'Ever since joes has changed hands'

In [28]:
reference_sentence = positive[0]
reference_sentence

"Ever since joes has changed hands it's gotten better and better."

In [29]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

{'bleu': 0.36787944117144233,
 'precisions': [1.0, 1.0, 1.0, 1.0],
 'brevity_penalty': 0.36787944117144233,
 'length_ratio': 0.5,
 'translation_length': 6,
 'reference_length': 12}

In [30]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [32]:
bert_result

{'precision': [0.9449430108070374],
 'recall': [0.9070550203323364],
 'f1': [0.9256114363670349],
 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.57.3)'}

lr=0.0001

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [None]:
optimizer = AdamW(model.parameters(), lr=0.0001)

In [None]:
train_transformer(model, train_loader, optimizer, 5, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result

lr=0.0001 epochs=10

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [None]:
optimizer = AdamW(model.parameters(), lr=0.0001)

In [None]:
train_transformer(model, train_loader, optimizer, 10, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result

lr=0.001 epochs=3

In [None]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=data_collator)

In [None]:
optimizer = AdamW(model.parameters(), lr=0.001)

In [None]:
train_transformer(model, train_loader, optimizer, 3, device=device)

In [None]:
predicted_sentence = decode_with_transformer(negative[0], tokenizer, model)
predicted_sentence

In [None]:
reference_sentence = positive[0]
reference_sentence

In [None]:
bleu.compute(predictions=[predicted_sentence], references=[reference_sentence])

In [None]:
bert_result = bertscore.compute(predictions=[predicted_sentence], references=[reference_sentence],
                                lang="en")  # model_type='microsoft/deberta-xlarge-mnli' predolgo trae koga se koristi ovoj model_type

In [None]:
bert_result