In [1]:
import torch
from transformers import set_seed
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch import cuda
from sentence_transformers import SentenceTransformer, util
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

SEED = 42
set_seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

device = 'cuda' if cuda.is_available() else 'cpu'

model_name_fl = "textattack/roberta-base-CoLA"
model_name_sta = "SkolkovoInstitute/roberta_toxicity_classifier"
model_name_sim = "sentence-transformers/all-MiniLM-L6-v2"

PATH = "../data/inheritim/"

In [2]:
df = pd.read_csv(PATH + 'filtered.csv', index_col=0)
df.head()

Unnamed: 0,reference,translation,similarity,lenght_diff,ref_tox,trn_tox,avg_word_ref,avg_word_trans
0,"If Alkar is flooding her with psychic waste, t...","if Alkar floods her with her mental waste, it ...",0.785171,0.010309,0.981983,0.014195,15,16
1,Now you're getting nasty.,you're becoming disgusting.,0.749687,0.071429,0.999039,0.065473,4,3
2,"Well, we could spare your life, for one.","well, we can spare your life.",0.919051,0.268293,0.985068,0.213313,8,6
3,"Ah! Monkey, you've got to snap out of it.","monkey, you have to wake up.",0.664333,0.309524,0.994215,0.053362,9,6
4,I've got orders to put her down.,I have orders to kill her.,0.726639,0.181818,0.999348,0.009402,7,6


In [3]:
df_train, df_test = train_test_split(df, test_size=0.2, random_state=SEED)
df_test = df_test.sample(5000, random_state=SEED)
print(df_test.shape)

(5000, 8)


In [4]:
# load tokenizer and model weights
tokenizer_sta = AutoTokenizer.from_pretrained(model_name_sta)
model_sta = AutoModelForSequenceClassification.from_pretrained(model_name_sta).to(device)

tokenizer_sim = SentenceTransformer(model_name_sim).to(device)

model_fl = AutoModelForSequenceClassification.from_pretrained(model_name_fl).to(device)
tokenizer_fl = AutoTokenizer.from_pretrained(model_name_fl)

Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpo

In [5]:
def calculate_part_sta(batch: list, tokenizer, model):
    """
    calculate non-toxicity of the sentences
    :param batch: list of sentences
    :param tokenizer: tokenizer to encode sentences
    :param model: model to calculate non-toxicity
    :return: probability of each sentences
    """
    text = tokenizer(batch, return_tensors='pt', padding=True, truncation=True).to(device)
    out = model(**text)
    return torch.softmax(out.logits, dim=-1)[:, 0].cpu()

def calculate_part_score_fl(batch, tokenizer, model):
    """
    calculate fluency of the sentences
    :param batch: list of sentences
    :param tokenizer: tokenizer to encode sentences
    :param model: model to calculate fluency
    :return: probability of each sentences 
    """
    text = tokenizer(batch, return_tensors='pt', padding=True, truncation=True).to(device)
    out = model(**text)
    return torch.softmax(out.logits, dim=-1)[:, 1].cpu()

def calculate_sim(batch1, batch2, tokenizer):
    """
    calculate similarities of the sentences
    :param batch1: first list of sentences 
    :param batch2: second list of sentences
    :param tokenizer: tokenizer to encode sentences
    :return: cosine similarity of each sentences
    """
    embedding_1= tokenizer.encode(batch1, convert_to_tensor=True).to(device)
    embedding_2 = tokenizer.encode(batch2, convert_to_tensor=True).to(device)

    return util.pytorch_cos_sim(embedding_1, embedding_2).diagonal(0).cpu()

def calculate_joint_score(batch_reference, batch_predicted):
    """
    calculate total score of the sentences based on non-toxicity, fluency, similarity
    :param batch_reference: target list of sentences 
    :param batch_predicted: predicted list of sentences 
    :return: dict with values
    """
    sta = calculate_part_sta(batch_predicted, tokenizer_sta, model_sta)
    torch.cuda.empty_cache()
    
    fl  = calculate_part_score_fl(batch_predicted, tokenizer_fl, model_fl)
    torch.cuda.empty_cache()
    
    sim = calculate_sim(batch_reference, batch_predicted, tokenizer_sim)
    torch.cuda.empty_cache()
    
    return {
            "non-toxic": torch.sum(sta),
            "fluency":torch.sum(fl),
            "similarity": torch.sum(sim),
            "total": torch.sum(sta * fl * sim)
            }

In [6]:
calculate_joint_score(["fuck fuck fuck"], [ "They drank the pub."])

{'non-toxic': tensor(1.0000, grad_fn=<SumBackward0>),
 'fluency': tensor(0.0404, grad_fn=<SumBackward0>),
 'similarity': tensor(0.1598),
 'total': tensor(0.0065, grad_fn=<SumBackward0>)}

# Preparation for BERT

In [7]:
import re
import torch
import torch.nn as nn
import numpy as np
from transformers import BertTokenizer, BertModel
import warnings

warnings.filterwarnings('ignore')

PATH_OUT = "../models/bert-detoxification/"

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [8]:
def clear_str(string: str) -> str:
    """
    Tokenization/string cleaning for text
    """
    string = re.sub('([.,!?()])', r' \1 ', string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()

In [9]:
class BertClassifier(nn.Module):
    def __init__(self, bert: BertModel, num_classes: int):
        super().__init__()
        self.bert = bert
        self.classifier = nn.Linear(bert.config.hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask=None):
        x = self.bert(input_ids, attention_mask=attention_mask)
        cls_x = x[1] # sentence embedding. Pooler_output is the embedding of the [CLS] special token. It is considered as a valid representation of the complete sentence.
        cls_x = self.classifier(cls_x)
        # print(cls_x.shape)
        out = self.softmax(cls_x)
        return out


In [10]:
def prediction(model: BertClassifier, out_text: list, tokenizer) -> torch.Tensor:
    """
    function for doing prediciton
    :param model: Our model
    :param out_text: toxic text
    :param tokenizer: tokenizer
    :return: probabilities
    """
    with torch.no_grad():
        x = tokenizer(out_text, add_special_tokens=True, max_length=120, truncation=True, padding=True, return_tensors="pt").to(device)
        outputs = model(x["input_ids"], attention_mask=x["attention_mask"])
    return outputs


def inference(model: BertClassifier, input_text: str, tokenizer: BertTokenizer) -> str:
    """
    make inference
    :param model: Our model
    :param input_text: toxic text
    :param tokenizer: tokenizer
    :return: detoxified text
    """
    input_text = clear_str(input_text)
    while True:
        # find most toxic word
        input_text = input_text.split()
        out_text = [" ".join(input_text[:i] + ["<oov>"] + input_text[min(i + 1, len(input_text)):]) for i in range(len(input_text))]
        probs = prediction(model, out_text, tokenizer)
        idx = torch.argmax(probs[:, 0])

        # delete toxic word
        input_text = re.sub("\s*<oov>\s*", " ", out_text[idx]).strip()

        # check if sentence still toxic
        toxicity = prediction(model, [input_text], tokenizer)
        if torch.argmax(toxicity[0]) == 0:
            break
    input_text = re.sub(r'\s([?.!,¿](?:\s|$))', r'\1', input_text)
    return input_text


In [11]:
bert_model_name = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert      = BertModel.from_pretrained(bert_model_name)
model     = BertClassifier(bert, 2).to(device)

model.load_state_dict(torch.load(PATH_OUT + "model-final.pt"))
model.eval()

BertClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

## Calculate scores

In [12]:
non_toxicity_bert = 0
fluency_bert = 0
similarity_bert = 0
total_score_bert = 0

batch_size = 32
batch_pred = []
batch_true = []

for accum, (idx, item) in tqdm(enumerate(df_test.iterrows())):
    pred = inference(model, item.reference, tokenizer)
    
    target = clear_str(item.translation)
    target = re.sub(r'\s([?.!,¿](?:\s|$))', r'\1', target)

    batch_true.append(target)
    batch_pred.append(pred)

    if accum % batch_size == 0 or accum == df_test.shape[0] - 1:
        scores = calculate_joint_score(batch_true, batch_pred)

        non_toxicity_bert += scores["non-toxic"].item()
        fluency_bert += scores["fluency"].item()
        similarity_bert += scores["similarity"].item()
        total_score_bert += scores["total"].item()
        batch_true = []
        batch_pred = []

non_toxicity_bert = non_toxicity_bert / df_test.shape[0]
fluency_bert = fluency_bert / df_test.shape[0]
similarity_bert = similarity_bert / df_test.shape[0]
total_score_bert = total_score_bert / df_test.shape[0]

print(non_toxicity_bert, fluency_bert, similarity_bert, total_score_bert)

5000it [02:19, 35.79it/s]

0.9531837767124176 0.47177488119006156 0.6414214318633079 0.2928031578540802





# Preparation for encoder-decoder model

In [13]:
import torch
import torch.nn as nn

import numpy as np
import unicodedata
import re

In [14]:
df_test_gru = df_test.copy()

In [15]:
def unicode_to_ascii(s: str) -> str:
    """
    Normalizes latin chars with accent to their canonical decomposition

    :param s: input sentence
    :return: normalized sentence
    """
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                   if unicodedata.category(c) != 'Mn')

def preprocess_sentence(w) -> str:
    """
    preprocess the sentence
    :param w: input sentence
    :return: preprocessed sentence
    """
    w = unicode_to_ascii(w.lower().strip())
    # creating a space between a word and the punctuation following it
    # eg: "he is a boy." => "he is a boy ." 
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)
    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    w = w.rstrip().strip()
    # adding a start and an end token to the sentence
    # so that the model know when to start and stop predicting.
    w = '<start> ' + w + ' <end>'
    return w


df['reference'] = df['reference'].apply(lambda w: preprocess_sentence(w))
df['translation'] = df['translation'].apply(lambda w: preprocess_sentence(w))

df_test_gru['reference'] = df_test_gru['reference'].apply(lambda w: preprocess_sentence(w))
df_test_gru['translation'] = df_test_gru['translation'].apply(lambda w: preprocess_sentence(w))

In [16]:
# This class creates a word -> index mapping (e.g,. "dad" -> 5) and vice-versa 
# (e.g., 5 -> "dad") for each language,
class LanguageIndex():
    def __init__(self, lang: list):
        """ lang are the list of phrases from each language """
        self.lang = lang
        self.word2idx = {}
        self.idx2word = {}
        self.vocab = set()
        self.create_index()

    def create_index(self):
        """ create word2idx, idx2word and vocab """
        for phrase in self.lang:
            # update with individual tokens
            self.vocab.update(phrase.split(' '))
        # sort the vocab
        self.vocab = sorted(self.vocab)
        # add a padding token with index 0
        self.word2idx['<pad>'] = 0
        # word to index mapping
        for index, word in enumerate(self.vocab):
            self.word2idx[word] = index + 1 # +1 because of pad token
        # index to word mapping
        for word, index in self.word2idx.items():
            self.idx2word[index] = word

In [17]:
def max_length(tensor: list) -> int:
    """
    calculate the max_length of input and output tensor
    :param tensor: input tensor
    :return: max length
    """
    return max(len(t) for t in tensor)


# index language using the class above
inp_lang = LanguageIndex(df['reference'].values.tolist())
targ_lang = LanguageIndex(df['translation'].values.tolist())

# Vectorize the input and target languages
input_tensor = [[inp_lang.word2idx[s] for s in reference.split(' ')]  for reference in df['reference'].values.tolist()]
target_tensor = [[targ_lang.word2idx[s] for s in translation.split(' ')]  for translation in df['translation'].values.tolist()]

max_length_inp, max_length_tar = max_length(input_tensor), max_length(target_tensor)

In [18]:
class Encoder(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, enc_units: int, batch_sz: int):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.gru = nn.GRU(self.embedding_dim, self.enc_units)

    def forward(self, x: torch.Tensor, device: torch.device) -> (torch.Tensor, torch.Tensor):
        # x: batch_size, max_length 

        # x: batch_size, max_length, embedding_dim
        x = self.embedding(x)

        self.hidden = self.initialize_hidden_state(device)

        # output: max_length, batch_size, enc_units
        # self.hidden: 1, batch_size, enc_units
        output, self.hidden = self.gru(x, self.hidden) # gru returns hidden state of all timesteps as well as hidden state at last timestep

        return output, self.hidden

    def initialize_hidden_state(self, device: torch.device) -> torch.Tensor:
        return torch.zeros((1, self.batch_sz, self.enc_units)).to(device)

In [19]:
class Decoder(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, dec_units: int, enc_units: int, batch_sz: int):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.enc_units = enc_units
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.gru = nn.GRU(self.embedding_dim + self.enc_units,
                          self.dec_units,
                          batch_first=True)
        self.fc = nn.Linear(self.enc_units, self.vocab_size)

        # used for attention
        self.W1 = nn.Linear(self.enc_units, self.dec_units)
        self.W2 = nn.Linear(self.enc_units, self.dec_units)
        self.V = nn.Linear(self.enc_units, 1)

    def forward(self, x: torch.Tensor, hidden: torch.Tensor, enc_output: torch.Tensor) -> (torch.Tensor, torch.Tensor, torch.Tensor):
        # enc_output converted == (batch_size, max_length, hidden_size)
        enc_output = enc_output.permute(1,0,2)

        # hidden shape == (batch_size, hidden size) we convert it to (batch_size, 1, hidden size)
        hidden_with_time_axis = hidden.permute(1, 0, 2)

        # score: (batch_size, max_length, hidden_size) # Bahdanaus's
        # It doesn't matter which FC we pick for each of the inputs
        score = self.V(torch.tanh(self.W2(enc_output) + self.W1(hidden_with_time_axis)))

        #attention_weights shape == (batch_size, max_length, 1)
        attention_weights = nn.Softmax(dim=1)(score)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = torch.sum(attention_weights * enc_output, dim=1)

        # pass the context vector into embedding layer
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # concatenate the context vector and x
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = torch.cat((context_vector.unsqueeze(1), x), -1)

        # passing the concatenated vector to the GRU
        # output: (batch_size, 1, hidden_size)
        output, state = self.gru(x)

        # output shape == (batch_size * 1, hidden_size)
        output =  output.view(-1, output.size(2))

        # output shape == (batch_size * 1, vocab)
        x = self.fc(output)
        return x, state, attention_weights

    def initialize_hidden_state(self) -> torch.Tensor:
        return torch.zeros((1, self.batch_sz, self.dec_units))

In [32]:
PATH_OUT = "../models/gru-detoxification/"

BATCH_SIZE = 32
embedding_dim = 256
units = 512
vocab_inp_size = len(inp_lang.word2idx)
vocab_tar_size = len(targ_lang.word2idx)

encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
decoder = Decoder(vocab_tar_size, embedding_dim, units, units, BATCH_SIZE)

encoder.load_state_dict(torch.load(PATH_OUT + "encoder-final.pt"))
decoder.load_state_dict(torch.load(PATH_OUT + "decoder-final.pt"))

encoder.to(device)
decoder.to(device)


Decoder(
  (embedding): Embedding(61679, 256)
  (gru): GRU(768, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=61679, bias=True)
  (W1): Linear(in_features=512, out_features=512, bias=True)
  (W2): Linear(in_features=512, out_features=512, bias=True)
  (V): Linear(in_features=512, out_features=1, bias=True)
)

In [33]:
def pad_sequences(x: list, max_len: int) -> list:
    """
    add padding
    :param x: token's sentence
    :param max_len: max length of words from column
    :return: token's sentence with padding
    """
    padded = np.zeros((max_len), dtype=np.int64)
    if len(x) > max_len: padded[:] = x[:max_len]
    else: padded[:len(x)] = x
    return padded

In [34]:
def inference(encoder: Encoder, decoder: Decoder, sentence: str, special_tokens: bool=True) -> list:
    """
    get inference of the encoder-decoder model
    :param encoder: our encoder
    :param decoder: our decoder
    :param sentence: toxic sentence
    :param special_tokens: print special tokens or not
    :return: translated text
    """
    encoder.eval()
    decoder.eval()

    sentence = torch.unsqueeze(sentence, dim=1)
    with torch.no_grad():
        enc_output, enc_hidden = encoder(sentence.to(device), device)
        dec_hidden = enc_hidden
        dec_input = torch.tensor([[targ_lang.word2idx['<start>']]] * 1)

        out_sentence = []
        for t in range(1, sentence.size(0)):
            predictions, dec_hidden, _ = decoder(dec_input.to(device),
                                                 dec_hidden.to(device),
                                                 enc_output.to(device))
            dec_input = predictions.argmax(dim=1).unsqueeze(1)
            out_word = targ_lang.idx2word[predictions.squeeze().argmax().item()]
            if special_tokens:
                out_sentence.append(out_word)
            else:
                if out_word != "<pad>" and out_word != "<end>":
                    out_sentence.append(out_word)

    out_sentence = " ".join(out_sentence)
    out_sentence = re.sub(r'\s([?.!,¿](?:\s|$))', r'\1', out_sentence)
    return out_sentence


## Calculate scores

In [35]:
encoder.batch_sz = 1
encoder.initialize_hidden_state(device)
decoder.batch_sz = 1
decoder.initialize_hidden_state()

batch_size = 8
batch_pred = []
batch_true = []

non_toxicity_gru = 0
fluency_gru = 0
similarity_gru = 0
total_score_gru = 0

for accum, (idx, item) in tqdm(enumerate(df_test_gru.iterrows())):    
    test_sentence = item.reference
    test_sentence = [inp_lang.word2idx[s] for s in test_sentence.split(' ')]
    test_sentence = pad_sequences(test_sentence, max_length_inp)
    
    pred = inference(encoder, decoder, torch.tensor(test_sentence).to(device), special_tokens=False)

    target = " ".join(item.translation.split()[1:-1])
    target = re.sub(r'\s([?.!,¿](?:\s|$))', r'\1', target)


    batch_true.append(target)
    batch_pred.append(pred)
    if accum % batch_size == 0 or accum == df_test_gru.shape[0] - 1:
        scores = calculate_joint_score(batch_true, batch_pred)

        non_toxicity_gru += scores["non-toxic"].item()
        fluency_gru += scores["fluency"].item()
        similarity_gru += scores["similarity"].item()
        total_score_gru += scores["total"].item()

        batch_true = []
        batch_pred = []


non_toxicity_gru = non_toxicity_gru / df_test_gru.shape[0]
fluency_gru = fluency_gru / df_test_gru.shape[0]
similarity_gru = similarity_gru / df_test_gru.shape[0]
total_score_gru = total_score_gru / df_test_gru.shape[0]

print(non_toxicity_gru, fluency_gru, similarity_gru, total_score_gru)

5000it [23:09,  3.60it/s]

0.6517880827307702 0.5338920620784163 0.549631848704815 0.19717650542557238





# Preparation for T5

In [36]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

SAVE_PATH = "../models/t5-detoxification/checkpoint-final/"
model_checkpoint = "t5-small"

In [37]:
model = AutoModelForSeq2SeqLM.from_pretrained(SAVE_PATH).to(device)
model.eval()
model.config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [38]:
def inference(model, inference_request, tokenizer=tokenizer):
    """
    get inference of the t5 model
    :param model: fine-tuned model 
    :param inference_request: toxic text
    :param tokenizer: t5 tokenizer
    :return: translated text
    """
    inputs = tokenizer(inference_request, return_tensors="pt").to(device)
    inputs = {k: v for k, v in inputs.items()}

    outputs = model.generate(**inputs, num_beams=1, do_sample=False)
    for ex in outputs:
        return tokenizer.decode(ex, skip_special_tokens=True)

## Calculate scores

In [39]:
torch.cuda.empty_cache()

batch_size = 32
batch_pred = []
batch_true = []

non_toxicity_t5 = 0
fluency_t5 = 0
similarity_t5 = 0
total_score_t5 = 0


for accum, (idx, item) in tqdm(enumerate(df_test.iterrows())):
    test_sentence = item.reference
    pred = inference(model, item.reference.lower().strip(), tokenizer)
    target = item.translation.lower().strip()

    batch_true.append(target)
    batch_pred.append(pred)
    if accum % batch_size == 0 or accum == df_test.shape[0] - 1:
        scores = calculate_joint_score(batch_true, batch_pred)

        non_toxicity_t5 += scores["non-toxic"].item()
        fluency_t5 += scores["fluency"].item()
        similarity_t5 += scores["similarity"].item()
        total_score_t5 += scores["total"].item()

        batch_true = []
        batch_pred = []


non_toxicity_t5 = non_toxicity_t5 / df_test.shape[0]
fluency_t5 = fluency_t5 / df_test.shape[0]
similarity_t5 = similarity_t5 / df_test.shape[0]
total_score_t5 = total_score_t5 / df_test.shape[0]

print(non_toxicity_t5, fluency_t5, similarity_t5, total_score_t5)

5000it [05:55, 14.07it/s]

0.6123539594650269 0.7395521600604057 0.6748421402215957 0.3157254221439362





# Results

In [40]:
data = {
        "BERT Deletion": [non_toxicity_bert, fluency_bert, similarity_bert, total_score_bert],
        "Encoder-Decoder": [non_toxicity_gru, fluency_gru, similarity_gru, total_score_gru],
        "T5-small": [non_toxicity_t5, fluency_t5, similarity_t5, total_score_t5]
        }
columns = ["STA", "FL", "SIM", "Total"]

results = pd.DataFrame.from_dict(data, orient='index', columns=columns)
results

Unnamed: 0,STA,FL,SIM,Total
BERT Deletion,0.953184,0.471775,0.641421,0.292803
Encoder-Decoder,0.651788,0.533892,0.549632,0.197177
T5-small,0.612354,0.739552,0.674842,0.315725


In [41]:
results.to_csv("../reports/" + "result.csv")