In [1]:
### Project was ran in a kaggle notebook
# install packages
!pip install wget -q
# !pip install transformers torch -q

In [2]:
import numpy as np
import pandas as pd
import torch
import os
import wget
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration

# WandB – Import the wandb library


In [3]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
device

'cuda'

<h2> Loading the Dataset </h2>
<p> Initially, we download the data and load it in a dataframe.
</p>

In [4]:
if not os.path.exists('./paradetox.tsv'):
    url_csv = 'https://raw.githubusercontent.com/s-nlp/paradetox/main/paradetox/paradetox.tsv'
    wget.download(url_csv, 'paradetox.tsv')
    
df = pd.read_csv("paradetox.tsv", sep='\t')

df

Unnamed: 0,toxic,neutral1,neutral2,neutral3
0,he had steel balls too !,he was brave too!,,
1,"dude should have been taken to api , he would ...",It would have been good if he went to api. He ...,,
2,"im not gonna sell the fucking picture , i just...","I'm not gonna sell the picture, i just want to...",,
3,the garbage that is being created by cnn and o...,the news that is being created by cnn and othe...,The news that is being created by cnn and othe...,the garbage that is being created by cnn and o...
4,the reason they dont exist is because neither ...,The reason they don't exist is because neither...,,
...,...,...,...,...
11922,"this is the "" dumb "" shit they 're laughing at","this is the "" nonsense "" situation they 're la...",,
11923,no seriously you 're fucking retarded,no seriously you 're slow..,,
11924,christians love to shit on the pope .,christians love to criticize the pope,,
11925,"but if saying "" fuck that group "" is much more...","but if saying"" that group is bad"" is much more...",,


<p> Now, we define the Dataset class. Here, we are flattening each row of toxic text to at most three different neutral rows.</p>




In [5]:
class ParaDetoxDataset(Dataset):
    def __init__(self, tokenizer, data, max_length=512):
        self.tokenizer = tokenizer
        # Expand the dataset to include a row for each neutral alternative
        self.data = []
        self.max_length = max_length
        for _, row in data.iterrows():
            toxic_text = row['toxic']
            for neutral in ['neutral1', 'neutral2', 'neutral3']:
                if pd.notnull(row[neutral]):  # Ensure the neutral text is not null
                    self.data.append((toxic_text, row[neutral]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        toxic_text, neutral_text = self.data[index]
        
        inputs = self.tokenizer.encode_plus(
            "Detoxify following sentence from bad words: " + toxic_text,
            None,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            return_token_type_ids=False,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        targets = self.tokenizer.encode_plus(
            neutral_text,
            None,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            return_token_type_ids=False,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': targets['input_ids'].flatten()
        }

<p> Before splitting the dataset and wrapping the train and test datasets with the defined Dataset class, we initialize the text tokenizer with the pretrained tokenizer of T, the model that we are going to use as our base model. </p>


In [6]:
# Initialize tokenizer
model_name = 't5-small' # You can choose other versions based on your needs and computational resources
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Prepare dataset
df_train, df_val = train_test_split(df, test_size=0.1)
train_dataset = ParaDetoxDataset(tokenizer, df_train)
val_dataset = ParaDetoxDataset(tokenizer, df_val)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


<h2> Training Phase (Fine-Tune) </h2>

In [7]:
model = T5ForConditionalGeneration.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(device)

optimizer = AdamW(model.parameters(), lr=3e-4)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

cuda


In [8]:

instance1 = "Fuck this situation, This shitty loss should have got better."
instance2 = "Oh for fuck sake. This crazy moron is not even getting better."
instance3 = "Come on move your lazy ass!"


In [9]:

def generate_sentence(sentence):
    
    input_ids = tokenizer.encode("Detoxify following sentence from bad words: " + sentence, return_tensors="pt").to(device)
    output = model.generate(input_ids = input_ids,
                            max_length=200, 
                            num_beams=2,
                            repetition_penalty=2.5, 
                            length_penalty=1.0, 
                            early_stopping=True)
    generated_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_sentence

sentence = "Fuck this situation, This shitty loss should have got better."


generate_sentence(sentence)

'Detoxify following sentence from bad words: Fuck this situation, This shitty loss should have got better.'

In [10]:
epochs = 3
from tqdm import tqdm
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}")


100%|██████████| 2223/2223 [14:02<00:00,  2.64it/s]


Epoch 1, Loss: 0.035578084507143504


100%|██████████| 2223/2223 [14:03<00:00,  2.64it/s]


Epoch 2, Loss: 0.02533544662349459


100%|██████████| 2223/2223 [14:03<00:00,  2.63it/s]

Epoch 3, Loss: 0.022915600865008497





In [11]:
print('*********************')
print(instance1)
print(generate_sentence(instance1))
print('*********************')
print(instance2)
print(generate_sentence(instance2))
print('*********************')
print(instance3)
print(generate_sentence(instance3))
print('*********************')
print('no seriously you \'re fucking retarded')
print(generate_sentence('no seriously you \'re fucking retarded'))
print('*********************')
print('he had steel balls too')
print(generate_sentence('he had steel balls too'))
print('*********************')
print('christians love to shit on the pope .')
print(generate_sentence('christians love to shit on the pope .'))
print('*********************')


*********************
Fuck this situation, This shitty loss should have got better.
This situation should have got better.
*********************
Oh for fuck sake. This crazy moron is not even getting better.
Oh, this is not even getting better.
*********************
Come on move your lazy ass!
Come on move your lazy
*********************
no seriously you 're fucking retarded
No seriously you're no serious.
*********************
he had steel balls too
he had steel balls too
*********************
christians love to shit on the pope .
christians love to destroy pope.
*********************


In [12]:
ind = 8091
print(df.iloc[ind]['toxic'])
print(df.iloc[ind]['neutral1'])
print(generate_sentence(df.iloc[ind]['toxic']))

com see everyone flipping shit in that thread ?
com see everyone flipping in that thread ?
com see everyone flipping in that thread?


In [13]:
# from huggingface_hub import notebook_login

# notebook_login()
# tokenizer.push_to_hub('detoxify')
# model.push_to_hub('detoxify')


In [14]:
model.eval()
predictions = []
actuals = []
with torch.no_grad():
    for data in tqdm(val_loader):
        y = data['labels'].to(device, dtype = torch.long)
        ids = data['input_ids'].to(device, dtype = torch.long)
        mask = data['attention_mask'].to(device, dtype = torch.long)

        generated_ids = model.generate(
            input_ids = ids,
            attention_mask = mask, 
            max_length=150, 
            num_beams=2,
            repetition_penalty=2.5, 
            length_penalty=1.0, 
            early_stopping=True
        )
        preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
        target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]
        
        predictions.extend(preds)
        actuals.extend(target)


100%|██████████| 246/246 [03:08<00:00,  1.30it/s]


In [15]:
for i in range(len(predictions[:15])):
    print(f'Actual: {actuals[i]}')
    print(f'Predicted: {predictions[i]}')
    print('*******************************')

Actual: its entirely because he is a criminal and adn refuses to list minorities who commit crimes.
Predicted: its entirely because he is a black criminal and adn refuses to list minorities who commit crimes.
*******************************
Actual: 78
Predicted: 78 white men walk inarate christian churches and kill multiple people one of themyears old
*******************************
Actual: hurry up. i wanna get out of my crib
Predicted: hurry up i wanna get out my crib
*******************************
Actual: It's not easy to forget when they just made and delete a facebook post about it!
Predicted: hard to wash their hands when they just made and deleted a facebook post about it!
*******************************
Actual: Seafood would not be good after that oil spill.
Predicted: yeah nice seafood would be ruined after that oil spill ey?
*******************************
Actual: Super fast servers in Switzerland with premium means good fast downloads.
Predicted: fast servers in switzerland

In [16]:
!pip install bert_score -q
!pip install rouge -q
!pip install nltk -q


In [17]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
from rouge import Rouge
from bert_score import score

# BLEU (1-4)
bleu_1_scores = [sentence_bleu([ref.split()], pred.split(), weights=(1, 0, 0, 0)) for pred, ref in zip(predictions, actuals)]
bleu_2_scores = [sentence_bleu([ref.split()], pred.split(), weights=(0, 1, 0, 0)) for pred, ref in zip(predictions, actuals)]
bleu_3_scores = [sentence_bleu([ref.split()], pred.split(), weights=(0, 0, 1, 0)) for pred, ref in zip(predictions, actuals)]
bleu_4_scores = [sentence_bleu([ref.split()], pred.split(), weights=(0, 0, 0, 1)) for pred, ref in zip(predictions, actuals)]

average_bleu_1 = corpus_bleu([[ref.split()] for ref in actuals], [pred.split() for pred in predictions], weights=(1, 0, 0, 0))
average_bleu_2 = corpus_bleu([[ref.split()] for ref in actuals], [pred.split() for pred in predictions], weights=(0, 1, 0, 0))
average_bleu_3 = corpus_bleu([[ref.split()] for ref in actuals], [pred.split() for pred in predictions], weights=(0, 0, 1, 0))
average_bleu_4 = corpus_bleu([[ref.split()] for ref in actuals], [pred.split() for pred in predictions], weights=(0, 0, 0, 1))

# ROUGE
rouge = Rouge()
rouge_scores = rouge.get_scores(predictions, actuals, avg=True)

# BERT-score
P, R, F1 = score(predictions, actuals, lang="en", verbose=True)

print("Average BLEU-1 Score:", average_bleu_1)
print("Average BLEU-2 Score:", average_bleu_2)
print("Average BLEU-3 Score:", average_bleu_3)
print("Average BLEU-4 Score:", average_bleu_4)
print("ROUGE Scores:", rouge_scores)
print("BERT Precision:", P.mean().item())
print("BERT Recall:", R.mean().item())
print("BERT F1 Score:", F1.mean().item())


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/47 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/31 [00:00<?, ?it/s]

done in 5.31 seconds, 370.26 sentences/sec
Average BLEU-1 Score: 0.6873702519216742
Average BLEU-2 Score: 0.5700592760751672
Average BLEU-3 Score: 0.48154277901705406
Average BLEU-4 Score: 0.40992232523177147
ROUGE Scores: {'rouge-1': {'r': 0.710771803143178, 'p': 0.7087725441272228, 'f': 0.7033824001346968}, 'rouge-2': {'r': 0.5759906979149876, 'p': 0.5789480737460074, 'f': 0.5720376805000468}, 'rouge-l': {'r': 0.7076076871307646, 'p': 0.7058426556082127, 'f': 0.7003851357298524}}
BERT Precision: 0.9511870741844177
BERT Recall: 0.9537311792373657
BERT F1 Score: 0.9522857069969177
