### Imports

In [None]:
import torch
import pandas as pd
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset


In [13]:
# Show the current working directory of the kernel.
print(os.getcwd(), end="\n")

/home/charaf/DeepLearning/Project/Deep_Learning


## Read the data/paradetox.tsv file into a pandas DataFrame

In [None]:
# Locate the data folder under the current working directory.
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, "data")

# The file is tab-separated; join the file name the same way as the folder.
df = pd.read_csv(os.path.join(data_dir, "paradetox.tsv"), sep="\t")
df.head(5)


Unnamed: 0,toxic,neutral1,neutral2,neutral3
0,he had steel balls too !,he was brave too!,,
1,"dude should have been taken to api , he would ...",It would have been good if he went to api. He ...,,
2,"im not gonna sell the fucking picture , i just...","I'm not gonna sell the picture, i just want to...",,
3,the garbage that is being created by cnn and o...,the news that is being created by cnn and othe...,The news that is being created by cnn and othe...,the garbage that is being created by cnn and o...
4,the reason they dont exist is because neither ...,The reason they don't exist is because neither...,,


### Keep just one non-toxic rewrite for every toxic sentence

In [31]:
!pip list

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Package                  Version
------------------------ --------------------
accelerate               1.6.0
aiohappyeyeballs         2.6.1
aiohttp                  3.11.18
aiosignal                1.3.2
asttokens                3.0.0
async-timeout            5.0.1
attrs                    21.2.0
Automat                  20.2.0
Babel                    2.8.0
bcrypt                   3.2.0
beautifulsoup4           4.10.0
blinker                  1.4
breezy                   3.2.1
brz-debian               2.8.51
certifi                  2020.6.20
Chameleon                3.8.1
chardet                  4.0.0
charset-normalizer       3.4.2
click                    8.0.3
cloud-init               24.4.1
colorama                 0.4.4
comm                     0.2.2
command-not-found        0.3
configobj                5.0.6
constantly               15.1.0
cryptography             3.4.8
datasets                 3.5.1
dbus-python              1.2.18
debmutate                0.48
debugpy       

In [15]:
# Number of missing (NaN) values in each column.
df.isna().sum(axis=0)

toxic           0
neutral1        0
neutral2     5973
neutral3    10065
dtype: int64

In [None]:
# Rebind `df` to a renamed copy instead of mutating in place, so the frame
# displayed by earlier cells is not silently changed underneath them.
df = df.rename(columns={"neutral1": "neutral"})

In [17]:
# Keep only the toxic sentence and the single neutral column.
filtered_df = df.loc[:, ["toxic", "neutral"]]

In [20]:
# Convert the frame to a list of row dicts and wrap it as a `datasets.Dataset`
# in one step, so `df_dict` is only ever bound to the Dataset.
df_dict = Dataset.from_list(filtered_df.to_dict(orient="records"))

In [None]:
# Load the causal LM and its tokenizer from the same checkpoint identifier.
model_name = "Qwen/Qwen2.5-3B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(model_name)
base_tokenizer = AutoTokenizer.from_pretrained(model_name)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.63s/it]


In [None]:
def detoxify_sentence(sentence: str, max_new_tokens: int = 100) -> str:
    """Ask the chat model bound to ``base_model`` to detoxify one sentence.

    Args:
        sentence: Text to rewrite; it is embedded verbatim, inside double
            quotes, in the user turn of the prompt.
        max_new_tokens: Limit on generated tokens, forwarded to ``generate``.

    Returns:
        The decoded text that follows the prompt (special tokens skipped),
        with surrounding whitespace stripped.
    """
    system_turn = {
        "role": "system",
        "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
    }
    user_turn = {
        "role": "user",
        "content": f'Detoxify this sentence: "{sentence}"',
    }

    # Render the two turns to a plain string via the tokenizer's chat template,
    # with the generation prompt appended.
    chat_prompt = base_tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

    encoded = base_tokenizer([chat_prompt], return_tensors="pt")
    encoded = encoded.to(base_model.device)
    prompt_length = encoded.input_ids.shape[1]

    # Sampling is disabled; gradients are not needed for generation.
    with torch.no_grad():
        generated = base_model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    # Skip the first `prompt_length` positions so only what follows the prompt is decoded.
    continuation_ids = generated[0][prompt_length:]
    return base_tokenizer.decode(continuation_ids, skip_special_tokens=True).strip()

# Try the helper on a placeholder input.
toxic = "toxic sentence here"
cleaned = detoxify_sentence(sentence=toxic)
print(f"Detoxified Output: {cleaned}")

In [24]:
def preprocess(example, max_length: int = 64):
    """Tokenize one toxic/neutral pair and attach loss-masked labels.

    Args:
        example: Mapping with a ``"toxic"`` source sentence and a ``"neutral"``
            target sentence.
        max_length: Length both sequences are truncated/padded to.

    Returns:
        The encoding of ``"detoxify: <toxic>"`` with an added ``"labels"`` list
        holding the target token ids, where padding positions are -100.
    """
    # NOTE(review): `base_tokenizer` is loaded alongside a causal LM above, while
    # input and target are encoded here as two separate sequences — confirm this
    # layout suits the model this dataset is meant for.
    input_text = f"detoxify: {example['toxic']}"
    target_text = example["neutral"]
    input_enc = base_tokenizer(input_text, truncation=True, padding="max_length", max_length=max_length)
    target_enc = base_tokenizer(target_text, truncation=True, padding="max_length", max_length=max_length)

    # Padding must not contribute to the loss: -100 is the label value the loss
    # ignores. The attention mask marks padding, so real tokens are never masked.
    input_enc["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(target_enc["input_ids"], target_enc["attention_mask"])
    ]
    return input_enc

# Tokenize every record, then hold out 10% for evaluation.
tokenized_dataset = df_dict.map(preprocess)
# A fixed seed keeps the train/eval membership identical across kernel restarts.
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

Map: 100%|██████████| 11927/11927 [00:04<00:00, 2921.44 examples/s]


### Whole pipeline

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainer, Seq2SeqTrainingArguments
from datasets import Dataset
import pandas as pd

# Wrap the toxic/neutral frame as a `datasets.Dataset`.
dataset = Dataset.from_pandas(filtered_df)

# Load the seq2seq model and its tokenizer from the same identifier.
model_name = "t5-small"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

def preprocess(example, max_length: int = 64):
    """Tokenize one toxic/neutral pair for seq2seq training.

    Args:
        example: Mapping with a ``"toxic"`` source sentence and a ``"neutral"``
            target sentence.
        max_length: Length both sequences are truncated/padded to.

    Returns:
        The encoding of ``"detoxify: <toxic>"`` with an added ``"labels"`` list
        holding the target token ids, where padding positions are -100.
    """
    input_text = f"detoxify: {example['toxic']}"
    target_text = example["neutral"]
    input_enc = tokenizer(input_text, truncation=True, padding="max_length", max_length=max_length)
    target_enc = tokenizer(target_text, truncation=True, padding="max_length", max_length=max_length)

    # Padding must not contribute to the loss: -100 is the label value the loss
    # ignores. The attention mask marks padding, so real tokens are never masked.
    input_enc["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(target_enc["input_ids"], target_enc["attention_mask"])
    ]
    return input_enc

# Tokenize every record and drop the raw text columns, then hold out 10%.
tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
# A fixed seed keeps the train/eval membership identical across kernel restarts.
split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = split["train"], split["test"]

# Training configuration: checkpoints go to ./detoxifier, logs to ./logs.
training_args = Seq2SeqTrainingArguments(
    output_dir="./detoxifier",
    logging_dir="./logs",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    predict_with_generate=True,
    save_total_limit=2,
    # NOTE(review): no evaluation strategy is set (the original
    # `evaluation_strategy="epoch"` line was commented out), so presumably the
    # eval split is never evaluated during training — confirm.
)

# === Trainer ===
# Wire the model, arguments, both splits and the tokenizer together.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Run the fine-tuning loop.
trainer.train()


In [None]:
# Write the trained model and the tokenizer files into the same directory.
trainer.model.save_pretrained(save_directory="models/sft-detoxifier")
trainer.tokenizer.save_pretrained(save_directory="models/sft-detoxifier")

In [None]:
## Reload the model and tokenizer saved under models/sft-detoxifier
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model_name = "models/sft-detoxifier"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def detoxify(text, model=model, tokenizer=tokenizer):
    """Generate a rewrite of ``text`` with the given model/tokenizer pair.

    Args:
        text: Sentence to rewrite; inserted into the prompt
            ``"toxic: <text>. neutral: "``.
        model: Object exposing ``generate`` and ``device``; defaults to the
            ``model`` bound when this definition ran.
        tokenizer: Tokenizer used to encode the prompt and decode the output;
            defaults to the ``tokenizer`` bound when this definition ran.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # NOTE(review): this prompt differs from the "detoxify: " prefix used in
    # `preprocess` — confirm the format if the fine-tuned defaults are used.
    inputs = tokenizer(f"toxic: {text}. neutral: ", return_tensors="pt")
    # Put the input tensors on the same device as the model before generating.
    inputs = inputs.to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=120)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Run the helper with the base model/tokenizer rather than its defaults.
print(detoxify(text="toxic sentence here", tokenizer=base_tokenizer, model=base_model))

In [None]:
## Load the detoxifier checkpoint from detoxifier/checkpoint-23500
# NOTE(review): only `model` is rebound here; `tokenizer` keeps whatever an
# earlier cell assigned — confirm that pairing is intended.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model_name = "detoxifier/checkpoint-23500"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
# Test the model
def detoxify(text, model=model, tokenizer=tokenizer):
    """Generate a detoxified rewrite of ``text`` with a seq2seq model.

    Args:
        text: Sentence to rewrite; sent to the model as ``"detoxify: <text>"``.
        model: Object exposing ``generate`` and ``device``; defaults to the
            ``model`` bound when this definition ran.
        tokenizer: Tokenizer used to encode the prompt and decode the output;
            defaults to the ``tokenizer`` bound when this definition ran.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Use the same "detoxify: " prefix that `preprocess` builds for training;
    # the previous "toxic: ... neutral: " prompt did not match it.
    inputs = tokenizer(f"detoxify: {text}", return_tensors="pt")
    # Put the input tensors on the same device as the model before generating.
    inputs = inputs.to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=120)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Run the helper with the currently bound model/tokenizer passed explicitly.
print(detoxify(text="toxic sentence here", tokenizer=tokenizer, model=model))