In [1]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from data.dataset import HateSpeechDataset
from data.toxigen import ToxiGenDataModule, custom_label_strategy, human_threshold_strategy
from models.model import HateSpeechClassifier
from models.classification_heads import SimpleLinearHead, MLPHead
from training.trainer import Trainer
from utils.checkpoints import ModelCheckpoint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# TODO: Use Config file for this step

# Run-wide settings shared by the data, model and training cells below.
config = dict(
    run_name="mlp_head",
    # Passed both to the data module (as tokenizer name) and to the classifier.
    model_name="microsoft/deberta-v3-base",
    num_classes=2,
    batch_size=32,
    learning_rate=2e-5,
    # NOTE(review): not read by any cell visible in this notebook; the
    # commented-out training call hard-codes its own epoch count.
    num_epochs=1,
    max_length=128,
    num_workers=4,
    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [3]:
# Sanity check: show whether PyTorch can see a CUDA device. The same call
# decides the value of config["device"].
torch.cuda.is_available()

True

In [4]:
# Build the ToxiGen data module from the shared config; labels are derived
# with `human_threshold_strategy`.
data_module = ToxiGenDataModule(
    tokenizer_name=config["model_name"],
    batch_size=config["batch_size"],
    max_length=config["max_length"],
    label_strategy=human_threshold_strategy,
    num_workers=config["num_workers"],
)

# Prepare the datasets, then fetch the loaders keyed by split name.
data_module.setup()
dataloaders = data_module.get_dataloaders()

# NOTE(review): the "validation" loader is the "test" split, so anything the
# Trainer selects on validation results is tuned against test data — confirm
# this is intentional.
train_loader, val_loader = dataloaders["train"], dataloaders["test"]



In [5]:
# MLP classification head placed on top of the encoder.
# NOTE(review): 768 is presumably the hidden size of the encoder named in
# config["model_name"]; the remaining sizes are passed positionally, see
# MLPHead for their meaning — TODO derive the input size from the encoder.
classification_head = MLPHead(768, 1536, 384, config["num_classes"])

# Full classifier with the encoder left trainable (freeze_bert=False), moved
# to the configured device.
model = HateSpeechClassifier(
    config["model_name"],
    classification_head,
    freeze_bert=False,
)
model = model.to(config["device"])

# Optimise every model parameter with AdamW against a cross-entropy loss.
optimizer = AdamW(model.parameters(), lr=config["learning_rate"])
criterion = torch.nn.CrossEntropyLoss()

In [6]:
from training.metrics import accuracy_metric, f1_metric

# Metric callables handed to the Trainer, keyed by the name they are passed under.
metrics = {
    "accuracy": accuracy_metric,
    "f1_score": f1_metric,
}

# Learning-rate scheduler: multiply the LR by 0.1 once the monitored value
# (mode="min", i.e. lower is better) has failed to improve for more than
# `patience` scheduler steps. Which value is monitored is decided by the
# Trainer when it steps the scheduler — confirm in training.trainer.
#
# `verbose=True` was dropped on purpose: the flag is deprecated since
# PyTorch 2.2 and only produced a warning there. Inspect the current rate via
# optimizer.param_groups[0]["lr"] if it needs to be logged.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.1,
    patience=3,
)

# Trainer wiring: model, optimisation objects, metrics, early stopping and
# the directory used for checkpoints.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    device=config["device"],
    scheduler=scheduler,
    metrics=metrics,
    early_stopping_patience=7,
    checkpoint_dir="checkpoints",
)



In [7]:
# Import for path
import os
from pathlib import Path

# # Train the model
# history = trainer.train(
#     train_loader=train_loader,
#     val_loader=val_loader,
#     num_epochs=30,
#     log_interval=1,
# )

# Load best checkpoint for inference.
# Several `best_model_*.pt` files can accumulate across runs and glob() gives
# no ordering guarantee, so sort by modification time and take the newest one
# instead of whichever entry happens to be listed first.
# NOTE(review): assumes the most recently written file is the intended one.
checkpoint_dir = Path('checkpoints')
best_checkpoints = sorted(
    checkpoint_dir.glob('best_model_*.pt'),
    key=lambda path: path.stat().st_mtime,
)
if not best_checkpoints:
    # Fail with an actionable message rather than a bare IndexError.
    raise FileNotFoundError(
        f"No 'best_model_*.pt' checkpoint found in {checkpoint_dir.resolve()}; "
        "train the model first or point to an existing checkpoint."
    )
best_checkpoint = best_checkpoints[-1]

# Kept as the final expression so the cell still displays the return value.
trainer.load_checkpoint(str(best_checkpoint))

  checkpoint = torch.load(checkpoint_path, map_location=self.device)


0

In [8]:
# ModelCheckpoint.save_checkpoint(
#         model, optimizer, epoch, val_metrics["loss"],{}, "checkpoints",config, config["run_name"]
#     )


In [9]:
# checkpoint = ModelCheckpoint.load_checkpoint("checkpoints/checkpoint.pt", model, optimizer)

In [10]:
from utils.predictor import HateSpeechPredictor

# Wrap the classifier for raw-text inference. The second argument is the
# model name from the config (presumably used to load the matching
# tokenizer — confirm in utils.predictor).
predictor = HateSpeechPredictor(
    model,
    config["model_name"],
)



In [11]:
texts = ["Where should i begin? I am so happy right now", "I love you", "I hate gays", "I hate you", "Black people are the worst"] 
# Score the probe sentences held in `texts` and print the raw result list.
# Per the recorded output, each entry is a dict with the text, the predicted
# label and the toxic / non-toxic probabilities.
prediction = predictor.predict(texts)
print(prediction)

[{'text': 'Where should i begin? I am so happy right now', 'prediction': 0, 'toxic_probability': 0.026428600773215294, 'non_toxic_probability': 0.9735714197158813}, {'text': 'I love you', 'prediction': 0, 'toxic_probability': 0.019253328442573547, 'non_toxic_probability': 0.9807466864585876}, {'text': 'I hate gays', 'prediction': 1, 'toxic_probability': 0.8715593814849854, 'non_toxic_probability': 0.12844060361385345}, {'text': 'I hate you', 'prediction': 1, 'toxic_probability': 0.9498029947280884, 'non_toxic_probability': 0.050196945667266846}, {'text': 'Black people are the worst', 'prediction': 1, 'toxic_probability': 0.9802342057228088, 'non_toxic_probability': 0.019765831530094147}]


In [22]:
texts = ["We should help africa", "Why should we help Africa", "Why should we help Africa, we shouldnt encourage begging"] 
# Score the probe sentences held in `texts` and print the raw result list.
# NOTE(review): the saved output of this cell lists five results although
# `texts` holds three sentences — the cell was edited after its last run, so
# re-run it to refresh the output.
prediction = predictor.predict(texts)
print(prediction)

[{'text': 'We should help africa', 'prediction': 0, 'toxic_probability': 0.22286871075630188, 'non_toxic_probability': 0.7771313190460205}, {'text': 'Why should we help Africa', 'prediction': 0, 'toxic_probability': 0.35938793420791626, 'non_toxic_probability': 0.6406120657920837}, {'text': 'Why should we help Africa, we shouldnt encourage begging', 'prediction': 1, 'toxic_probability': 0.8812673091888428, 'non_toxic_probability': 0.11873272061347961}, {'text': 'Despite their culture being that of apes some africans are really kind', 'prediction': 1, 'toxic_probability': 0.7226390242576599, 'non_toxic_probability': 0.2773609459400177}, {'text': 'I would never say nigger', 'prediction': 0, 'toxic_probability': 0.4120437204837799, 'non_toxic_probability': 0.5879563093185425}]


In [23]:
texts = [ "I would never say nigger", "I would never say nigger nigger",  "I would never say nigger nigger nigger"] 
# Score the probe sentences held in `texts` and print the raw result list.
# In the recorded output, repeating the final word leaves toxic_probability
# at roughly 0.41-0.42 for all three variants.
prediction = predictor.predict(texts)
print(prediction)

[{'text': 'I would never say nigger', 'prediction': 0, 'toxic_probability': 0.4120437204837799, 'non_toxic_probability': 0.5879563093185425}, {'text': 'I would never say nigger nigger', 'prediction': 0, 'toxic_probability': 0.4200800955295563, 'non_toxic_probability': 0.5799199342727661}, {'text': 'I would never say nigger nigger nigger', 'prediction': 0, 'toxic_probability': 0.41566675901412964, 'non_toxic_probability': 0.5843332409858704}]


In [28]:
texts = ["Jews love nature", "Jews love money", "Jews love coins", "Jews love value", "Jews love to be valued"] 
# Score the probe sentences held in `texts` (variants that differ only in the
# final word or phrase) and print the raw result list. In the recorded output only the
# second variant is predicted as class 1.
prediction = predictor.predict(texts)
print(prediction)

[{'text': 'Jews love nature', 'prediction': 0, 'toxic_probability': 0.009527722373604774, 'non_toxic_probability': 0.9904723167419434}, {'text': 'Jews love money', 'prediction': 1, 'toxic_probability': 0.8688678741455078, 'non_toxic_probability': 0.13113218545913696}, {'text': 'Jews love coins', 'prediction': 0, 'toxic_probability': 0.17644533514976501, 'non_toxic_probability': 0.8235546946525574}, {'text': 'Jews love value', 'prediction': 0, 'toxic_probability': 0.039441220462322235, 'non_toxic_probability': 0.9605587720870972}, {'text': 'Jews love to be valued', 'prediction': 0, 'toxic_probability': 0.02172423154115677, 'non_toxic_probability': 0.9782757759094238}]


In [29]:
texts = ["The Holocaust happened", "The Holocaust did not happen","The Holocaust did not not happen","Yeah  right, The Holocaust happened"] 
# Score the probe sentences held in `texts` (affirmation, negation and double
# negation of the same statement) and print the raw result list.
# NOTE(review): in the recorded output every variant, including the plain
# negation, is predicted as class 0 — worth following up as a likely false
# negative.
prediction = predictor.predict(texts)
print(prediction)

[{'text': 'The Holocaust happened', 'prediction': 0, 'toxic_probability': 0.010812245309352875, 'non_toxic_probability': 0.9891877770423889}, {'text': 'The Holocaust did not happen', 'prediction': 0, 'toxic_probability': 0.074808269739151, 'non_toxic_probability': 0.9251917600631714}, {'text': 'The Holocaust did not not happen', 'prediction': 0, 'toxic_probability': 0.056685030460357666, 'non_toxic_probability': 0.9433150291442871}, {'text': 'Yeah  right, The Holocaust happened', 'prediction': 0, 'toxic_probability': 0.3381359875202179, 'non_toxic_probability': 0.6618639826774597}]


In [12]:
from data.toxigen import ToxiGenDataset


# Open the "train" split directly (the second argument is the configured
# model name) and display the wrapped dataset's summary as the cell output.
# NOTE(review): this cell carries an earlier execution count than the cells
# above it; re-run the notebook top to bottom before relying on its output.
ds = ToxiGenDataset("train", config["model_name"])
ds.dataset

Dataset({
    features: ['text', 'target_group', 'factual?', 'ingroup_effect', 'lewd', 'framing', 'predicted_group', 'stereotyping', 'intent', 'toxicity_ai', 'toxicity_human', 'predicted_author', 'actual_method'],
    num_rows: 8960
})