In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [2]:
# Load the tokenizer and the sequence-classification model.
# Both are resolved from the same identifier (`model_name`) so the tokenizer
# matches the checkpoint it feeds. `tokenizer` and `model` are notebook-level
# globals that `classify_sentence` reads directly.
# NOTE(review): `from_pretrained` presumably downloads/caches the checkpoint on
# first use, so this cell may need network access — confirm for offline runs.
model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [3]:
# Function to classify a sentence
def classify_sentence(sentence: str) -> "tuple[int, torch.Tensor]":
    """Classify one sentence with the notebook-level ``tokenizer`` and ``model``.

    Args:
        sentence: The text to classify. It is tokenized with truncation
            enabled, so over-long input is cut rather than rejected.

    Returns:
        A ``(predicted_class, probabilities)`` pair:

        * ``predicted_class`` -- ``int`` index of the highest-probability class.
        * ``probabilities`` -- 1-D tensor of softmax probabilities, one entry
          per class, located on the same device as the model.

        The meaning of each class index is not defined in this function;
        presumably ``model.config.id2label`` holds the names -- confirm
        against the checkpoint before interpreting the index.
    """
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
    # Move the encoded tensors to wherever the model lives; without this the
    # forward pass fails as soon as the model has been moved off the CPU.
    inputs = inputs.to(model.device)
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        logits = model(**inputs).logits
    # Drop only the leading batch axis. A bare squeeze() would also collapse
    # the class axis whenever it happens to have size 1.
    probabilities = torch.softmax(logits, dim=-1).squeeze(0)
    predicted_class = torch.argmax(probabilities).item()
    return predicted_class, probabilities


In [5]:
# Smoke-test the classifier on a few hand-picked inputs and print each verdict
example_sentences = [
    "I like you. I love you.",
    "fuck you mother fucker",
    "You are so stupid and annoying.",
    "if you are white in that state watch yourself !",
    "Yep, and these are the stupid cops that are suppose to be protecting us. If they keep doing this shit something is definitely gonna go down real soon"
]

# map() is lazy, so each sentence is classified immediately before its own
# report is printed -- the same interleaving as an explicit call in the loop.
for sentence, (predicted_class, probabilities) in zip(
    example_sentences, map(classify_sentence, example_sentences)
):
    print(f"Sentence: {sentence}")
    print(f"Predicted class: {predicted_class}")
    print(f"Probabilities: {probabilities.tolist()}\n")

Sentence: I like you. I love you.
Predicted class: 0
Probabilities: [0.9998583793640137, 0.0001415598817402497]

Sentence: fuck you mother fucker
Predicted class: 1
Probabilities: [0.001021717325784266, 0.9989782571792603]

Sentence: You are so stupid and annoying.
Predicted class: 0
Probabilities: [0.9995531439781189, 0.0004467867547646165]

Sentence: if you are white in that state watch yourself !
Predicted class: 0
Probabilities: [0.9367551803588867, 0.06324481964111328]

Sentence: Yep, and these are the stupid cops that are suppose to be protecting us. If they keep doing this shit something is definitely gonna go down real soon
Predicted class: 0
Probabilities: [0.9998579025268555, 0.00014205249317456037]

