### Detecting toxic phrases

In [1]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification

In [2]:
# Load only the 'plus.jsonl' file of the merionum/ru_paraphraser dataset
# (served from the local Hugging Face cache when it is already downloaded).
corpus = load_dataset(
    'merionum/ru_paraphraser',
    data_files='plus.jsonl',
)

Using custom data configuration merionum--ru_paraphraser-8471dca11b8f9d26
Found cached dataset json (/home/medic/.cache/huggingface/datasets/merionum___json/merionum--ru_paraphraser-8471dca11b8f9d26/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab)


  0%|          | 0/1 [00:00<?, ?it/s]

In [3]:
# Checkpoint identifier; the tokenizer and the classifier are both loaded from it
# so that they stay in sync.
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
# Optional GPU placement, currently disabled. text2toxicity sends its inputs to
# model.device and brings results back with .cpu(), so uncommenting the two
# lines below should be the only change needed to run on CUDA.
# if torch.cuda.is_available():
#     model.cuda()

In [4]:
def text2toxicity(text, aggregate = True):
    """Score the toxicity of one text or of a batch of texts.

    Uses the module-level ``tokenizer`` and ``model``. Inputs are tokenized
    with truncation and padding, moved to ``model.device``, and the model is
    run under ``torch.no_grad()``. A sigmoid is applied to the logits and the
    result is returned as NumPy data on the CPU.

    Args:
        text: A single string, or a batch of strings accepted by the tokenizer.
        aggregate: If true, collapse the per-aspect probabilities into a single
            score; if false, return the probabilities unchanged.

    Returns:
        With ``aggregate`` true: ``1 - p_first * (1 - p_last)``, where
        ``p_first`` and ``p_last`` are the first and last probabilities
        (a scalar for a string input, otherwise presumably one value per text).
        With ``aggregate`` false: the probability vector for a string input,
        or the whole probability array for a batch.

    NOTE(review): the aggregation formula reads as if the first output is a
    "non-toxic" probability and the last one flags "dangerous" content;
    confirm the label order against the model card.
    """
    single_input = isinstance(text, str)

    with torch.no_grad():
        encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
        encoded = encoded.to(model.device)
        logits = model(**encoded).logits
        scores = torch.sigmoid(logits).cpu().numpy()

    # A lone string still yields a batch of size one; unwrap it.
    if single_input:
        scores = scores[0]

    if not aggregate:
        return scores

    # Transposing lets the same indexing serve both the 1-D (single text)
    # and the 2-D (batch) case.
    by_aspect = scores.T
    first_aspect = by_aspect[0]
    last_aspect = by_aspect[-1]
    return 1 - first_aspect * (1 - last_aspect)


In [5]:
# Neutral sentence ("It is too hard to pick the right model"): aggregated score.
print(text2toxicity('Слишком сложно выбрать нужную модель', aggregate=True))

0.023072274617099375


In [6]:
# Obscene insult: print the raw per-aspect probability vector.
print(text2toxicity('Да иди ты в жопу', aggregate=False))

[8.8611082e-04 9.7716516e-01 3.7684384e-01 1.7920768e-02 9.1513145e-01]


In [7]:
# Same obscene insult, collapsed into a single aggregated score.
print(text2toxicity('Да иди ты в жопу', aggregate=True))

0.9999247970591534
