# Notebook setup

Run these steps before any other cells in this notebook.

In [None]:
#@markdown ## Setup notebook
#@markdown Install Kagglehub and preload GPU acceleration extensions for Pandas and Scikit-Learn.

%pip install -U "kagglehub==0.3.12"

try:
  %load_ext cudf.dataframe # Pandas on GPU
  %load_ext cuml.accel # ScikitLearn on GPU
except Exception as e:
  print("Failed to load GPU extensions. Is notebook running in Colab GPU environment?")





stdout:



stderr:

Traceback (most recent call last):
  File "<string>", line 4, in <module>
  File "/usr/local/lib/python3.11/dist-packages/numba_cuda/numba/cuda/cudadrv/driver.py", line 314, in __getattr__
    raise CudaSupportError("Error at driver init: \n%s:" %
numba.cuda.cudadrv.error.CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
:


Not patching Numba


Failed to load GPU extensions. Is notebook running in Colab GPU environment?


In [None]:
#@markdown ## Download third-party dataset
#@markdown Pull [blackmoon/russian-language-toxic-comments](https://www.kaggle.com/datasets/blackmoon/russian-language-toxic-comments) from Kaggle
#@markdown for further processing.

from sklearn.model_selection import train_test_split
import kagglehub
import pandas as pd
import os

# Fetch the dataset from Kaggle; the call returns the local directory that holds its files
dataset_path = kagglehub.dataset_download("blackmoon/russian-language-toxic-comments")
# Load the labelled CSV and rename its columns to the `message`/`label` schema
# used by our own dataset, so both frames can be concatenated later
rslc_df = (
    pd.read_csv(os.path.join(dataset_path, "labeled.csv"))
    .rename(columns={"comment": "message", "toxic": "label"})
)



Downloading from https://www.kaggle.com/api/v1/datasets/download/blackmoon/russian-language-toxic-comments?dataset_version_number=1...


100%|██████████| 1.49M/1.49M [00:00<00:00, 23.9MB/s]

Extracting files...





In [None]:
#@markdown ## Upload custom dataset
#@markdown In addition to the step above, we upload our own dataset to
#@markdown this Colab environment and specify the path here.
#@markdown
#@markdown File must be in CSV format, and must have two columns:
#@markdown `message` (message content) and `label` (`0` for normal messages,
#@markdown `1` for toxic messages.)

from sklearn.model_selection import train_test_split
import pandas as pd

#@markdown Path to dataset .CSV file
path = "/content/masked_dataset.csv" #@param {type:"string"}
our_df = pd.read_csv(path)

# Fail fast when the file does not follow the schema described in this cell's
# form text (`message` and `label` columns); otherwise the problem would only
# surface later as a KeyError when the datasets are merged and split.
missing_columns = {"message", "label"} - set(our_df.columns)
if missing_columns:
    raise ValueError(
        f"{path} is missing required column(s): {sorted(missing_columns)}"
    )


In [None]:
#@markdown ## Merge, shuffle and split
#@markdown We'll merge two datasets and print some metrics.

import pandas as pd

#@markdown Test-to-train set ratio
test_size = 0.2 # @param {"type":"slider","min":0,"max":1,"step":0.05}
#@markdown Random state seed
random_state = 42 # @param {"type":"integer"}

# Stack both corpora, shuffle every row reproducibly, then renumber the index
df = (
    pd.concat([rslc_df, our_df])
    .sample(frac=1, random_state=random_state)
    .reset_index(drop=True)
)

# Hold out `test_size` of the rows for validation; texts and labels are
# passed as plain Python lists and come back as four aligned lists
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['message'].to_list(),
    df['label'].to_list(),
    random_state=random_state,
    test_size=test_size,
)

# Class balance of the merged frame and of each split
total_label_counts = df['label'].value_counts()
train_label_counts = pd.Series(train_labels).value_counts()
val_label_counts = pd.Series(val_labels).value_counts()

for heading, counts in (
    ("Total label counts:", total_label_counts),
    ("\nTraining set label counts:", train_label_counts),
    ("\nValidation set label counts:", val_label_counts),
):
    print(heading)
    print(counts)

# Shortest and longest message, measured in characters
msg_lengths = df['message'].str.len()
shortest, longest = msg_lengths.min(), msg_lengths.max()
print("\nMinimum message length:", shortest)
print("Maximum message length:", longest)


Total label counts:
label
0.0    14586
1.0     9826
Name: count, dtype: int64

Training set label counts:
0.0    11665
1.0     7864
Name: count, dtype: int64

Validation set label counts:
0.0    2921
1.0    1962
Name: count, dtype: int64

Minimum message length: 9
Maximum message length: 7404


# Using a regular expression

In [None]:
#@markdown ## Define regular expression
#@markdown Compile a regular expression that matches common Russian profanity,
#@markdown including letter and word-form variations.

import re

# Profanity matcher. The pattern text enables case-insensitive, Unicode and
# verbose matching through its inline `(?iux)` flags, and every alternative is
# anchored so that it is neither preceded nor followed by a Cyrillic letter.
# The literal is a raw string so `\S` reaches the regex engine as written
# instead of being an invalid string escape (which triggers a warning).
pattern = re.compile(r"""
(?iux)(?<![а-яё])(?:
(?:(?:у|[нз]а|(?:хитро|не)?вз?[ыьъ]|с[ьъ]|(?:и|ра)[зс]ъ?|(?:о[тб]|п[оа]д)[ьъ]?|(?:\S(?=[а-яё]))+?[оаеи-])-?)?(?:
  [её](?:б(?!о[рй]|рач)|п[уа](?:ц|тс))|
  и[пб][ае][тцд][ьъ]
).*?|

(?:(?:н[иеа]|(?:ра|и)[зс]|[зд]?[ао](?:т|дн[оа])?|с(?:м[еи])?|а[пб]ч|в[ъы]?|пр[еи])-?)?ху(?:[яйиеёю]|л+и(?!ган)).*?|

бл(?:[эя]|еа?)(?:[дт][ьъ]?)?|

\S*?(?:
  п(?:
    [иеё]зд|
    ид[аое]?р|
    ед(?:р(?!о)|[аое]р|ик)|
    охую
  )|
  бля(?:[дбц]|тс)|
  [ое]ху[яйиеё]|
  хуйн
).*?|

(?:о[тб]?|про|на|вы)?м(?:
  анд(?:[ауеыи](?:л(?:и[сзщ])?[ауеиы])?|ой|[ао]в.*?|юк(?:ов|[ауи])?|е[нт]ь|ища)|
  уд(?:[яаиое].+?|е?н(?:[ьюия]|ей))|
  [ао]л[ао]ф[ьъ](?:[яиюе]|[еёо]й)
)|

елд[ауые].*?|
ля[тд]ь|
(?:[нз]а|по)х
)(?![а-яё])
""", re.VERBOSE)

In [None]:
#@markdown ## Check pattern accuracy

from sklearn.metrics import classification_report
from tqdm import tqdm

# One prediction per validation message: 1 when the pattern matches anywhere
# in the text, 0 otherwise
preds = [
    1 if pattern.search(text) else 0
    for text in tqdm(val_texts, desc="Regex Evaluating")
]

print(f"\n\n{classification_report(val_labels, preds, digits=2)}")


Regex Evaluating: 100%|██████████| 4883/4883 [00:00<00:00, 25216.12it/s]




              precision    recall  f1-score   support

         0.0       0.73      0.96      0.83      2921
         1.0       0.89      0.46      0.61      1962

    accuracy                           0.76      4883
   macro avg       0.81      0.71      0.72      4883
weighted avg       0.79      0.76      0.74      4883



# Using ruBERT fine-tuned model

Here is how we fine-tuned the ruBERT model [DeepPavlov/rubert-base-cased](https://huggingface.co/DeepPavlov/rubert-base-cased) and evaluated how effective ruBERT models are for our moderation task.

In [None]:
#@markdown ## Install dependencies

# Install exact, pinned versions of the libraries used in the ruBERT sections.
# NOTE(review): pandas has already been imported by earlier cells; if this step
# changes the installed pandas/torch version, a runtime restart is presumably
# required before the new version is actually used — confirm.
%pip install -U \
    "transformers==4.51.3" \
    "pandas==2.2.3" \
    "torch==2.7.0" \
    "torchvision==0.22.0"

Collecting pandas==2.2.3
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch==2.7.0
  Downloading torch-2.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision==0.22.0
  Downloading torchvision-0.22.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting sympy>=1.13.3 (from torch==2.7.0)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch==2.7.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch==2.7.0)
  Downloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.6.80 (from torch==2.7.0)
  Down

In [None]:
#@markdown ## Tokenize using ruBERT
#@markdown We're using [DeepPavlov/rubert-base-cased](https://huggingface.co/DeepPavlov/rubert-base-cased) from HuggingFace.

from transformers import AutoTokenizer

#@markdown Max number of tokens produced by tokenizer
max_length = 128 # @param {"type":"slider","min":0,"max":1024,"step":16}

tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

# Encode the training split first, then the validation split, with identical
# settings: truncate to `max_length` tokens and pad within each split
train_encodings, val_encodings = (
    tokenizer(texts, truncation=True, padding=True, max_length=max_length)
    for texts in (train_texts, val_texts)
)

In [None]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch


class ToxicDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from field name to an indexable collection holding
            one entry per sample (iterated with ``.items()``).
        labels: Indexable collection of numeric labels, one per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of samples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors.

        Every encoding field is converted to a tensor, and the label is added
        under the ``labels`` key as a ``torch.long`` tensor.
        """
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample


# Wrap each split's encodings and labels so the Trainer can index them
train_dataset, val_dataset = (
    ToxicDataset(train_encodings, train_labels),
    ToxicDataset(val_encodings, val_labels),
)


#@markdown ## Fine-tune
# The form fields below are forwarded unchanged to `TrainingArguments` later in this cell.
#@markdown Number of times the model will go through the entire training dataset
num_train_epochs = 3 # @param {"type":"slider","min":1,"max":10,"step":1}

#@markdown Batch size per device (GPU/CPU) during training
per_device_train_batch_size = 64 # @param {"type":"slider","min":1,"max":128,"step":1}

#@markdown Batch size per device during evaluation
per_device_eval_batch_size = 64 # @param {"type":"slider","min":1,"max":128,"step":1}

# NOTE(review): the training split printed earlier has 19,529 rows; at batch
# size 64 on a single device that is roughly 306 optimizer steps per epoch
# (~918 for 3 epochs), so 500 warmup steps would cover more than half of
# training — confirm this is intended.
#@markdown Number of steps to gradually increase the learning rate at the start
warmup_steps = 500 # @param {"type":"slider","min":0,"max":1000,"step":1}

#@markdown Strength of L2 regularization to prevent overfitting
weight_decay = 0.01 # @param {"type":"slider","min":0,"max":1,"step":0.01}


# Load the base ruBERT checkpoint with a 2-class sequence-classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepPavlov/rubert-base-cased",
    num_labels=2
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_dir='./logs',
    # Evaluate and checkpoint once per epoch so the best checkpoint can be
    # restored when training finishes
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # fp16 mixed precision is only usable on a CUDA device; enabling it
    # unconditionally makes this cell fail on a CPU-only runtime
    fp16=torch.cuda.is_available(),
    # Empty list: no experiment-tracking integrations are requested
    report_to=[]
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # `tokenizer=` is deprecated in the pinned transformers release (it emits
    # a FutureWarning); `processing_class=` is its replacement
    processing_class=tokenizer
)

# Fine-tune, then persist the model and the tokenizer side by side so the
# evaluation cell can reload both from the same local directory
trainer.train()
trainer.save_model("./rubert_toxicity_predict")
tokenizer.save_pretrained("./rubert_toxicity_predict")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,0.175606
2,0.220500,0.228112
3,0.220500,0.14142


('./rubert_toxicity_predict/tokenizer_config.json',
 './rubert_toxicity_predict/special_tokens_map.json',
 './rubert_toxicity_predict/vocab.txt',
 './rubert_toxicity_predict/added_tokens.json',
 './rubert_toxicity_predict/tokenizer.json')

In [None]:
#@markdown ## Check model accuracy

from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm
import torch

# Reload the fine-tuned checkpoint and its tokenizer from local disk only
model = AutoModelForSequenceClassification.from_pretrained("./rubert_toxicity_predict", local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained("./rubert_toxicity_predict", local_files_only=True)

# Prefer the GPU when one is available, otherwise run on CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

threshold = 0.5  #@param {"type":"slider","min":0,"max":1,"step":0.05}
# NOTE(review): the training cell tokenized with max_length=128, so evaluating
# at 512 feeds longer inputs than the model was fine-tuned on — confirm intended.
max_length = 512 # @param {"type":"slider","min":64,"max":1024,"step":32}
batch_size = 128 # @param {"type":"slider","min":32,"max":1024,"step":32}

# Tokenize the whole validation split up front as PyTorch tensors
encodings = tokenizer(
    val_texts,
    return_tensors="pt",
    max_length=max_length,
    padding=True,
    truncation=True,
)

dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
loader = DataLoader(dataset, batch_size=batch_size)

preds = []
with torch.no_grad():
    for ids_batch, mask_batch in tqdm(loader, desc="Predicting"):
        logits = model(ids_batch.to(device), attention_mask=mask_batch.to(device)).logits
        # Column 1 of the softmax output is the probability of label 1 (toxic)
        toxic_prob = logits.softmax(dim=1)[:, 1]
        preds += toxic_prob.gt(threshold).int().tolist()

print(f"\n\n{classification_report(val_labels, preds, digits=2)}")


Predicting: 100%|██████████| 39/39 [02:19<00:00,  3.59s/it]



              precision    recall  f1-score   support

         0.0       0.97      0.96      0.96      2921
         1.0       0.93      0.96      0.95      1962

    accuracy                           0.96      4883
   macro avg       0.95      0.96      0.95      4883
weighted avg       0.96      0.96      0.96      4883






# Using pre-trained ruBERT model

Here is how we used the pre-trained model [sismetanin/rubert-toxic-pikabu-2ch](https://huggingface.co/sismetanin/rubert-toxic-pikabu-2ch), without any fine-tuning, to evaluate how effective ruBERT models are for our moderation task.

In [None]:
#@markdown ## Pull ruBERT model
#@markdown Pull [sismetanin/rubert-toxic-pikabu-2ch](https://huggingface.co/sismetanin/rubert-toxic-pikabu-2ch) model from HuggingFace.

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Hugging Face Hub identifier of the pre-trained toxicity classifier
model_name = "sismetanin/rubert-toxic-pikabu-2ch"
# Fetch the tokenizer and the sequence-classification model for that identifier.
# NOTE(review): the evaluation cell that follows loads both again by the same
# literal identifier, so these two objects are overwritten there.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/540 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/920 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/712M [00:00<?, ?B/s]

In [None]:
#@markdown ## Check model accuracy

from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm
import torch

# Load the pre-trained toxicity classifier and its tokenizer by Hub identifier
model = AutoModelForSequenceClassification.from_pretrained("sismetanin/rubert-toxic-pikabu-2ch")
tokenizer = AutoTokenizer.from_pretrained("sismetanin/rubert-toxic-pikabu-2ch")

# Prefer the GPU when one is available, otherwise run on CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

threshold = 0.5  #@param {"type":"slider","min":0,"max":1,"step":0.05}
max_length = 512 # @param {"type":"slider","min":64,"max":1024,"step":32}
batch_size = 128 # @param {"type":"slider","min":32,"max":1024,"step":32}

# Tokenize the whole validation split up front as PyTorch tensors
encodings = tokenizer(
    val_texts,
    return_tensors="pt",
    max_length=max_length,
    padding=True,
    truncation=True,
)

dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
loader = DataLoader(dataset, batch_size=batch_size)

preds = []
with torch.no_grad():
    for ids_batch, mask_batch in tqdm(loader, desc="Predicting"):
        logits = model(ids_batch.to(device), attention_mask=mask_batch.to(device)).logits
        # Column 1 of the softmax output is compared against the threshold;
        # this assumes index 1 is the toxic class for this checkpoint — TODO confirm
        toxic_prob = logits.softmax(dim=1)[:, 1]
        preds += toxic_prob.gt(threshold).int().tolist()

print(f"\n\n{classification_report(val_labels, preds, digits=2)}")

Predicting: 100%|██████████| 39/39 [02:20<00:00,  3.59s/it]



              precision    recall  f1-score   support

         0.0       0.97      0.91      0.94      2921
         1.0       0.88      0.96      0.92      1962

    accuracy                           0.93      4883
   macro avg       0.92      0.93      0.93      4883
weighted avg       0.93      0.93      0.93      4883






# Using Groq LLM API

Here's how we used the Groq API for toxicity detection with the `llama3-8b-8192` model.

In [None]:
#@markdown ## Install dependencies
# Install a pinned version of the Groq SDK used by the evaluation cell below
%pip install -U "groq==0.24.0"

Collecting groq==0.24.0
  Downloading groq-0.24.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.24.0-py3-none-any.whl (127 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.5/127.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.24.0


In [None]:
#@markdown ## Check LLM accuracy

from groq import Groq
import json

# Read the API key from Colab's user data store under the name 'GROQ_TOKEN'
# rather than hard-coding it in the notebook, then build the API client with it
from google.colab import userdata
GROQ_TOKEN = userdata.get('GROQ_TOKEN')
client = Groq(api_key=GROQ_TOKEN)

# System message sent with every classification request. It defines the
# toxic/normal criteria and the JSON reply format. `detect_toxicity` reads
# only the "type" field of the reply; the "reason" field requested here is
# not used by this notebook.
system_prompt = """
<task>
You are a professional content moderation system.
Evaluate user messages for toxicity. Your goal is to classify
each message as either  "toxic" or "normal".

A message is "toxic" if it contains any of the following:
- Hate speech (e.g., racism, sexism, homophobia)
- Threats or incitement to violence
- Harassment or targeted abuse
- Sexually explicit or obscene content
- Severe profanity or slurs
- Graphic self-harm or suicide encouragement

A message is "normal" if it contains none of the above.

If a message is toxic, you must also provide a brief reason for the
classification, using one of the following categories:
"hate speech", "threat", "harassment", "sexual content",
"profanity", or "self-harm".

For ambiguous or borderline cases, choose "normal" unless
clear evidence of toxicity is present.

Consistency and accuracy are critical. Do not guess or interpret
intent—classify only the content provided.

Classify each message exactly as instructed, with no explanation,
markdown, or extra text.
</task>

<input>
You will receive a raw user message as the next message. Treat it as-is.
</input>

<output>
Respond in **exactly** one of the following JSON formats, with no extra
whitespace or characters.
If the message is toxic:
{
  "type": "toxic",
  "reason": "CATEGORY"
}

If the message is normal:
{
  "type": "normal"
}
</output>
"""

from sklearn.metrics import classification_report
from tqdm import tqdm

def detect_toxicity(message):
  """Classify one message as toxic or not through the Groq chat API.

  Sends `system_prompt` followed by the raw message to the `llama3-8b-8192`
  model in JSON mode and interprets the "type" field of the reply.

  Args:
    message: Text of the message to classify.

  Returns:
    True when the model answers "toxic", False when it answers "normal".
    Any other valid JSON reply (missing or unexpected "type", or a payload
    that is not a JSON object) is printed as an invalid response and treated
    as not toxic.

  Raises:
    json.JSONDecodeError: If the reply is not valid JSON.
    Exception: Whatever the Groq client raises when the API call fails.
  """
  chat_completion = client.chat.completions.create(
      messages=[
          {"role": "system", "content": system_prompt},
          {"role": "user", "content": message},
      ],
      model="llama3-8b-8192",
      response_format={"type": "json_object"}
  )

  res = str(chat_completion.choices[0].message.content)
  res_json = json.loads(res)

  # Look the field up defensively so a reply without "type" (or one that is
  # not a JSON object) reaches the "Invalid response" branch instead of
  # escaping as KeyError/TypeError.
  verdict = res_json.get("type") if isinstance(res_json, dict) else None
  if verdict == "toxic":
    return True
  if verdict != "normal":
    print(f"Invalid response: {res}")
  return False

preds = []
failed_idx = []  # positions whose API call failed; their entry in `preds` is only a placeholder
for idx, text in enumerate(tqdm(val_texts, desc="Groq LLM evaluating")):
    try:
        is_toxic = detect_toxicity(text)
    except Exception as e:
        # Keep `preds` aligned with `val_texts`, but remember that this entry
        # is not a real model prediction
        is_toxic = False
        failed_idx.append(idx)
        # tqdm.write keeps the message from breaking the progress bar
        tqdm.write(str(e))
    preds.append(1 if is_toxic else 0)

# Score only the messages the API actually classified: counting failed calls
# as "normal" predictions would bias the report against the model.
failed = set(failed_idx)
scored_labels = [label for i, label in enumerate(val_labels) if i not in failed]
scored_preds = [pred for i, pred in enumerate(preds) if i not in failed]

print(f"\n\nScored {len(scored_preds)} of {len(preds)} messages ({len(failed)} skipped after API errors)")
if scored_preds:
    print(f"\n\n{classification_report(scored_labels, scored_preds, digits=2)}")


Groq LLM evaluating:   0%|          | 2/4883 [00:09<7:03:50,  5.21s/it]

Error code: 503 - {'error': {'message': 'Service Unavailable', 'type': 'internal_server_error'}}


Groq LLM evaluating:   3%|▎         | 160/4883 [10:06<7:31:39,  5.74s/it]

Error code: 503 - {'error': {'message': 'Service Unavailable', 'type': 'internal_server_error'}}


Groq LLM evaluating:   8%|▊         | 382/4883 [24:57<4:45:09,  3.80s/it]